diff --git a/src/config.rs b/src/config.rs index 9f9ad9a..3cd9b55 100644 --- a/src/config.rs +++ b/src/config.rs @@ -252,6 +252,8 @@ pub struct AppConfig { pub debug: bool, pub compaction: CompactionConfig, pub dmn: DmnConfig, + #[serde(default)] + pub learn: LearnConfig, #[serde(skip_serializing_if = "Option::is_none")] pub memory_project: Option<String>, #[serde(default)] @@ -323,6 +325,22 @@ pub struct DmnConfig { pub max_turns: u32, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LearnConfig { + /// Divergence threshold — responses scoring above this become + /// fine-tuning candidates. Lower = more sensitive. + #[serde(default = "default_learn_threshold")] + pub threshold: f64, +} + +fn default_learn_threshold() -> f64 { 0.0000001 } + +impl Default for LearnConfig { + fn default() -> Self { + Self { threshold: default_learn_threshold() } + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ModelConfig { /// Backend name ("anthropic" or "openrouter") @@ -366,6 +384,7 @@ impl Default for AppConfig { soft_threshold_pct: 80, }, dmn: DmnConfig { max_turns: 20 }, + learn: LearnConfig::default(), memory_project: None, models: HashMap::new(), default_model: String::new(), diff --git a/src/mind/mod.rs b/src/mind/mod.rs index a3a37f4..81bcb09 100644 --- a/src/mind/mod.rs +++ b/src/mind/mod.rs @@ -149,8 +149,26 @@ pub struct MindState { pub unc_idle_deadline: Instant, /// Fine-tuning candidates identified by scoring. pub finetune_candidates: Vec<learn::FinetuneCandidate>, - /// Fine-tune scoring progress (empty = not running). - pub finetune_progress: String, + /// Last scoring run stats for UI display. + pub finetune_last_run: Option<FinetuneScoringStats>, + /// Divergence threshold for finetune scoring — mutable via F6 hotkeys + /// and persisted back to ~/.consciousness/config.json5. + pub learn_threshold: f64, +} + +/// Stats from the last finetune scoring run.
+#[derive(Clone, Debug)] +pub struct FinetuneScoringStats { + /// Count of assistant responses we considered (recent half of context). + pub responses_considered: usize, + /// How many exceeded the divergence threshold. + pub above_threshold: usize, + /// Threshold used for this run. + pub threshold: f64, + /// Highest divergence observed. + pub max_divergence: f64, + /// Error message if the run failed. + pub error: Option<String>, } impl Clone for MindState { @@ -170,7 +188,8 @@ impl Clone for MindState { unc_idle: self.unc_idle, unc_idle_deadline: self.unc_idle_deadline, finetune_candidates: self.finetune_candidates.clone(), - finetune_progress: self.finetune_progress.clone(), + finetune_last_run: self.finetune_last_run.clone(), + learn_threshold: self.learn_threshold, } } } @@ -185,6 +204,8 @@ pub enum MindCommand { ScoreFull, /// Score for finetune candidates ScoreFinetune, + /// Update the finetune divergence threshold and persist to config. + SetLearnThreshold(f64), /// Abort current turn, kill processes Interrupt, /// Reset session @@ -194,7 +215,7 @@ impl MindState { - pub fn new(max_dmn_turns: u32) -> Self { + pub fn new(max_dmn_turns: u32, learn_threshold: f64) -> Self { Self { input: Vec::new(), turn_active: false, @@ -211,7 +232,8 @@ impl MindState { unc_idle: false, unc_idle_deadline: Instant::now() + std::time::Duration::from_secs(60), finetune_candidates: Vec::new(), - finetune_progress: String::new(), + finetune_last_run: None, + learn_threshold, } } @@ -341,7 +363,10 @@ impl Mind { crate::agent::tools::tools(), ).await; - let shared = Arc::new(std::sync::Mutex::new(MindState::new(config.app.dmn.max_turns))); + let shared = Arc::new(std::sync::Mutex::new(MindState::new( + config.app.dmn.max_turns, + config.app.learn.threshold, + ))); let (turn_watch, _) = tokio::sync::watch::channel(false); let (conscious_active, _) = tokio::sync::watch::channel(false); let (bg_tx, bg_rx) = mpsc::unbounded_channel(); @@ -543,6 +568,12 @@ impl Mind
{ MindCommand::ScoreFinetune => { self.start_finetune_scoring(); } + MindCommand::SetLearnThreshold(value) => { + self.shared.lock().unwrap().learn_threshold = value; + if let Err(e) = crate::config_writer::set_learn_threshold(value) { + dbglog!("[learn] failed to persist threshold {}: {:#}", value, e); + } + } } } } @@ -618,27 +649,60 @@ impl Mind { } /// Score responses for fine-tuning candidates. + /// + /// Scores the most recent half of the context — responses near the end + /// of the context window were generated with the most context available, + /// which is what we want to train on. The threshold is a temporary knob; + /// once this runs continuously, we'll just train whatever lands at full + /// context without filtering. pub fn start_finetune_scoring(&self) { + let threshold = self.shared.lock().unwrap().learn_threshold; + let agent = self.agent.clone(); let bg_tx = self.bg_tx.clone(); let shared = self.shared.clone(); - shared.lock().unwrap().finetune_progress = "scoring...".into(); tokio::spawn(async move { + let activity = crate::agent::start_activity(&agent, "finetune: scoring...").await; + let (context, client) = { let ctx = agent.context.lock().await; (ctx.clone(), agent.client.clone()) }; - // Min divergence 0.1 = only keep responses that differ meaningfully - match learn::score_finetune_candidates(&context, 20, &client, 0.1).await { - Ok(candidates) => { - dbglog!("[finetune] found {} candidates", candidates.len()); + + let entries = context.conversation(); + let score_count = entries.len() / 2; + let range_start = entries.len() - score_count; + let responses_considered: usize = entries[range_start..].iter() + .filter(|n| matches!(n, crate::agent::context::AstNode::Branch { role: crate::agent::context::Role::Assistant, .. 
})) + .count(); + + activity.update(format!("finetune: scoring {} responses...", responses_considered)).await; + + let stats = match learn::score_finetune_candidates( + &context, score_count, &client, threshold, + ).await { + Ok((candidates, max_div)) => { + let above_threshold = candidates.len(); let _ = bg_tx.send(BgEvent::FinetuneCandidates(candidates)); + FinetuneScoringStats { + responses_considered, + above_threshold, + threshold, + max_divergence: max_div, + error: None, + } } - Err(e) => { - dbglog!("[finetune] scoring FAILED: {:#}", e); - } - } - shared.lock().unwrap().finetune_progress.clear(); + Err(e) => FinetuneScoringStats { + responses_considered, + above_threshold: 0, + threshold, + max_divergence: 0.0, + error: Some(format!("{}", e)), + }, + }; + + shared.lock().unwrap().finetune_last_run = Some(stats); + // activity drops here, marking completion and notifying observers }); } @@ -706,6 +770,12 @@ impl Mind { let mut bg_rx = self.bg_rx.lock().unwrap().take() .expect("Mind::run() called twice"); let mut sub_handle: Option<tokio::task::JoinHandle<()>> = None; + + // Start finetune scoring at startup (scores existing conversation) + if !self.config.no_agents { + self.start_finetune_scoring(); + } + loop { let (timeout, has_input) = { let me = self.shared.lock().unwrap(); diff --git a/src/subconscious/learn.rs b/src/subconscious/learn.rs index f8070ce..c3ad348 100644 --- a/src/subconscious/learn.rs +++ b/src/subconscious/learn.rs @@ -490,16 +490,18 @@ pub struct FinetuneCandidate { /// Score and enrich finetune candidates with full context. /// -/// Returns candidates ready for review, with context/continuation token IDs -/// already computed for sending to /finetune. +/// Returns (candidates, max_divergence) - candidates ready for review with +/// context/continuation token IDs, and the highest divergence seen.
pub async fn score_finetune_candidates( context: &ContextState, count: usize, client: &ApiClient, min_divergence: f64, -) -> anyhow::Result<Vec<FinetuneCandidate>> { +) -> anyhow::Result<(Vec<FinetuneCandidate>, f64)> { let scores = score_finetune(context, count, client).await?; + let max_divergence = scores.iter().map(|(_, d)| *d).fold(0.0f64, f64::max); + let entries = context.conversation(); let mut candidates = Vec::new(); @@ -562,7 +564,7 @@ pub async fn score_finetune_candidates( } } - Ok(candidates) + Ok((candidates, max_divergence)) } /// Generate what the model would say without memories for a given entry. diff --git a/src/user/learn.rs b/src/user/learn.rs index f858f34..522dbb8 100644 --- a/src/user/learn.rs +++ b/src/user/learn.rs @@ -60,12 +60,16 @@ impl From<crate::subconscious::learn::FinetuneCandidate> for FinetuneCandidate { pub(crate) struct LearnScreen { list_state: ListState, + mind_tx: tokio::sync::mpsc::UnboundedSender<crate::mind::MindCommand>, } impl LearnScreen { - pub fn new() -> Self { + pub fn new( + mind_tx: tokio::sync::mpsc::UnboundedSender<crate::mind::MindCommand>, + ) -> Self { Self { list_state: ListState::default(), + mind_tx, } } @@ -112,6 +116,22 @@ impl ScreenView for LearnScreen { KeyCode::Char('s') => { app.finetune_send_approved(); } + KeyCode::Char('+') | KeyCode::Char('=') => { + // Raise threshold 10× (less sensitive — fewer candidates) + if let Some(ms) = &app.mind_state { + let new = ms.learn_threshold * 10.0; + let _ = self.mind_tx.send( + crate::mind::MindCommand::SetLearnThreshold(new)); + } + } + KeyCode::Char('-') => { + // Lower threshold 10× (more sensitive — more candidates) + if let Some(ms) = &app.mind_state { + let new = ms.learn_threshold / 10.0; + let _ = self.mind_tx.send( + crate::mind::MindCommand::SetLearnThreshold(new)); + } + } _ => {} } } @@ -123,19 +143,13 @@ impl ScreenView for LearnScreen { self.list_state.select(Some(sel)); } - // Get scoring progress from mind state - let progress = app.mind_state.as_ref() - .map(|ms| ms.finetune_progress.as_str()) - .unwrap_or(""); - // Now render let gen_on =
crate::subconscious::learn::alternates_enabled(); - let title_right = if !progress.is_empty() { - format!(" {} ", progress) - } else if gen_on { - " learn [gen] ".to_string() + let threshold = app.mind_state.as_ref().map(|ms| ms.learn_threshold).unwrap_or(0.0); + let title_right = if gen_on { + format!(" learn [thresh: {:e}] [gen] ", threshold) } else { - " learn ".to_string() + format!(" learn [thresh: {:e}] ", threshold) }; let block = Block::default() .title_top(Line::from(screen_legend()).left_aligned()) @@ -148,58 +162,50 @@ impl ScreenView for LearnScreen { let candidates = &app.finetune_candidates; if candidates.is_empty() { - let msg = if progress.is_empty() { - " No candidates yet — scoring runs after each turn." - } else { - " Scoring in progress..." - }; - frame.render_widget( - Paragraph::new(Line::styled(msg, Style::default().fg(Color::DarkGray))), - inner, - ); - return; - } + render_empty(frame, inner, app); + } else { + // Layout: list on left, detail on right + let [list_area, detail_area] = Layout::horizontal([ + Constraint::Percentage(40), + Constraint::Percentage(60), + ]).areas(inner); - // Layout: list on left, detail on right - let [list_area, detail_area] = Layout::horizontal([ - Constraint::Percentage(40), - Constraint::Percentage(60), - ]).areas(inner); + // Render candidate list + let items: Vec<ListItem> = candidates.iter().map(|c| { + let status_char = match c.status { + CandidateStatus::Pending => ' ', + CandidateStatus::Approved => '+', + CandidateStatus::Rejected => '-', + CandidateStatus::Sent => '*', + }; + let style = match c.status { + CandidateStatus::Pending => Style::default(), + CandidateStatus::Approved => Style::default().fg(Color::Green), + CandidateStatus::Rejected => Style::default().fg(Color::DarkGray), + CandidateStatus::Sent => Style::default().fg(Color::Cyan), + }; + ListItem::new(Line::from(vec![ + Span::styled(format!("[{}] ", status_char), style), + Span::styled(format!("{:.2} ", c.divergence),
Style::default().fg(Color::Yellow)), + Span::raw(truncate(&c.response_text, 30)), + ])) + }).collect(); - // Render candidate list - let items: Vec<ListItem> = candidates.iter().map(|c| { - let status_char = match c.status { - CandidateStatus::Pending => ' ', - CandidateStatus::Approved => '+', - CandidateStatus::Rejected => '-', - CandidateStatus::Sent => '*', - }; - let style = match c.status { - CandidateStatus::Pending => Style::default(), - CandidateStatus::Approved => Style::default().fg(Color::Green), - CandidateStatus::Rejected => Style::default().fg(Color::DarkGray), - CandidateStatus::Sent => Style::default().fg(Color::Cyan), - }; - ListItem::new(Line::from(vec![ - Span::styled(format!("[{}] ", status_char), style), - Span::styled(format!("{:.2} ", c.divergence), Style::default().fg(Color::Yellow)), - Span::raw(truncate(&c.response_text, 30)), - ])) - }).collect(); + let list = List::new(items) + .block(Block::default().borders(Borders::RIGHT).title(" candidates ")) + .highlight_style(Style::default().add_modifier(Modifier::REVERSED)); + frame.render_stateful_widget(list, list_area, &mut self.list_state); - let list = List::new(items) - .block(Block::default().borders(Borders::RIGHT).title(" candidates ")) - .highlight_style(Style::default().add_modifier(Modifier::REVERSED)); - frame.render_stateful_widget(list, list_area, &mut self.list_state); - - // Render detail for selected candidate - if let Some(idx) = self.selected_idx() { - if let Some(candidate) = candidates.get(idx) { - render_detail(frame, candidate, detail_area); + // Render detail for selected candidate + if let Some(idx) = self.selected_idx() { + if let Some(candidate) = candidates.get(idx) { + render_detail(frame, candidate, detail_area); + } } } - // Render help at bottom + // Render help at bottom (always, even when empty) + let gen_status = if gen_on { "[on]" } else { "[off]" }; let help = Line::from(vec![ Span::styled(" j/k/\u{2191}\u{2193}", Style::default().fg(Color::Cyan)), Span::raw("=nav
"), @@ -208,9 +214,11 @@ impl ScreenView for LearnScreen { Span::styled("r", Style::default().fg(Color::Red)), Span::raw("=reject "), Span::styled("g", Style::default().fg(Color::Yellow)), - Span::raw("=gen "), + Span::raw(format!("=gen{} ", gen_status)), Span::styled("s", Style::default().fg(Color::Magenta)), - Span::raw("=send "), + Span::raw("=send "), + Span::styled("+/-", Style::default().fg(Color::Cyan)), + Span::raw("=thresh "), ]); let help_area = Rect { y: area.y + area.height - 1, @@ -221,6 +229,56 @@ impl ScreenView for LearnScreen { } } +fn render_empty(frame: &mut Frame, inner: Rect, app: &App) { + let mut lines = Vec::new(); + lines.push(Line::from("")); + + match app.mind_state.as_ref().and_then(|ms| ms.finetune_last_run.as_ref()) { + Some(stats) => { + lines.push(Line::from(vec![ + Span::raw(" Last run: "), + Span::styled( + format!("{}", stats.responses_considered), + Style::default().fg(Color::Cyan), + ), + Span::raw(" responses considered, "), + Span::styled( + format!("{}", stats.above_threshold), + Style::default().fg(if stats.above_threshold > 0 { Color::Green } else { Color::DarkGray }), + ), + Span::raw(" above threshold, max divergence: "), + Span::styled( + format!("{:.4}", stats.max_divergence), + Style::default().fg(Color::Yellow), + ), + ])); + if let Some(err) = &stats.error { + lines.push(Line::from(vec![ + Span::raw(" "), + Span::styled( + format!("Error: {}", err), + Style::default().fg(Color::Red), + ), + ])); + } + } + None => { + lines.push(Line::styled( + " No scoring run yet.", + Style::default().fg(Color::DarkGray), + )); + } + } + + lines.push(Line::from("")); + lines.push(Line::styled( + " Scoring runs at startup and after each turn.", + Style::default().fg(Color::DarkGray), + )); + + frame.render_widget(Paragraph::new(lines), inner); +} + fn render_detail(frame: &mut Frame, c: &FinetuneCandidate, area: Rect) { let [header_area, content_area] = Layout::vertical([ Constraint::Length(3), diff --git a/src/user/mod.rs 
b/src/user/mod.rs index edd2b00..18c33e7 100644 --- a/src/user/mod.rs +++ b/src/user/mod.rs @@ -389,7 +389,7 @@ async fn run( Box::new(crate::user::subconscious::SubconsciousScreen::new()), Box::new(crate::user::unconscious::UnconsciousScreen::new()), Box::new(crate::user::thalamus::ThalamusScreen::new()), - Box::new(crate::user::learn::LearnScreen::new()), + Box::new(crate::user::learn::LearnScreen::new(mind_tx.clone())), ]; let mut active_screen: usize = 1; // F-key number tui::set_screen_legend(tui::screen_legend_from(&*screens)); @@ -466,7 +466,8 @@ async fn run( idle_state.decay_ewma(); app.update_idle(&idle_state); app.agent_state = mind.subconscious_snapshots().await; - if let Ok(mut unc) = mind.unconscious.try_lock() { + { + let mut unc = mind.unconscious.lock().await; let toggles: Vec<String> = app.agent_toggles.drain(..).collect(); for name in &toggles { if mind.subconscious.lock().await.toggle(name).is_none() { @@ -480,10 +481,13 @@ async fn run( }; app.unconscious_state = unc.snapshots(store_guard.as_deref()); app.graph_health = unc.graph_health.clone(); + } + + // Sync mind state (finetune candidates, last scoring run, etc.) + { let ms = mind.shared.lock().unwrap(); - // Sync finetune candidates: add new ones, keep existing (preserves approval status) - // Remove sent candidates (already trained, no need to keep) - // Keep only 10 most recent rejected candidates + // Sync finetune candidates: add new ones, keep existing (preserves approval status), + // remove sent candidates, keep only 10 most recent rejected. app.finetune_candidates.retain(|c| c.status != learn::CandidateStatus::Sent); for c in &ms.finetune_candidates { let exists = app.finetune_candidates.iter() @@ -492,7 +496,6 @@ async fn run( app.finetune_candidates.push(learn::FinetuneCandidate::from(c.clone())); } } - // Limit rejected candidates to 10 most recent let mut rejected: Vec<_> = app.finetune_candidates.iter() .enumerate() .filter(|(_, c)| c.status == learn::CandidateStatus::Rejected)