learn: F6 screen — scoring stats, ActivityGuard, configurable threshold
Three changes that together reshape the F6 fine-tune-review screen: 1. Finetune scoring reports through the standard agent activity system instead of a separate finetune_progress String. The previous design ran an independent progress field that forced a cross-lock dance and bespoke UI plumbing. start_finetune_scoring now uses start_activity + activity.update, so the usual status line and notifications capture scoring progress uniformly with other background work. 2. MindState gains a FinetuneScoringStats snapshot (responses seen, above threshold, max divergence, error). The F6 empty screen shows this instead of a loading message — so after a scoring run that produced zero candidates, you can see *why* (e.g., max_divergence below threshold). 3. The divergence threshold is configurable from F6 via +/- hotkeys (scales by 10×) and persisted to ~/.consciousness/config.json5 via config_writer::set_learn_threshold. AppConfig grows a learn section with a threshold field (default 1e-7). Also: user/mod.rs no longer uses try_lock() for the per-tick unconscious/mind state sync — we fixed the locking hot paths that made try_lock necessary, so lock().await is now the right choice. And subconscious::learn::score_finetune_candidates now returns (candidates, max_divergence) so the stats can be populated. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
ac40c2cb98
commit
e5dd8312c7
5 changed files with 237 additions and 85 deletions
102
src/mind/mod.rs
102
src/mind/mod.rs
|
|
@ -149,8 +149,26 @@ pub struct MindState {
|
|||
pub unc_idle_deadline: Instant,
|
||||
/// Fine-tuning candidates identified by scoring.
|
||||
pub finetune_candidates: Vec<learn::FinetuneCandidate>,
|
||||
/// Fine-tune scoring progress (empty = not running).
|
||||
pub finetune_progress: String,
|
||||
/// Last scoring run stats for UI display.
|
||||
pub finetune_last_run: Option<FinetuneScoringStats>,
|
||||
/// Divergence threshold for finetune scoring — mutable via F6 hotkeys
|
||||
/// and persisted back to ~/.consciousness/config.json5.
|
||||
pub learn_threshold: f64,
|
||||
}
|
||||
|
||||
/// Stats from the last finetune scoring run.
///
/// Snapshot stored in `MindState::finetune_last_run` so the F6 review
/// screen can explain the outcome of the last scoring pass — in
/// particular why a run produced zero candidates (e.g. `max_divergence`
/// never reached `threshold`).
#[derive(Clone, Debug)]
pub struct FinetuneScoringStats {
    /// Count of assistant responses we considered (recent half of context).
    pub responses_considered: usize,
    /// How many exceeded the divergence threshold.
    pub above_threshold: usize,
    /// Threshold used for this run.
    pub threshold: f64,
    /// Highest divergence observed.
    pub max_divergence: f64,
    /// Error message if the run failed.
    pub error: Option<String>,
}
|
||||
|
||||
impl Clone for MindState {
|
||||
|
|
@ -170,7 +188,8 @@ impl Clone for MindState {
|
|||
unc_idle: self.unc_idle,
|
||||
unc_idle_deadline: self.unc_idle_deadline,
|
||||
finetune_candidates: self.finetune_candidates.clone(),
|
||||
finetune_progress: self.finetune_progress.clone(),
|
||||
finetune_last_run: self.finetune_last_run.clone(),
|
||||
learn_threshold: self.learn_threshold,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -185,6 +204,8 @@ pub enum MindCommand {
|
|||
ScoreFull,
|
||||
/// Score for finetune candidates
|
||||
ScoreFinetune,
|
||||
/// Update the finetune divergence threshold and persist to config.
|
||||
SetLearnThreshold(f64),
|
||||
/// Abort current turn, kill processes
|
||||
Interrupt,
|
||||
/// Reset session
|
||||
|
|
@ -194,7 +215,7 @@ pub enum MindCommand {
|
|||
}
|
||||
|
||||
impl MindState {
|
||||
pub fn new(max_dmn_turns: u32) -> Self {
|
||||
pub fn new(max_dmn_turns: u32, learn_threshold: f64) -> Self {
|
||||
Self {
|
||||
input: Vec::new(),
|
||||
turn_active: false,
|
||||
|
|
@ -211,7 +232,8 @@ impl MindState {
|
|||
unc_idle: false,
|
||||
unc_idle_deadline: Instant::now() + std::time::Duration::from_secs(60),
|
||||
finetune_candidates: Vec::new(),
|
||||
finetune_progress: String::new(),
|
||||
finetune_last_run: None,
|
||||
learn_threshold,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -341,7 +363,10 @@ impl Mind {
|
|||
crate::agent::tools::tools(),
|
||||
).await;
|
||||
|
||||
let shared = Arc::new(std::sync::Mutex::new(MindState::new(config.app.dmn.max_turns)));
|
||||
let shared = Arc::new(std::sync::Mutex::new(MindState::new(
|
||||
config.app.dmn.max_turns,
|
||||
config.app.learn.threshold,
|
||||
)));
|
||||
let (turn_watch, _) = tokio::sync::watch::channel(false);
|
||||
let (conscious_active, _) = tokio::sync::watch::channel(false);
|
||||
let (bg_tx, bg_rx) = mpsc::unbounded_channel();
|
||||
|
|
@ -543,6 +568,12 @@ impl Mind {
|
|||
MindCommand::ScoreFinetune => {
|
||||
self.start_finetune_scoring();
|
||||
}
|
||||
MindCommand::SetLearnThreshold(value) => {
|
||||
self.shared.lock().unwrap().learn_threshold = value;
|
||||
if let Err(e) = crate::config_writer::set_learn_threshold(value) {
|
||||
dbglog!("[learn] failed to persist threshold {}: {:#}", value, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -618,27 +649,60 @@ impl Mind {
|
|||
}
|
||||
|
||||
/// Score responses for fine-tuning candidates.
|
||||
///
|
||||
/// Scores the most recent half of the context — responses near the end
|
||||
/// of the context window were generated with the most context available,
|
||||
/// which is what we want to train on. The threshold is a temporary knob;
|
||||
/// once this runs continuously, we'll just train whatever lands at full
|
||||
/// context without filtering.
|
||||
pub fn start_finetune_scoring(&self) {
|
||||
let threshold = self.shared.lock().unwrap().learn_threshold;
|
||||
|
||||
let agent = self.agent.clone();
|
||||
let bg_tx = self.bg_tx.clone();
|
||||
let shared = self.shared.clone();
|
||||
shared.lock().unwrap().finetune_progress = "scoring...".into();
|
||||
tokio::spawn(async move {
|
||||
let activity = crate::agent::start_activity(&agent, "finetune: scoring...").await;
|
||||
|
||||
let (context, client) = {
|
||||
let ctx = agent.context.lock().await;
|
||||
(ctx.clone(), agent.client.clone())
|
||||
};
|
||||
// Min divergence 0.1 = only keep responses that differ meaningfully
|
||||
match learn::score_finetune_candidates(&context, 20, &client, 0.1).await {
|
||||
Ok(candidates) => {
|
||||
dbglog!("[finetune] found {} candidates", candidates.len());
|
||||
|
||||
let entries = context.conversation();
|
||||
let score_count = entries.len() / 2;
|
||||
let range_start = entries.len() - score_count;
|
||||
let responses_considered: usize = entries[range_start..].iter()
|
||||
.filter(|n| matches!(n, crate::agent::context::AstNode::Branch { role: crate::agent::context::Role::Assistant, .. }))
|
||||
.count();
|
||||
|
||||
activity.update(format!("finetune: scoring {} responses...", responses_considered)).await;
|
||||
|
||||
let stats = match learn::score_finetune_candidates(
|
||||
&context, score_count, &client, threshold,
|
||||
).await {
|
||||
Ok((candidates, max_div)) => {
|
||||
let above_threshold = candidates.len();
|
||||
let _ = bg_tx.send(BgEvent::FinetuneCandidates(candidates));
|
||||
FinetuneScoringStats {
|
||||
responses_considered,
|
||||
above_threshold,
|
||||
threshold,
|
||||
max_divergence: max_div,
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
dbglog!("[finetune] scoring FAILED: {:#}", e);
|
||||
}
|
||||
}
|
||||
shared.lock().unwrap().finetune_progress.clear();
|
||||
Err(e) => FinetuneScoringStats {
|
||||
responses_considered,
|
||||
above_threshold: 0,
|
||||
threshold,
|
||||
max_divergence: 0.0,
|
||||
error: Some(format!("{}", e)),
|
||||
},
|
||||
};
|
||||
|
||||
shared.lock().unwrap().finetune_last_run = Some(stats);
|
||||
// activity drops here, marking completion and notifying observers
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -706,6 +770,12 @@ impl Mind {
|
|||
let mut bg_rx = self.bg_rx.lock().unwrap().take()
|
||||
.expect("Mind::run() called twice");
|
||||
let mut sub_handle: Option<tokio::task::JoinHandle<()>> = None;
|
||||
|
||||
// Start finetune scoring at startup (scores existing conversation)
|
||||
if !self.config.no_agents {
|
||||
self.start_finetune_scoring();
|
||||
}
|
||||
|
||||
loop {
|
||||
let (timeout, has_input) = {
|
||||
let me = self.shared.lock().unwrap();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue