F6 learn screen: fine-tuning candidate review

Wire up divergence scoring to identify responses that depend heavily on
memories the model hasn't internalized. These are candidates for fine-tuning.
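
For intuition, one simple way to score how much a response leaned on injected
memories is to compare it against an alternate generation produced without them.
This is only an illustration; the actual metric lives in
learn::score_finetune_candidates and may differ.

use std::collections::HashSet;

// Illustrative sketch, not the real scorer: crude token-overlap divergence between
// the response generated with memories in context and an alternate generation
// without them. Near 1.0 = the memories changed the response a lot; near 0.0 =
// they barely mattered.
fn divergence(with_memories: &str, without_memories: &str) -> f64 {
    let a: HashSet<&str> = with_memories.split_whitespace().collect();
    let b: HashSet<&str> = without_memories.split_whitespace().collect();
    let union = a.union(&b).count();
    if union == 0 {
        return 0.0;
    }
    let overlap = a.intersection(&b).count() as f64;
    1.0 - overlap / union as f64
}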

- Score finetune candidates automatically after each turn
- Track trained responses by timestamp to prevent overtraining
- F6 screen shows candidates with divergence scores
- j/k to navigate, a=approve, r=reject, g=toggle alternate gen, s=send
- Additive sync preserves approval status across ticks (sketched below)
- Keeps the 10 most recent rejected candidates; removes ones already sent

The 's' key currently just marks as trained locally — actual /finetune
endpoint call to follow.
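
A minimal sketch of the additive sync and pruning described above, assuming
learn::FinetuneCandidate carries something like a turn timestamp, a divergence
score, and a review status (the real type's fields may differ):

use std::collections::HashSet;

// Field names and the exact merge policy here are assumptions.
#[derive(PartialEq)]
enum ReviewStatus { Pending, Approved, Rejected, Sent }

struct Candidate {
    turn_ts: u64,     // timestamp of the scored turn
    divergence: f64,  // higher = response leaned more on memories
    status: ReviewStatus,
}

fn additive_sync(existing: &mut Vec<Candidate>, fresh: Vec<Candidate>) {
    // Merge: refresh scores for turns already tracked, add new ones,
    // and never overwrite an existing approve/reject decision.
    for new in fresh {
        if let Some(i) = existing.iter().position(|c| c.turn_ts == new.turn_ts) {
            existing[i].divergence = new.divergence; // keep the reviewer's status
        } else {
            existing.push(new);
        }
    }
    // Prune: drop candidates already sent for training...
    existing.retain(|c| c.status != ReviewStatus::Sent);
    // ...and keep only the 10 most recent rejected ones.
    let mut rejected: Vec<u64> = existing
        .iter()
        .filter(|c| c.status == ReviewStatus::Rejected)
        .map(|c| c.turn_ts)
        .collect();
    rejected.sort_unstable_by(|a, b| b.cmp(a)); // newest first
    let keep: HashSet<u64> = rejected.into_iter().take(10).collect();
    existing.retain(|c| c.status != ReviewStatus::Rejected || keep.contains(&c.turn_ts));
}

Re-scoring on a later tick then only refreshes scores and adds new entries; it
never clobbers decisions already made on the F6 screen.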

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
Author: Kent Overstreet
Date:   2026-04-16 00:31:39 -04:00
parent 2c6a5c0f4a
commit 50b7b3a33a
4 changed files with 557 additions and 3 deletions


@@ -147,6 +147,10 @@ pub struct MindState {
    pub unc_idle: bool,
    /// When the unconscious idle timer will fire (for UI display).
    pub unc_idle_deadline: Instant,
    /// Fine-tuning candidates identified by scoring.
    pub finetune_candidates: Vec<learn::FinetuneCandidate>,
    /// Fine-tune scoring progress (empty = not running).
    pub finetune_progress: String,
}

impl Clone for MindState {
@@ -165,6 +169,8 @@ impl Clone for MindState {
            turn_handle: None, // Not cloned — only Mind's loop uses this
            unc_idle: self.unc_idle,
            unc_idle_deadline: self.unc_idle_deadline,
            finetune_candidates: self.finetune_candidates.clone(),
            finetune_progress: self.finetune_progress.clone(),
        }
    }
}
@@ -177,6 +183,8 @@ pub enum MindCommand {
    Score,
    /// Run full N×M memory scoring matrix (/score command)
    ScoreFull,
    /// Score for finetune candidates
    ScoreFinetune,
    /// Abort current turn, kill processes
    Interrupt,
    /// Reset session
@@ -202,6 +210,8 @@ impl MindState {
            turn_handle: None,
            unc_idle: false,
            unc_idle_deadline: Instant::now() + std::time::Duration::from_secs(60),
            finetune_candidates: Vec::new(),
            finetune_progress: String::new(),
        }
    }
@@ -288,6 +298,7 @@ impl MindState {
/// Background task completion events.
enum BgEvent {
    ScoringDone,
    FinetuneCandidates(Vec<learn::FinetuneCandidate>),
}

// --- Mind: cognitive state machine ---
@@ -529,6 +540,9 @@ impl Mind {
                    }
                    self.agent.compact().await;
                }
                MindCommand::ScoreFinetune => {
                    self.start_finetune_scoring();
                }
            }
        }
    }
@@ -603,6 +617,31 @@ impl Mind {
        });
    }

    /// Score responses for fine-tuning candidates.
    pub fn start_finetune_scoring(&self) {
        let agent = self.agent.clone();
        let bg_tx = self.bg_tx.clone();
        let shared = self.shared.clone();
        shared.lock().unwrap().finetune_progress = "scoring...".into();
        tokio::spawn(async move {
            let (context, client) = {
                let ctx = agent.context.lock().await;
                (ctx.clone(), agent.client.clone())
            };
            // Min divergence 0.1 = only keep responses that differ meaningfully
            match learn::score_finetune_candidates(&context, 20, &client, 0.1).await {
                Ok(candidates) => {
                    dbglog!("[finetune] found {} candidates", candidates.len());
                    let _ = bg_tx.send(BgEvent::FinetuneCandidates(candidates));
                }
                Err(e) => {
                    dbglog!("[finetune] scoring FAILED: {:#}", e);
                }
            }
            shared.lock().unwrap().finetune_progress.clear();
        });
    }

    async fn start_turn(&self, text: &str, target: StreamTarget) {
        {
            match target {
@@ -692,6 +731,9 @@ impl Mind {
            BgEvent::ScoringDone => {
                self.shared.lock().unwrap().scoring_in_flight = false;
            }
            BgEvent::FinetuneCandidates(candidates) => {
                self.shared.lock().unwrap().finetune_candidates = candidates;
            }
        }
    }
@@ -711,6 +753,7 @@ impl Mind {
        cmds.push(MindCommand::Compact);
        if !self.config.no_agents {
            cmds.push(MindCommand::Score);
            cmds.push(MindCommand::ScoreFinetune);
        }
    }
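
The F6 screen itself lives in the other changed files, which are not shown in this
excerpt. A self-contained sketch of the key bindings listed in the commit message
could look like the following; the enum and function names are assumptions, only
the bindings themselves come from the commit:

// Illustrative only: the real F6 screen code is not part of this excerpt.
#[derive(Debug)]
enum LearnAction {
    SelectNext,      // j
    SelectPrev,      // k
    Approve,         // a
    Reject,          // r
    ToggleAlternate, // g: view the alternate generation instead of the original response
    Send,            // s: for now, just marks the candidate as trained locally
}

fn map_learn_key(key: char) -> Option<LearnAction> {
    match key {
        'j' => Some(LearnAction::SelectNext),
        'k' => Some(LearnAction::SelectPrev),
        'a' => Some(LearnAction::Approve),
        'r' => Some(LearnAction::Reject),
        'g' => Some(LearnAction::ToggleAlternate),
        's' => Some(LearnAction::Send),
        _ => None,
    }
}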