F6 learn screen: fine-tuning candidate review
Wire up divergence scoring to identify responses that depend heavily on memories the model hasn't internalized. These responses are candidates for fine-tuning.

- Score fine-tune candidates automatically after each turn
- Track trained responses by timestamp to prevent overtraining
- F6 screen shows candidates with their divergence scores
- Keys: j/k = navigate, a = approve, r = reject, g = toggle alternate generation, s = send
- Additive sync preserves approval status across ticks
- Keeps the 10 most recent rejected candidates; removes sent ones

The 's' key currently just marks a candidate as trained locally — the actual /finetune endpoint call is to follow.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
2c6a5c0f4a
commit
50b7b3a33a
4 changed files with 557 additions and 3 deletions
|
|
@ -147,6 +147,10 @@ pub struct MindState {
|
|||
pub unc_idle: bool,
|
||||
/// When the unconscious idle timer will fire (for UI display).
|
||||
pub unc_idle_deadline: Instant,
|
||||
/// Fine-tuning candidates identified by scoring.
|
||||
pub finetune_candidates: Vec<learn::FinetuneCandidate>,
|
||||
/// Fine-tune scoring progress (empty = not running).
|
||||
pub finetune_progress: String,
|
||||
}
|
||||
|
||||
impl Clone for MindState {
|
||||
|
|
@ -165,6 +169,8 @@ impl Clone for MindState {
|
|||
turn_handle: None, // Not cloned — only Mind's loop uses this
|
||||
unc_idle: self.unc_idle,
|
||||
unc_idle_deadline: self.unc_idle_deadline,
|
||||
finetune_candidates: self.finetune_candidates.clone(),
|
||||
finetune_progress: self.finetune_progress.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -177,6 +183,8 @@ pub enum MindCommand {
|
|||
Score,
|
||||
/// Run full N×M memory scoring matrix (/score command)
|
||||
ScoreFull,
|
||||
/// Score for finetune candidates
|
||||
ScoreFinetune,
|
||||
/// Abort current turn, kill processes
|
||||
Interrupt,
|
||||
/// Reset session
|
||||
|
|
@ -202,6 +210,8 @@ impl MindState {
|
|||
turn_handle: None,
|
||||
unc_idle: false,
|
||||
unc_idle_deadline: Instant::now() + std::time::Duration::from_secs(60),
|
||||
finetune_candidates: Vec::new(),
|
||||
finetune_progress: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -288,6 +298,7 @@ impl MindState {
|
|||
/// Background task completion events.
///
/// Values of this type are sent over the `bg_tx` channel by spawned
/// background tasks and consumed by the `Mind` event handling, which
/// updates the shared `MindState` accordingly.
|
||||
enum BgEvent {
|
||||
/// A memory-scoring run has finished; the handler resets
/// `scoring_in_flight` to `false`.
// NOTE(review): the sender of this variant is not visible in this hunk —
// presumably the task started for `MindCommand::Score`; confirm.
ScoringDone,
|
||||
/// Fine-tune scoring succeeded and produced this candidate list; the
/// handler replaces `finetune_candidates` in the shared state with it.
FinetuneCandidates(Vec<learn::FinetuneCandidate>),
|
||||
}
|
||||
|
||||
// --- Mind: cognitive state machine ---
|
||||
|
|
@ -529,6 +540,9 @@ impl Mind {
|
|||
}
|
||||
self.agent.compact().await;
|
||||
}
|
||||
MindCommand::ScoreFinetune => {
|
||||
self.start_finetune_scoring();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -603,6 +617,31 @@ impl Mind {
|
|||
});
|
||||
}
|
||||
|
||||
/// Score responses for fine-tuning candidates.
|
||||
pub fn start_finetune_scoring(&self) {
|
||||
let agent = self.agent.clone();
|
||||
let bg_tx = self.bg_tx.clone();
|
||||
let shared = self.shared.clone();
|
||||
shared.lock().unwrap().finetune_progress = "scoring...".into();
|
||||
tokio::spawn(async move {
|
||||
let (context, client) = {
|
||||
let ctx = agent.context.lock().await;
|
||||
(ctx.clone(), agent.client.clone())
|
||||
};
|
||||
// Min divergence 0.1 = only keep responses that differ meaningfully
|
||||
match learn::score_finetune_candidates(&context, 20, &client, 0.1).await {
|
||||
Ok(candidates) => {
|
||||
dbglog!("[finetune] found {} candidates", candidates.len());
|
||||
let _ = bg_tx.send(BgEvent::FinetuneCandidates(candidates));
|
||||
}
|
||||
Err(e) => {
|
||||
dbglog!("[finetune] scoring FAILED: {:#}", e);
|
||||
}
|
||||
}
|
||||
shared.lock().unwrap().finetune_progress.clear();
|
||||
});
|
||||
}
|
||||
|
||||
async fn start_turn(&self, text: &str, target: StreamTarget) {
|
||||
{
|
||||
match target {
|
||||
|
|
@ -692,6 +731,9 @@ impl Mind {
|
|||
BgEvent::ScoringDone => {
|
||||
self.shared.lock().unwrap().scoring_in_flight = false;
|
||||
}
|
||||
BgEvent::FinetuneCandidates(candidates) => {
|
||||
self.shared.lock().unwrap().finetune_candidates = candidates;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -711,6 +753,7 @@ impl Mind {
|
|||
cmds.push(MindCommand::Compact);
|
||||
if !self.config.no_agents {
|
||||
cmds.push(MindCommand::Score);
|
||||
cmds.push(MindCommand::ScoreFinetune);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue