F6 learn screen: fine-tuning candidate review

Wire up divergence scoring to identify responses that depend heavily on
memories the model hasn't internalized. These are candidates for fine-tuning.

- Score finetune candidates automatically after each turn
- Track trained responses by timestamp to prevent overtraining
- F6 screen shows candidates with divergence scores
- j/k nav, a=approve, r=reject, g=toggle alternate gen, s=send
- Additive sync preserves approval status across ticks
- Keeps 10 most recent rejected, removes sent

The 's' key currently just marks as trained locally — actual /finetune
endpoint call to follow.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-16 00:31:39 -04:00
parent 2c6a5c0f4a
commit 50b7b3a33a
4 changed files with 557 additions and 3 deletions

View file

@ -147,6 +147,10 @@ pub struct MindState {
pub unc_idle: bool, pub unc_idle: bool,
/// When the unconscious idle timer will fire (for UI display). /// When the unconscious idle timer will fire (for UI display).
pub unc_idle_deadline: Instant, pub unc_idle_deadline: Instant,
/// Fine-tuning candidates identified by scoring.
pub finetune_candidates: Vec<learn::FinetuneCandidate>,
/// Fine-tune scoring progress (empty = not running).
pub finetune_progress: String,
} }
impl Clone for MindState { impl Clone for MindState {
@ -165,6 +169,8 @@ impl Clone for MindState {
turn_handle: None, // Not cloned — only Mind's loop uses this turn_handle: None, // Not cloned — only Mind's loop uses this
unc_idle: self.unc_idle, unc_idle: self.unc_idle,
unc_idle_deadline: self.unc_idle_deadline, unc_idle_deadline: self.unc_idle_deadline,
finetune_candidates: self.finetune_candidates.clone(),
finetune_progress: self.finetune_progress.clone(),
} }
} }
} }
@ -177,6 +183,8 @@ pub enum MindCommand {
Score, Score,
/// Run full N×M memory scoring matrix (/score command) /// Run full N×M memory scoring matrix (/score command)
ScoreFull, ScoreFull,
/// Score for finetune candidates
ScoreFinetune,
/// Abort current turn, kill processes /// Abort current turn, kill processes
Interrupt, Interrupt,
/// Reset session /// Reset session
@ -202,6 +210,8 @@ impl MindState {
turn_handle: None, turn_handle: None,
unc_idle: false, unc_idle: false,
unc_idle_deadline: Instant::now() + std::time::Duration::from_secs(60), unc_idle_deadline: Instant::now() + std::time::Duration::from_secs(60),
finetune_candidates: Vec::new(),
finetune_progress: String::new(),
} }
} }
@ -288,6 +298,7 @@ impl MindState {
/// Background task completion events. /// Background task completion events.
enum BgEvent { enum BgEvent {
ScoringDone, ScoringDone,
FinetuneCandidates(Vec<learn::FinetuneCandidate>),
} }
// --- Mind: cognitive state machine --- // --- Mind: cognitive state machine ---
@ -529,6 +540,9 @@ impl Mind {
} }
self.agent.compact().await; self.agent.compact().await;
} }
MindCommand::ScoreFinetune => {
self.start_finetune_scoring();
}
} }
} }
} }
@ -603,6 +617,31 @@ impl Mind {
}); });
} }
/// Kick off background scoring of responses for fine-tuning candidates.
///
/// Sets `finetune_progress` so the UI can show activity, snapshots the
/// conversation context, runs the scorer, and forwards any candidates to
/// the mind loop via `BgEvent::FinetuneCandidates`. The progress string is
/// cleared when the task finishes, whether scoring succeeded or not.
pub fn start_finetune_scoring(&self) {
    let agent = self.agent.clone();
    let bg_tx = self.bg_tx.clone();
    let shared = self.shared.clone();
    // Surface progress in the UI before the task starts.
    shared.lock().unwrap().finetune_progress = "scoring...".into();
    tokio::spawn(async move {
        // Snapshot the context and grab a client handle up front so the
        // async context lock is not held across the (slow) scoring call.
        let (context, client) = {
            let ctx = agent.context.lock().await;
            (ctx.clone(), agent.client.clone())
        };
        // Min divergence 0.1 = only keep responses that differ meaningfully
        let outcome = learn::score_finetune_candidates(&context, 20, &client, 0.1).await;
        match outcome {
            Ok(found) => {
                dbglog!("[finetune] found {} candidates", found.len());
                // Receiver gone just means the mind loop is shutting down.
                let _ = bg_tx.send(BgEvent::FinetuneCandidates(found));
            }
            Err(err) => {
                dbglog!("[finetune] scoring FAILED: {:#}", err);
            }
        }
        shared.lock().unwrap().finetune_progress.clear();
    });
}
async fn start_turn(&self, text: &str, target: StreamTarget) { async fn start_turn(&self, text: &str, target: StreamTarget) {
{ {
match target { match target {
@ -692,6 +731,9 @@ impl Mind {
BgEvent::ScoringDone => { BgEvent::ScoringDone => {
self.shared.lock().unwrap().scoring_in_flight = false; self.shared.lock().unwrap().scoring_in_flight = false;
} }
BgEvent::FinetuneCandidates(candidates) => {
self.shared.lock().unwrap().finetune_candidates = candidates;
}
} }
} }
@ -711,6 +753,7 @@ impl Mind {
cmds.push(MindCommand::Compact); cmds.push(MindCommand::Compact);
if !self.config.no_agents { if !self.config.no_agents {
cmds.push(MindCommand::Score); cmds.push(MindCommand::Score);
cmds.push(MindCommand::ScoreFinetune);
} }
} }

View file

@ -16,6 +16,7 @@
use crate::agent::api::ApiClient; use crate::agent::api::ApiClient;
use crate::agent::context::{AstNode, Ast, NodeBody, ContextState, Role}; use crate::agent::context::{AstNode, Ast, NodeBody, ContextState, Role};
use crate::agent::tokenizer;
const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300); const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);
@ -452,3 +453,198 @@ pub async fn score_finetune(
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
Ok(results) Ok(results)
} }
/// Enriched finetune candidate with context for review.
///
/// Produced by [`score_finetune_candidates`]; carries everything the review
/// screen and an eventual /finetune call need, so no further context
/// lookups are required downstream.
#[derive(Clone, Debug)]
pub struct FinetuneCandidate {
    /// Index of the response entry in the conversation.
    pub entry_idx: usize,
    /// Divergence score; higher means the response leaned harder on memories.
    pub divergence: f64,
    /// The assistant response text, reassembled from the entry's leaves.
    pub response_text: String,
    /// Token IDs for context (everything before the response).
    pub context_ids: Vec<u32>,
    /// Token IDs for the response (what we're training on).
    pub continuation_ids: Vec<u32>,
    /// What the model would have said without memories (if generated).
    pub alternate_text: Option<String>,
    /// Timestamp in millis for tracking trained status.
    pub timestamp_ms: i64,
}
/// Score and enrich finetune candidates with full context.
///
/// Runs `score_finetune` over the last `count` conversation entries, then
/// filters and enriches the results:
/// - drops entries scoring below `min_divergence`
/// - drops entries already trained on (tracked by timestamp) or lacking a
///   timestamp entirely
/// - precomputes context/continuation token IDs for the /finetune payload
/// - optionally generates a memory-free "alternate" response per candidate
///
/// Returns candidates ready for review, with context/continuation token IDs
/// already computed for sending to /finetune. Order follows the scorer's
/// descending-divergence sort.
pub async fn score_finetune_candidates(
    context: &ContextState,
    count: usize,
    client: &ApiClient,
    min_divergence: f64,
) -> anyhow::Result<Vec<FinetuneCandidate>> {
    let scores = score_finetune(context, count, client).await?;
    let entries = context.conversation();
    let mut candidates = Vec::new();
    // Timestamps of responses we have already trained on; used to skip them.
    let trained = load_trained();
    for (entry_idx, divergence) in scores {
        if divergence < min_divergence {
            continue;
        }
        let node = &entries[entry_idx];
        // Get timestamp and skip if already trained
        let timestamp_ms = match node_timestamp_ms(node) {
            Some(ts) => {
                if trained.contains(&ts) {
                    continue; // Already trained, skip
                }
                ts
            }
            None => continue, // No timestamp, skip
        };
        // Extract response text by concatenating the branch's leaf bodies.
        let response_text = match node {
            AstNode::Branch { children, .. } => {
                children.iter()
                    .filter_map(|c| match c {
                        AstNode::Leaf(leaf) => Some(leaf.body().text().to_string()),
                        _ => None,
                    })
                    .collect::<Vec<_>>()
                    .join("")
            }
            // Non-branch entries carry no reviewable response body.
            _ => continue,
        };
        // Build token IDs: context = everything before response, continuation = response
        let context_ids = build_token_ids(context, 0..entry_idx, Filter::None);
        let continuation_ids: Vec<u32> = node.token_ids().into_iter().collect();
        candidates.push(FinetuneCandidate {
            entry_idx,
            divergence,
            response_text,
            context_ids,
            continuation_ids,
            // Filled in below when alternate generation is enabled.
            alternate_text: None,
            timestamp_ms,
        });
    }
    // Generate alternates if enabled. Best-effort: a failed generation
    // leaves `alternate_text` as None instead of aborting the whole batch.
    if alternates_enabled() && !candidates.is_empty() {
        for candidate in &mut candidates {
            match generate_alternate(context, candidate.entry_idx, client).await {
                Ok(text) => candidate.alternate_text = Some(text),
                Err(e) => dbglog!("[finetune] alternate generation failed: {:#}", e),
            }
        }
    }
    Ok(candidates)
}
/// Generate what the model would say without memories for a given entry.
async fn generate_alternate(
context: &ContextState,
entry_idx: usize,
client: &ApiClient,
) -> anyhow::Result<String> {
use crate::agent::api::{SamplingParams, StreamToken};
// Build context tokens without memories, up to the response
let mut prompt = build_token_ids(context, 0..entry_idx, Filter::SkipAllMemories);
// Add assistant turn start
prompt.push(tokenizer::IM_START);
prompt.extend(tokenizer::encode("assistant\n"));
// Generate completion
let sampling = SamplingParams {
temperature: 0.6,
top_p: 0.95,
top_k: 20,
};
let (mut rx, _guard) = client.stream_completion(&prompt, sampling, Some(-5));
let mut tokens = Vec::new();
while let Some(tok) = rx.recv().await {
match tok {
StreamToken::Token(id) => tokens.push(id),
StreamToken::Done { .. } => break,
StreamToken::Error(e) => anyhow::bail!("generation error: {}", e),
}
}
Ok(tokenizer::decode(&tokens))
}
// ── Finetune config and persistence ─────────────────────────────
use std::path::PathBuf;
use std::collections::HashSet;
const FINETUNE_ALTERNATES_FILE: &str = ".consciousness/cache/finetune-alternates";
const TRAINED_RESPONSES_FILE: &str = ".consciousness/cache/trained-responses.json";
/// Path of the marker file that enables alternate-response generation.
fn alternates_path() -> PathBuf {
    let home = dirs::home_dir().unwrap_or_default();
    home.join(FINETUNE_ALTERNATES_FILE)
}
/// Path of the JSON file recording already-trained response timestamps.
fn trained_path() -> PathBuf {
    let home = dirs::home_dir().unwrap_or_default();
    home.join(TRAINED_RESPONSES_FILE)
}
/// Check if alternate response generation is enabled.
///
/// The setting is the mere existence of the marker file — no contents.
pub fn alternates_enabled() -> bool {
    let marker = alternates_path();
    marker.exists()
}
/// Toggle alternate response generation and persist the setting.
///
/// Persisted as a marker file so the choice survives restarts; all I/O is
/// best-effort and errors are deliberately ignored.
pub fn set_alternates(enabled: bool) {
    let marker = alternates_path();
    if !enabled {
        // Disabling == deleting the marker; "already gone" is fine.
        let _ = std::fs::remove_file(&marker);
        return;
    }
    // Cache directory may not exist yet on first enable.
    if let Some(dir) = marker.parent() {
        let _ = std::fs::create_dir_all(dir);
    }
    // An empty file is the whole setting — existence is the flag.
    let _ = std::fs::write(&marker, "");
}
/// Load set of trained response timestamps (millis since epoch).
///
/// A missing or unparseable file yields an empty set — training history is
/// best-effort state, not worth failing over.
pub fn load_trained() -> HashSet<i64> {
    std::fs::read_to_string(trained_path())
        .ok()
        .and_then(|json| serde_json::from_str(&json).ok())
        .unwrap_or_default()
}
/// Mark a response as trained by its timestamp.
///
/// Read-modify-write of the persisted timestamp set; serialization and
/// filesystem errors are ignored (best-effort bookkeeping).
pub fn mark_trained(timestamp_ms: i64) {
    let mut timestamps = load_trained();
    timestamps.insert(timestamp_ms);
    let path = trained_path();
    // Cache directory may not exist yet on first write.
    if let Some(dir) = path.parent() {
        let _ = std::fs::create_dir_all(dir);
    }
    if let Ok(json) = serde_json::to_string(&timestamps) {
        let _ = std::fs::write(&path, json);
    }
}
/// Get timestamp in millis from an AstNode (for Branch, uses first child).
///
/// Returns `None` when the node (or its first child, for branches) carries
/// no timestamp.
pub fn node_timestamp_ms(node: &AstNode) -> Option<i64> {
    let stamp = match node {
        AstNode::Leaf(leaf) => leaf.timestamp(),
        AstNode::Branch { children, .. } => children.first()?.leaf()?.timestamp(),
    };
    stamp.map(|ts| ts.timestamp_millis())
}

264
src/user/learn.rs Normal file
View file

@ -0,0 +1,264 @@
// learn.rs — F6: fine-tuning review screen
//
// Shows responses identified as training candidates (high divergence
// when memories stripped). Queue for review before sending to /finetune.
use ratatui::{
layout::{Constraint, Layout, Rect},
style::{Color, Modifier, Style},
text::{Line, Span},
widgets::{Block, Borders, List, ListItem, ListState, Paragraph, Wrap},
Frame,
};
use ratatui::crossterm::event::{Event, KeyCode, KeyEvent};
use super::{App, ScreenView, screen_legend};
/// A candidate response identified for fine-tuning.
///
/// UI-side mirror of `crate::subconscious::learn::FinetuneCandidate` with an
/// added review `status`; converted via the `From` impl below.
#[derive(Clone, Debug)]
pub struct FinetuneCandidate {
    /// Index in conversation entries.
    pub entry_idx: usize,
    /// Divergence score (higher = more dependent on memories).
    pub divergence: f64,
    /// The assistant response text.
    pub response_text: String,
    /// Status: pending, approved, rejected, sent.
    pub status: CandidateStatus,
    /// Token IDs for context.
    pub context_ids: Vec<u32>,
    /// Token IDs for continuation (what we're training on).
    pub continuation_ids: Vec<u32>,
    /// What the model would have said without memories (if generated).
    pub alternate_text: Option<String>,
    /// Timestamp in millis for tracking trained status.
    /// Also used as the identity key when syncing with newly scored batches.
    pub timestamp_ms: i64,
}
/// Review lifecycle of a candidate on the F6 screen.
///
/// `Pending` → `Approved`/`Rejected` via the a/r keys; `Approved` → `Sent`
/// when the batch is shipped with 's'.
///
/// Fieldless enum, so it derives `Copy` and `Eq` in addition to the
/// original `Clone`/`Debug`/`PartialEq` — comparisons and passing by value
/// are free, and existing `.clone()` call sites keep working.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CandidateStatus {
    /// Awaiting reviewer decision.
    Pending,
    /// Reviewer approved; will be sent on 's'.
    Approved,
    /// Reviewer rejected; kept for reference until pruned.
    Rejected,
    /// Delivered for training.
    Sent,
}
impl From<crate::subconscious::learn::FinetuneCandidate> for FinetuneCandidate {
    /// Wrap a freshly scored candidate for the review screen.
    ///
    /// Every new arrival starts in the `Pending` state; all other fields
    /// are carried over unchanged.
    fn from(src: crate::subconscious::learn::FinetuneCandidate) -> Self {
        Self {
            status: CandidateStatus::Pending,
            entry_idx: src.entry_idx,
            divergence: src.divergence,
            response_text: src.response_text,
            context_ids: src.context_ids,
            continuation_ids: src.continuation_ids,
            alternate_text: src.alternate_text,
            timestamp_ms: src.timestamp_ms,
        }
    }
}
/// F6 "learn" screen: reviews fine-tuning candidates.
pub(crate) struct LearnScreen {
    /// ratatui list selection state (drives highlight and scroll offset).
    list_state: ListState,
}
impl LearnScreen {
pub fn new() -> Self {
Self {
list_state: ListState::default(),
}
}
fn selected_idx(&self) -> Option<usize> {
self.list_state.selected()
}
}
impl ScreenView for LearnScreen {
    /// Name shown in the screen legend.
    fn label(&self) -> &'static str { "learn" }

    /// One UI frame: process key events, then render the candidate list,
    /// the detail pane for the selection, and the key help line.
    ///
    /// Keys: j/k or arrows = move selection, a = approve, r = reject,
    /// g = toggle alternate generation, s = send approved.
    fn tick(&mut self, frame: &mut Frame, area: Rect,
            events: &[Event], app: &mut App) {
        // Handle input first (before borrowing candidates for rendering)
        let candidate_count = app.finetune_candidates.len();
        for event in events {
            if let Event::Key(KeyEvent { code, .. }) = event {
                match code {
                    KeyCode::Up | KeyCode::Char('k') => {
                        // saturating_sub keeps the selection pinned at row 0.
                        let i = self.list_state.selected().unwrap_or(0);
                        self.list_state.select(Some(i.saturating_sub(1)));
                    }
                    KeyCode::Down | KeyCode::Char('j') => {
                        // Clamp to the last row so we never select past the end.
                        let i = self.list_state.selected().unwrap_or(0);
                        let max = candidate_count.saturating_sub(1);
                        self.list_state.select(Some((i + 1).min(max)));
                    }
                    KeyCode::Char('a') => {
                        if let Some(idx) = self.selected_idx() {
                            app.finetune_action(idx, CandidateStatus::Approved);
                        }
                    }
                    KeyCode::Char('r') => {
                        if let Some(idx) = self.selected_idx() {
                            app.finetune_action(idx, CandidateStatus::Rejected);
                        }
                    }
                    KeyCode::Char('g') => {
                        // Toggle alternate generation and persist
                        let current = crate::subconscious::learn::alternates_enabled();
                        crate::subconscious::learn::set_alternates(!current);
                    }
                    KeyCode::Char('s') => {
                        app.finetune_send_approved();
                    }
                    _ => {}
                }
            }
        }
        // Ensure selection is valid (candidates may have shrunk since last tick)
        if candidate_count > 0 {
            let sel = self.list_state.selected().unwrap_or(0).min(candidate_count - 1);
            self.list_state.select(Some(sel));
        }
        // Get scoring progress from mind state (empty string = not running)
        let progress = app.mind_state.as_ref()
            .map(|ms| ms.finetune_progress.as_str())
            .unwrap_or("");
        // Now render. Title shows scoring progress when active, otherwise
        // whether alternate generation is on ("[gen]").
        let gen_on = crate::subconscious::learn::alternates_enabled();
        let title_right = if !progress.is_empty() {
            format!(" {} ", progress)
        } else if gen_on {
            " learn [gen] ".to_string()
        } else {
            " learn ".to_string()
        };
        let block = Block::default()
            .title_top(Line::from(screen_legend()).left_aligned())
            .title_top(Line::from(title_right).right_aligned())
            .borders(Borders::ALL)
            .border_style(Style::default().fg(Color::Magenta));
        let inner = block.inner(area);
        frame.render_widget(block, area);
        let candidates = &app.finetune_candidates;
        if candidates.is_empty() {
            let msg = if progress.is_empty() {
                " No candidates yet — scoring runs after each turn."
            } else {
                " Scoring in progress..."
            };
            frame.render_widget(
                Paragraph::new(Line::styled(msg, Style::default().fg(Color::DarkGray))),
                inner,
            );
            return;
        }
        // Layout: list on left, detail on right
        let [list_area, detail_area] = Layout::horizontal([
            Constraint::Percentage(40),
            Constraint::Percentage(60),
        ]).areas(inner);
        // Render candidate list: status marker, divergence score, text preview
        let items: Vec<ListItem> = candidates.iter().map(|c| {
            let status_char = match c.status {
                CandidateStatus::Pending => ' ',
                CandidateStatus::Approved => '+',
                CandidateStatus::Rejected => '-',
                CandidateStatus::Sent => '*',
            };
            let style = match c.status {
                CandidateStatus::Pending => Style::default(),
                CandidateStatus::Approved => Style::default().fg(Color::Green),
                CandidateStatus::Rejected => Style::default().fg(Color::DarkGray),
                CandidateStatus::Sent => Style::default().fg(Color::Cyan),
            };
            ListItem::new(Line::from(vec![
                Span::styled(format!("[{}] ", status_char), style),
                Span::styled(format!("{:.2} ", c.divergence), Style::default().fg(Color::Yellow)),
                Span::raw(truncate(&c.response_text, 30)),
            ]))
        }).collect();
        let list = List::new(items)
            .block(Block::default().borders(Borders::RIGHT).title(" candidates "))
            .highlight_style(Style::default().add_modifier(Modifier::REVERSED));
        frame.render_stateful_widget(list, list_area, &mut self.list_state);
        // Render detail for selected candidate
        if let Some(idx) = self.selected_idx() {
            if let Some(candidate) = candidates.get(idx) {
                render_detail(frame, candidate, detail_area);
            }
        }
        // Render help at bottom.
        // NOTE(review): the help row overdraws the block's bottom border
        // (y = last row of `area`) — appears intentional; confirm.
        let help = Line::from(vec![
            Span::styled(" j/k/\u{2191}\u{2193}", Style::default().fg(Color::Cyan)),
            Span::raw("=nav "),
            Span::styled("a", Style::default().fg(Color::Green)),
            Span::raw("=approve "),
            Span::styled("r", Style::default().fg(Color::Red)),
            Span::raw("=reject "),
            Span::styled("g", Style::default().fg(Color::Yellow)),
            Span::raw("=gen "),
            Span::styled("s", Style::default().fg(Color::Magenta)),
            Span::raw("=send "),
        ]);
        let help_area = Rect {
            y: area.y + area.height - 1,
            height: 1,
            ..area
        };
        frame.render_widget(Paragraph::new(help), help_area);
    }
}
/// Draw the right-hand detail pane for one candidate: a small stats header
/// followed by the response text (with the memory-free alternate appended
/// beneath a divider when one has been generated).
fn render_detail(frame: &mut Frame, c: &FinetuneCandidate, area: Rect) {
    let [header_area, content_area] =
        Layout::vertical([Constraint::Length(3), Constraint::Min(1)]).areas(area);

    // Header: divergence, entry index, whether an alternate exists.
    let alt_status = if c.alternate_text.is_some() { "yes" } else { "no" };
    let header_line = Line::from(vec![
        Span::raw(" divergence: "),
        Span::styled(format!("{:.3}", c.divergence), Style::default().fg(Color::Yellow)),
        Span::raw(format!(" entry: {} alt: {}", c.entry_idx, alt_status)),
    ]);
    frame.render_widget(Paragraph::new(vec![header_line]), header_area);

    // Body: response, plus the alternate under a divider when available.
    let text = if let Some(alt) = &c.alternate_text {
        format!(" {}\n\n─── without memories ───\n\n {}", c.response_text, alt)
    } else {
        format!(" {}", c.response_text)
    };
    let body = Paragraph::new(text)
        .block(Block::default().borders(Borders::TOP).title(" response "))
        .wrap(Wrap { trim: false });
    frame.render_widget(body, content_area);
}
/// Truncate to the first line of `s`, capped at `max` characters with a
/// "..." suffix when over the cap.
///
/// Counts and cuts by `char` rather than byte-slicing: the original
/// `&first_line[..max]` panics when byte index `max` falls inside a
/// multi-byte UTF-8 character (entirely possible in response text).
/// Behavior for pure-ASCII input is unchanged.
fn truncate(s: &str, max: usize) -> String {
    let first_line = s.lines().next().unwrap_or("");
    if first_line.chars().count() > max {
        let head: String = first_line.chars().take(max).collect();
        format!("{}...", head)
    } else {
        first_line.to_string()
    }
}

View file

@ -5,11 +5,12 @@
pub(crate) mod chat; pub(crate) mod chat;
mod context; mod context;
pub(crate) mod learn;
pub(crate) mod scroll_pane; pub(crate) mod scroll_pane;
pub mod selectable; pub mod selectable;
mod subconscious; mod subconscious;
mod unconscious;
mod thalamus; mod thalamus;
mod unconscious;
mod widgets; mod widgets;
use anyhow::Result; use anyhow::Result;
@ -121,6 +122,8 @@ struct App {
walked_count: usize, walked_count: usize,
channel_status: Vec<ChannelStatus>, channel_status: Vec<ChannelStatus>,
idle_info: Option<IdleInfo>, idle_info: Option<IdleInfo>,
/// Fine-tuning candidates pending review.
finetune_candidates: Vec<learn::FinetuneCandidate>,
} }
impl App { impl App {
@ -151,6 +154,24 @@ impl App {
rebuild_tools_pending: false, rebuild_tools_pending: false,
walked_count: 0, walked_count: 0,
channel_status: Vec::new(), idle_info: None, channel_status: Vec::new(), idle_info: None,
finetune_candidates: Vec::new(),
}
}
/// Set the review status of the candidate at `idx`.
///
/// Out-of-range indices (e.g. a stale selection) are silently ignored.
fn finetune_action(&mut self, idx: usize, status: learn::CandidateStatus) {
    let Some(candidate) = self.finetune_candidates.get_mut(idx) else {
        return;
    };
    candidate.status = status;
}
fn finetune_send_approved(&mut self) {
// TODO: Send approved candidates to /finetune endpoint
// For now, just mark them as sent and record as trained
for candidate in &mut self.finetune_candidates {
if candidate.status == learn::CandidateStatus::Approved {
crate::subconscious::learn::mark_trained(candidate.timestamp_ms);
candidate.status = learn::CandidateStatus::Sent;
}
} }
} }
@ -334,7 +355,7 @@ async fn run(
} }
let notify_rx = crate::thalamus::channels::subscribe_all(); let notify_rx = crate::thalamus::channels::subscribe_all();
// F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus // F1=chat, F2=conscious, F3=subconscious, F4=unconscious, F5=thalamus, F6=learn
let mut screens: Vec<Box<dyn tui::ScreenView>> = vec![ let mut screens: Vec<Box<dyn tui::ScreenView>> = vec![
Box::new(crate::user::chat::InteractScreen::new( Box::new(crate::user::chat::InteractScreen::new(
mind.agent.clone(), mind.shared.clone(), mind_tx.clone(), mind.agent.clone(), mind.shared.clone(), mind_tx.clone(),
@ -343,6 +364,7 @@ async fn run(
Box::new(crate::user::subconscious::SubconsciousScreen::new()), Box::new(crate::user::subconscious::SubconsciousScreen::new()),
Box::new(crate::user::unconscious::UnconsciousScreen::new()), Box::new(crate::user::unconscious::UnconsciousScreen::new()),
Box::new(crate::user::thalamus::ThalamusScreen::new()), Box::new(crate::user::thalamus::ThalamusScreen::new()),
Box::new(crate::user::learn::LearnScreen::new()),
]; ];
let mut active_screen: usize = 1; // F-key number let mut active_screen: usize = 1; // F-key number
tui::set_screen_legend(tui::screen_legend_from(&*screens)); tui::set_screen_legend(tui::screen_legend_from(&*screens));
@ -433,7 +455,36 @@ async fn run(
}; };
app.unconscious_state = unc.snapshots(store_guard.as_deref()); app.unconscious_state = unc.snapshots(store_guard.as_deref());
app.graph_health = unc.graph_health.clone(); app.graph_health = unc.graph_health.clone();
app.mind_state = Some(mind.shared.lock().unwrap().clone()); let ms = mind.shared.lock().unwrap();
// Sync finetune candidates: add new ones, keep existing (preserves approval status)
// Remove sent candidates (already trained, no need to keep)
// Keep only 10 most recent rejected candidates
app.finetune_candidates.retain(|c| c.status != learn::CandidateStatus::Sent);
for c in &ms.finetune_candidates {
let exists = app.finetune_candidates.iter()
.any(|existing| existing.timestamp_ms == c.timestamp_ms);
if !exists {
app.finetune_candidates.push(learn::FinetuneCandidate::from(c.clone()));
}
}
// Limit rejected candidates to 10 most recent
let mut rejected: Vec<_> = app.finetune_candidates.iter()
.enumerate()
.filter(|(_, c)| c.status == learn::CandidateStatus::Rejected)
.map(|(i, c)| (i, c.timestamp_ms))
.collect();
if rejected.len() > 10 {
rejected.sort_by_key(|(_, ts)| std::cmp::Reverse(*ts));
let to_remove: std::collections::HashSet<_> = rejected[10..]
.iter().map(|(i, _)| *i).collect();
let mut idx = 0;
app.finetune_candidates.retain(|_| {
let keep = !to_remove.contains(&idx);
idx += 1;
keep
});
}
app.mind_state = Some(ms.clone());
} }
app.walked_count = mind.subconscious_walked().await.len(); app.walked_count = mind.subconscious_walked().await.len();
if !startup_done { if !startup_done {