F6 learn screen: fine-tuning candidate review

Wire up divergence scoring to identify responses that depend heavily on
memories the model hasn't internalized. These are candidates for fine-tuning.

- Score finetune candidates automatically after each turn
- Track trained responses by timestamp to prevent overtraining
- F6 screen shows candidates with divergence scores
- j/k nav, a=approve, r=reject, g=toggle alternate gen, s=send
- Additive sync preserves approval status across ticks
- Keeps the 10 most recent rejected candidates; removes candidates once sent

The 's' key currently just marks as trained locally — actual /finetune
endpoint call to follow.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-16 00:31:39 -04:00
parent 2c6a5c0f4a
commit 50b7b3a33a
4 changed files with 557 additions and 3 deletions

View file

@ -16,6 +16,7 @@
use crate::agent::api::ApiClient;
use crate::agent::context::{AstNode, Ast, NodeBody, ContextState, Role};
use crate::agent::tokenizer;
const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);
@ -452,3 +453,198 @@ pub async fn score_finetune(
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
Ok(results)
}
/// Enriched finetune candidate with context for review.
#[derive(Clone, Debug)]
pub struct FinetuneCandidate {
    /// Index of this response within the conversation entries.
    pub entry_idx: usize,
    /// Divergence score; higher means the response leaned more on memories.
    pub divergence: f64,
    /// Concatenated text of the response, shown during review.
    pub response_text: String,
    /// Token IDs for context (everything before the response).
    pub context_ids: Vec<u32>,
    /// Token IDs for the response (what we're training on).
    pub continuation_ids: Vec<u32>,
    /// What the model would have said without memories (if generated).
    pub alternate_text: Option<String>,
    /// Timestamp in millis for tracking trained status.
    pub timestamp_ms: i64,
}
/// Score and enrich finetune candidates with full context.
///
/// Runs divergence scoring over the conversation, then filters and enriches
/// the results into [`FinetuneCandidate`]s ready for review:
/// - entries scoring below `min_divergence` are dropped;
/// - entries whose timestamp is already recorded as trained are dropped
///   (prevents overtraining on the same response);
/// - entries without a timestamp, or that are not `Branch` nodes, are dropped.
///
/// Context/continuation token IDs are precomputed for sending to /finetune.
/// When alternate generation is enabled, each candidate also gets the
/// memory-free alternate response (generation failures are logged, not fatal).
pub async fn score_finetune_candidates(
    context: &ContextState,
    count: usize,
    client: &ApiClient,
    min_divergence: f64,
) -> anyhow::Result<Vec<FinetuneCandidate>> {
    let scores = score_finetune(context, count, client).await?;
    let entries = context.conversation();
    let trained = load_trained();
    let mut candidates = Vec::new();

    for (entry_idx, divergence) in scores {
        if divergence < min_divergence {
            continue;
        }
        // Checked lookup instead of `entries[entry_idx]`: don't panic if a
        // score references an index the conversation no longer contains.
        let Some(node) = entries.get(entry_idx) else {
            continue;
        };

        // Timestamp is required for trained-status tracking; skip entries
        // that lack one or that were already trained.
        let timestamp_ms = match node_timestamp_ms(node) {
            Some(ts) if trained.contains(&ts) => continue, // already trained
            Some(ts) => ts,
            None => continue, // no timestamp, skip
        };

        // Responses are Branch nodes; concatenate the text of all leaf children.
        let response_text = match node {
            AstNode::Branch { children, .. } => children
                .iter()
                .filter_map(|c| match c {
                    AstNode::Leaf(leaf) => Some(leaf.body().text().to_string()),
                    _ => None,
                })
                .collect::<Vec<_>>()
                .join(""),
            _ => continue,
        };

        // context = everything before the response, continuation = the response.
        let context_ids = build_token_ids(context, 0..entry_idx, Filter::None);
        let continuation_ids: Vec<u32> = node.token_ids().into_iter().collect();

        candidates.push(FinetuneCandidate {
            entry_idx,
            divergence,
            response_text,
            context_ids,
            continuation_ids,
            alternate_text: None,
            timestamp_ms,
        });
    }

    // Optionally generate memory-free alternates for side-by-side review.
    // (No need to special-case an empty candidate list — the loop is a no-op.)
    if alternates_enabled() {
        for candidate in &mut candidates {
            match generate_alternate(context, candidate.entry_idx, client).await {
                Ok(text) => candidate.alternate_text = Some(text),
                Err(e) => dbglog!("[finetune] alternate generation failed: {:#}", e),
            }
        }
    }

    Ok(candidates)
}
/// Generate what the model would say without memories for a given entry.
///
/// Builds a memory-free prompt from everything before `entry_idx`, opens an
/// assistant turn, streams a completion, and decodes the generated tokens.
async fn generate_alternate(
    context: &ContextState,
    entry_idx: usize,
    client: &ApiClient,
) -> anyhow::Result<String> {
    use crate::agent::api::{SamplingParams, StreamToken};

    // Prompt = memory-free context up to (not including) the response,
    // followed by the start of a fresh assistant turn.
    let mut prompt_ids = build_token_ids(context, 0..entry_idx, Filter::SkipAllMemories);
    prompt_ids.push(tokenizer::IM_START);
    prompt_ids.extend(tokenizer::encode("assistant\n"));

    let params = SamplingParams {
        temperature: 0.6,
        top_p: 0.95,
        top_k: 20,
    };
    let (mut rx, _guard) = client.stream_completion(&prompt_ids, params, Some(-5));

    // Collect streamed token IDs until the stream finishes or errors out.
    let mut generated = Vec::new();
    loop {
        let Some(event) = rx.recv().await else { break };
        match event {
            StreamToken::Token(id) => generated.push(id),
            StreamToken::Done { .. } => break,
            StreamToken::Error(e) => anyhow::bail!("generation error: {}", e),
        }
    }

    Ok(tokenizer::decode(&generated))
}
// ── Finetune config and persistence ─────────────────────────────
use std::path::PathBuf;
use std::collections::HashSet;
/// Marker file relative to $HOME; its existence enables alternate generation.
const FINETUNE_ALTERNATES_FILE: &str = ".consciousness/cache/finetune-alternates";
/// JSON file relative to $HOME holding the set of trained-response timestamps (millis).
const TRAINED_RESPONSES_FILE: &str = ".consciousness/cache/trained-responses.json";
/// Absolute path of the alternate-generation marker file under $HOME.
fn alternates_path() -> PathBuf {
    let home = dirs::home_dir().unwrap_or_default();
    home.join(FINETUNE_ALTERNATES_FILE)
}
/// Absolute path of the trained-responses JSON file under $HOME.
fn trained_path() -> PathBuf {
    let home = dirs::home_dir().unwrap_or_default();
    home.join(TRAINED_RESPONSES_FILE)
}
/// Check if alternate response generation is enabled.
///
/// Enabled iff the marker file exists on disk.
pub fn alternates_enabled() -> bool {
    let marker = alternates_path();
    marker.exists()
}
/// Toggle alternate response generation and persist the setting.
///
/// Enabling creates an empty marker file (and any missing parent
/// directories); disabling removes it. Persistence is best-effort —
/// filesystem errors are deliberately ignored.
pub fn set_alternates(enabled: bool) {
    let marker = alternates_path();
    if !enabled {
        let _ = std::fs::remove_file(&marker);
        return;
    }
    if let Some(dir) = marker.parent() {
        let _ = std::fs::create_dir_all(dir);
    }
    let _ = std::fs::write(&marker, "");
}
/// Load set of trained response timestamps (millis since epoch).
///
/// Missing or unparseable files yield an empty set (best-effort read).
pub fn load_trained() -> HashSet<i64> {
    std::fs::read_to_string(trained_path())
        .ok()
        .and_then(|json| serde_json::from_str(&json).ok())
        .unwrap_or_default()
}
/// Mark a response as trained by its timestamp.
///
/// Reads the existing set, inserts the timestamp, and writes the set back
/// as JSON. Write failures are silently ignored (best-effort persistence).
pub fn mark_trained(timestamp_ms: i64) {
    let mut stamps = load_trained();
    stamps.insert(timestamp_ms);

    let out = trained_path();
    if let Some(dir) = out.parent() {
        let _ = std::fs::create_dir_all(dir);
    }
    if let Ok(serialized) = serde_json::to_string(&stamps) {
        let _ = std::fs::write(&out, serialized);
    }
}
/// Get timestamp in millis from an AstNode (for Branch, uses first child).
///
/// Returns `None` when the node (or a Branch's first leaf) has no timestamp.
pub fn node_timestamp_ms(node: &AstNode) -> Option<i64> {
    let stamp = match node {
        AstNode::Leaf(leaf) => leaf.timestamp(),
        AstNode::Branch { children, .. } => children
            .first()
            .and_then(|child| child.leaf())
            .and_then(|leaf| leaf.timestamp()),
    };
    stamp.map(|ts| ts.timestamp_millis())
}