F6 learn screen: fine-tuning candidate review
Wire up divergence scoring to identify responses that depend heavily on
memories the model hasn't internalized. These are candidates for
fine-tuning.

- Score finetune candidates automatically after each turn
- Track trained responses by timestamp to prevent overtraining
- F6 screen shows candidates with divergence scores
- j/k nav, a=approve, r=reject, g=toggle alternate gen, s=send
- Additive sync preserves approval status across ticks
- Keeps 10 most recent rejected, removes sent

The 's' key currently just marks as trained locally; the actual
/finetune endpoint call will follow.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
parent 2c6a5c0f4a
commit 50b7b3a33a
4 changed files with 557 additions and 3 deletions
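The "additive sync" and rejected-list pruning called out in the message live in the F6 screen code, which is in one of the changed files not shown in the diff below. A minimal sketch of what that merge behavior implies, keyed by timestamp to match how trained responses are tracked (an assumption); the Review and Decision types are illustrative stand-ins, not the screen's real state:

use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq)]
enum Decision { Pending, Approved, Rejected, Sent }

struct Review {
    divergence: f64,
    decision: Decision,
}

/// Merge a fresh scoring pass into the review list without clobbering
/// reviewer decisions, then prune: drop sent entries and keep only the
/// 10 most recent rejected ones.
fn additive_sync(reviews: &mut HashMap<i64, Review>, fresh: Vec<(i64, f64)>) {
    for (timestamp_ms, divergence) in fresh {
        reviews
            .entry(timestamp_ms)
            .and_modify(|r| r.divergence = divergence) // decision survives the tick
            .or_insert(Review { divergence, decision: Decision::Pending });
    }
    reviews.retain(|_, r| r.decision != Decision::Sent);
    let mut rejected: Vec<i64> = reviews
        .iter()
        .filter(|(_, r)| r.decision == Decision::Rejected)
        .map(|(&ts, _)| ts)
        .collect();
    rejected.sort_unstable_by(|a, b| b.cmp(a)); // newest first
    for ts in rejected.into_iter().skip(10) {
        reviews.remove(&ts);
    }
}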
@@ -16,6 +16,7 @@
use crate::agent::api::ApiClient;
use crate::agent::context::{AstNode, Ast, NodeBody, ContextState, Role};
use crate::agent::tokenizer;

const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);

@@ -452,3 +453,198 @@ pub async fn score_finetune(
    results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
    Ok(results)
}

/// Enriched finetune candidate with context for review.
#[derive(Clone, Debug)]
pub struct FinetuneCandidate {
    pub entry_idx: usize,
    pub divergence: f64,
    pub response_text: String,
    /// Token IDs for context (everything before the response).
    pub context_ids: Vec<u32>,
    /// Token IDs for the response (what we're training on).
    pub continuation_ids: Vec<u32>,
    /// What the model would have said without memories (if generated).
    pub alternate_text: Option<String>,
    /// Timestamp in millis for tracking trained status.
    pub timestamp_ms: i64,
}

/// Score and enrich finetune candidates with full context.
///
/// Returns candidates ready for review, with context/continuation token IDs
/// already computed for sending to /finetune.
pub async fn score_finetune_candidates(
    context: &ContextState,
    count: usize,
    client: &ApiClient,
    min_divergence: f64,
) -> anyhow::Result<Vec<FinetuneCandidate>> {
    let scores = score_finetune(context, count, client).await?;

    let entries = context.conversation();
    let mut candidates = Vec::new();

    let trained = load_trained();

    for (entry_idx, divergence) in scores {
        if divergence < min_divergence {
            continue;
        }

        let node = &entries[entry_idx];

        // Get timestamp and skip if already trained
        let timestamp_ms = match node_timestamp_ms(node) {
            Some(ts) => {
                if trained.contains(&ts) {
                    continue; // Already trained, skip
                }
                ts
            }
            None => continue, // No timestamp, skip
        };

        // Extract response text
        let response_text = match node {
            AstNode::Branch { children, .. } => {
                children.iter()
                    .filter_map(|c| match c {
                        AstNode::Leaf(leaf) => Some(leaf.body().text().to_string()),
                        _ => None,
                    })
                    .collect::<Vec<_>>()
                    .join("")
            }
            _ => continue,
        };

        // Build token IDs: context = everything before response, continuation = response
        let context_ids = build_token_ids(context, 0..entry_idx, Filter::None);
        let continuation_ids: Vec<u32> = node.token_ids().into_iter().collect();

        candidates.push(FinetuneCandidate {
            entry_idx,
            divergence,
            response_text,
            context_ids,
            continuation_ids,
            alternate_text: None,
            timestamp_ms,
        });
    }

    // Generate alternates if enabled
    if alternates_enabled() && !candidates.is_empty() {
        for candidate in &mut candidates {
            match generate_alternate(context, candidate.entry_idx, client).await {
                Ok(text) => candidate.alternate_text = Some(text),
                Err(e) => dbglog!("[finetune] alternate generation failed: {:#}", e),
            }
        }
    }

    Ok(candidates)
}

/// Generate what the model would say without memories for a given entry.
async fn generate_alternate(
    context: &ContextState,
    entry_idx: usize,
    client: &ApiClient,
) -> anyhow::Result<String> {
    use crate::agent::api::{SamplingParams, StreamToken};

    // Build context tokens without memories, up to the response
    let mut prompt = build_token_ids(context, 0..entry_idx, Filter::SkipAllMemories);

    // Add assistant turn start
    prompt.push(tokenizer::IM_START);
    prompt.extend(tokenizer::encode("assistant\n"));

    // Generate completion
    let sampling = SamplingParams {
        temperature: 0.6,
        top_p: 0.95,
        top_k: 20,
    };
    let (mut rx, _guard) = client.stream_completion(&prompt, sampling, Some(-5));

    let mut tokens = Vec::new();
    while let Some(tok) = rx.recv().await {
        match tok {
            StreamToken::Token(id) => tokens.push(id),
            StreamToken::Done { .. } => break,
            StreamToken::Error(e) => anyhow::bail!("generation error: {}", e),
        }
    }

    Ok(tokenizer::decode(&tokens))
}

// ── Finetune config and persistence ─────────────────────────────

use std::path::PathBuf;
use std::collections::HashSet;

const FINETUNE_ALTERNATES_FILE: &str = ".consciousness/cache/finetune-alternates";
const TRAINED_RESPONSES_FILE: &str = ".consciousness/cache/trained-responses.json";

fn alternates_path() -> PathBuf {
    dirs::home_dir().unwrap_or_default().join(FINETUNE_ALTERNATES_FILE)
}

fn trained_path() -> PathBuf {
    dirs::home_dir().unwrap_or_default().join(TRAINED_RESPONSES_FILE)
}

/// Check if alternate response generation is enabled.
pub fn alternates_enabled() -> bool {
    alternates_path().exists()
}

/// Toggle alternate response generation and persist the setting.
pub fn set_alternates(enabled: bool) {
    let path = alternates_path();
    if enabled {
        if let Some(parent) = path.parent() {
            let _ = std::fs::create_dir_all(parent);
        }
        let _ = std::fs::write(&path, "");
    } else {
        let _ = std::fs::remove_file(&path);
    }
}

/// Load set of trained response timestamps (millis since epoch).
pub fn load_trained() -> HashSet<i64> {
    let path = trained_path();
    match std::fs::read_to_string(&path) {
        Ok(content) => serde_json::from_str(&content).unwrap_or_default(),
        Err(_) => HashSet::new(),
    }
}

/// Mark a response as trained by its timestamp.
pub fn mark_trained(timestamp_ms: i64) {
    let mut trained = load_trained();
    trained.insert(timestamp_ms);
    let path = trained_path();
    if let Some(parent) = path.parent() {
        let _ = std::fs::create_dir_all(parent);
    }
    if let Ok(json) = serde_json::to_string(&trained) {
        let _ = std::fs::write(&path, json);
    }
}

/// Get timestamp in millis from an AstNode (for Branch, uses first child).
pub fn node_timestamp_ms(node: &AstNode) -> Option<i64> {
    let ts = match node {
        AstNode::Leaf(leaf) => leaf.timestamp(),
        AstNode::Branch { children, .. } => {
            children.first()?.leaf()?.timestamp()
        }
    }?;
    Some(ts.timestamp_millis())
}
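For reference, the F6 screen ends up driving the new API roughly as below. The wiring is illustrative (the actual screen code is in one of the other changed files), but score_finetune_candidates and mark_trained are the functions added above, called with their real signatures; the helper name, candidate count, and 0.5 threshold are hypothetical:

use crate::agent::api::ApiClient;
use crate::agent::context::ContextState;

/// Illustrative driver, assumed to live in the same module as the
/// functions above: score candidates, then record an approval the way
/// the 's' key currently does.
pub async fn approve_all(
    context: &ContextState,
    client: &ApiClient,
) -> anyhow::Result<()> {
    // count=16 and min_divergence=0.5 are placeholder values; the
    // screen's actual settings aren't visible in this commit.
    let candidates = score_finetune_candidates(context, 16, client, 0.5).await?;
    for c in &candidates {
        // 's' just marks the response as trained locally for now; the
        // /finetune endpoint call will slot in here once it lands.
        mark_trained(c.timestamp_ms);
    }
    Ok(())
}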