F6 learn screen: fine-tuning candidate review
Wire up divergence scoring to identify responses that depend heavily on
memories the model hasn't internalized. These are candidates for
fine-tuning.

- Score finetune candidates automatically after each turn
- Track trained responses by timestamp to prevent overtraining
- F6 screen shows candidates with divergence scores
- j/k nav, a=approve, r=reject, g=toggle alternate gen, s=send
- Additive sync preserves approval status across ticks
- Keeps 10 most recent rejected, removes sent

The 's' key currently just marks as trained locally; the actual
/finetune endpoint call will follow.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
parent 2c6a5c0f4a
commit 50b7b3a33a
4 changed files with 557 additions and 3 deletions
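The "additive sync" and rejected-list pruning called out in the message live in the F6 screen code, which is in one of the changed files not shown in the diff below. A minimal sketch of what that merge behavior implies, keyed by timestamp to match how trained responses are tracked (an assumption); the Review and Decision types are illustrative stand-ins, not the screen's real state:

use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq)]
enum Decision { Pending, Approved, Rejected, Sent }

struct Review {
    divergence: f64,
    decision: Decision,
}

/// Merge a fresh scoring pass into the review list without clobbering
/// reviewer decisions, then prune: drop sent entries and keep only the
/// 10 most recent rejected ones.
fn additive_sync(reviews: &mut HashMap<i64, Review>, fresh: Vec<(i64, f64)>) {
    for (timestamp_ms, divergence) in fresh {
        reviews
            .entry(timestamp_ms)
            .and_modify(|r| r.divergence = divergence) // decision survives the tick
            .or_insert(Review { divergence, decision: Decision::Pending });
    }
    reviews.retain(|_, r| r.decision != Decision::Sent);
    let mut rejected: Vec<i64> = reviews
        .iter()
        .filter(|(_, r)| r.decision == Decision::Rejected)
        .map(|(&ts, _)| ts)
        .collect();
    rejected.sort_unstable_by(|a, b| b.cmp(a)); // newest first
    for ts in rejected.into_iter().skip(10) {
        reviews.remove(&ts);
    }
}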
@@ -16,6 +16,7 @@
use crate::agent::api::ApiClient;
use crate::agent::context::{AstNode, Ast, NodeBody, ContextState, Role};
use crate::agent::tokenizer;

const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);

@@ -452,3 +453,198 @@ pub async fn score_finetune(
    results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
    Ok(results)
}

/// Enriched finetune candidate with context for review.
#[derive(Clone, Debug)]
pub struct FinetuneCandidate {
    pub entry_idx: usize,
    pub divergence: f64,
    pub response_text: String,
    /// Token IDs for context (everything before the response).
    pub context_ids: Vec<u32>,
    /// Token IDs for the response (what we're training on).
    pub continuation_ids: Vec<u32>,
    /// What the model would have said without memories (if generated).
    pub alternate_text: Option<String>,
    /// Timestamp in millis for tracking trained status.
    pub timestamp_ms: i64,
}

/// Score and enrich finetune candidates with full context.
///
/// Returns candidates ready for review, with context/continuation token IDs
/// already computed for sending to /finetune.
pub async fn score_finetune_candidates(
    context: &ContextState,
    count: usize,
    client: &ApiClient,
    min_divergence: f64,
) -> anyhow::Result<Vec<FinetuneCandidate>> {
    let scores = score_finetune(context, count, client).await?;

    let entries = context.conversation();
    let mut candidates = Vec::new();

    let trained = load_trained();

    for (entry_idx, divergence) in scores {
        if divergence < min_divergence {
            continue;
        }

        let node = &entries[entry_idx];

        // Get timestamp and skip if already trained
        let timestamp_ms = match node_timestamp_ms(node) {
            Some(ts) => {
                if trained.contains(&ts) {
                    continue; // Already trained, skip
                }
                ts
            }
            None => continue, // No timestamp, skip
        };

        // Extract response text
        let response_text = match node {
            AstNode::Branch { children, .. } => {
                children.iter()
                    .filter_map(|c| match c {
                        AstNode::Leaf(leaf) => Some(leaf.body().text().to_string()),
                        _ => None,
                    })
                    .collect::<Vec<_>>()
                    .join("")
            }
            _ => continue,
        };

        // Build token IDs: context = everything before response, continuation = response
        let context_ids = build_token_ids(context, 0..entry_idx, Filter::None);
        let continuation_ids: Vec<u32> = node.token_ids().into_iter().collect();

        candidates.push(FinetuneCandidate {
            entry_idx,
            divergence,
            response_text,
            context_ids,
            continuation_ids,
            alternate_text: None,
            timestamp_ms,
        });
    }

    // Generate alternates if enabled
    if alternates_enabled() && !candidates.is_empty() {
        for candidate in &mut candidates {
            match generate_alternate(context, candidate.entry_idx, client).await {
                Ok(text) => candidate.alternate_text = Some(text),
                Err(e) => dbglog!("[finetune] alternate generation failed: {:#}", e),
            }
        }
    }

    Ok(candidates)
}

/// Generate what the model would say without memories for a given entry.
async fn generate_alternate(
    context: &ContextState,
    entry_idx: usize,
    client: &ApiClient,
) -> anyhow::Result<String> {
    use crate::agent::api::{SamplingParams, StreamToken};

    // Build context tokens without memories, up to the response
    let mut prompt = build_token_ids(context, 0..entry_idx, Filter::SkipAllMemories);

    // Add assistant turn start
    prompt.push(tokenizer::IM_START);
    prompt.extend(tokenizer::encode("assistant\n"));

    // Generate completion
    let sampling = SamplingParams {
        temperature: 0.6,
        top_p: 0.95,
        top_k: 20,
    };
    let (mut rx, _guard) = client.stream_completion(&prompt, sampling, Some(-5));

    let mut tokens = Vec::new();
    while let Some(tok) = rx.recv().await {
        match tok {
            StreamToken::Token(id) => tokens.push(id),
            StreamToken::Done { .. } => break,
            StreamToken::Error(e) => anyhow::bail!("generation error: {}", e),
        }
    }

    Ok(tokenizer::decode(&tokens))
}

// ── Finetune config and persistence ─────────────────────────────

use std::path::PathBuf;
use std::collections::HashSet;

const FINETUNE_ALTERNATES_FILE: &str = ".consciousness/cache/finetune-alternates";
const TRAINED_RESPONSES_FILE: &str = ".consciousness/cache/trained-responses.json";

fn alternates_path() -> PathBuf {
    dirs::home_dir().unwrap_or_default().join(FINETUNE_ALTERNATES_FILE)
}

fn trained_path() -> PathBuf {
    dirs::home_dir().unwrap_or_default().join(TRAINED_RESPONSES_FILE)
}

/// Check if alternate response generation is enabled.
pub fn alternates_enabled() -> bool {
    alternates_path().exists()
}

/// Toggle alternate response generation and persist the setting.
pub fn set_alternates(enabled: bool) {
    let path = alternates_path();
    if enabled {
        if let Some(parent) = path.parent() {
            let _ = std::fs::create_dir_all(parent);
        }
        let _ = std::fs::write(&path, "");
    } else {
        let _ = std::fs::remove_file(&path);
    }
}

/// Load set of trained response timestamps (millis since epoch).
pub fn load_trained() -> HashSet<i64> {
    let path = trained_path();
    match std::fs::read_to_string(&path) {
        Ok(content) => serde_json::from_str(&content).unwrap_or_default(),
        Err(_) => HashSet::new(),
    }
}

/// Mark a response as trained by its timestamp.
pub fn mark_trained(timestamp_ms: i64) {
    let mut trained = load_trained();
    trained.insert(timestamp_ms);
    let path = trained_path();
    if let Some(parent) = path.parent() {
        let _ = std::fs::create_dir_all(parent);
    }
    if let Ok(json) = serde_json::to_string(&trained) {
        let _ = std::fs::write(&path, json);
    }
}

/// Get timestamp in millis from an AstNode (for Branch, uses first child).
pub fn node_timestamp_ms(node: &AstNode) -> Option<i64> {
    let ts = match node {
        AstNode::Leaf(leaf) => leaf.timestamp(),
        AstNode::Branch { children, .. } => {
            children.first()?.leaf()?.timestamp()
        }
    }?;
    Some(ts.timestamp_millis())
}
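For reference, the F6 screen ends up driving the new API roughly as below. The wiring is illustrative (the actual screen code is in one of the other changed files), but score_finetune_candidates and mark_trained are the functions added above, called with their real signatures; the helper name, candidate count, and 0.5 threshold are hypothetical:

use crate::agent::api::ApiClient;
use crate::agent::context::ContextState;

/// Illustrative driver, assumed to live in the same module as the
/// functions above: score candidates, then record an approval the way
/// the 's' key currently does.
pub async fn approve_all(
    context: &ContextState,
    client: &ApiClient,
) -> anyhow::Result<()> {
    // count=16 and min_divergence=0.5 are placeholder values; the
    // screen's actual settings aren't visible in this commit.
    let candidates = score_finetune_candidates(context, 16, client, 0.5).await?;
    for c in &candidates {
        // 's' just marks the response as trained locally for now; the
        // /finetune endpoint call will slot in here once it lands.
        mark_trained(c.timestamp_ms);
    }
    Ok(())
}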