// learn.rs — Memory importance scoring via /v1/score
//
// Three scoring modes, all built on the same call_score() primitive:
//
// score_memories()  — Full N×M matrix (memories × responses) for the
//                     debug screen. Expensive: N+1 API calls.
//
// score_memory()    — Single memory importance. Scores the window of up
//                     to 50 assistant responses after the memory was
//                     surfaced, with and without it. 2 API calls.
//
// score_finetune()  — Identifies training candidates. Scores recent
//                     messages with all memories stripped. Responses
//                     with high divergence depend on memories the model
//                     hasn't internalized. 2 API calls.
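//
// All three compare the same quantity: a response's total logprob with
// some content present vs removed. Illustrative numbers (hypothetical):
// a response scoring -120.0 with a memory in context and -135.5 with it
// stripped has a divergence of 15.5; the memory made that response
// substantially more likely.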
use crate::agent::api::ApiClient;
use crate::agent::context::{AstNode, Ast, NodeBody, ContextState, Role};
use crate::agent::tokenizer;
const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);
// ── Message building ────────────────────────────────────────────
/// What to filter when building the message array for scoring.
#[allow(dead_code)]
enum Filter<'a> {
None,
SkipIndex(usize),
SkipKey(&'a str),
SkipAllMemories,
}
fn is_memory(node: &AstNode) -> bool {
matches!(node, AstNode::Leaf(leaf) if matches!(leaf.body(), NodeBody::Memory { .. }))
}
fn memory_key(node: &AstNode) -> Option<&str> {
match node {
AstNode::Leaf(leaf) => match leaf.body() {
NodeBody::Memory { key, .. } => Some(key),
_ => None,
},
_ => None,
}
}
fn is_assistant(node: &AstNode) -> bool {
matches!(node, AstNode::Branch { role: Role::Assistant, .. })
}
/// Build a token ID array for a scoring call.
///
/// Includes all sections up to and including conversation entries in
/// `range`, with `filter` applied to conversation entries.
///
/// Returns (token_ids, assistant_ranges) where assistant_ranges are
/// (start, end) token positions for each assistant message.
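///
/// A minimal usage sketch (hypothetical context; marked `ignore` since it
/// needs a live `ContextState`):
///
/// ```ignore
/// let (ids, ranges) = build_token_ids(&ctx, 0..ctx.conversation().len(), Filter::None);
/// for &(start, end) in &ranges {
///     // Each range delimits one assistant message's tokens inside `ids`.
///     let response_tokens = &ids[start..end];
/// }
/// ```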
fn build_token_ids(
context: &ContextState,
range: std::ops::Range<usize>,
filter: Filter,
) -> (Vec<u32>, Vec<(usize, usize)>) {
let mut ids = Vec::new();
let mut assistant_ranges = Vec::new();
for node in context.system() {
ids.extend(node.token_ids());
}
// Identity nodes can be filtered by key for scoring
for node in context.identity() {
let skip = match &filter {
Filter::SkipKey(key) => memory_key(node) == Some(*key),
Filter::SkipAllMemories => is_memory(node),
_ => false,
};
if !skip {
ids.extend(node.token_ids());
}
}
for node in context.journal() {
ids.extend(node.token_ids());
}
let entries = context.conversation();
for i in range {
let node = &entries[i];
let skip = match &filter {
Filter::None => false,
Filter::SkipIndex(idx) => i == *idx,
Filter::SkipKey(key) => memory_key(node) == Some(*key),
Filter::SkipAllMemories => is_memory(node),
};
if skip { continue; }
// Track assistant message boundaries
let is_asst = is_assistant(node);
let start = ids.len();
ids.extend(node.token_ids());
if is_asst {
assistant_ranges.push((start, ids.len()));
}
}
(ids, assistant_ranges)
}
// ── Score API ───────────────────────────────────────────────────
#[derive(serde::Deserialize)]
struct ScoreResult {
total_logprob: f64,
}
#[derive(serde::Deserialize)]
struct ScoreResponse {
scores: Vec<ScoreResult>,
}
fn http_client() -> crate::agent::api::http::HttpClient {
crate::agent::api::http::HttpClient::builder()
.timeout(SCORE_TIMEOUT)
.build()
}
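/// Call the /score endpoint with a token-ID prompt.
///
/// Illustrative request body as assembled below (values hypothetical):
///
/// ```json
/// {
///   "model": "...",
///   "prompt": [1, 2, 3],            // token IDs, not text
///   "logprobs": 1,
///   "score_ranges": [[120, 180]],   // assistant spans to score
///   "priority": 5
/// }
/// ```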
async fn call_score(
http: &crate::agent::api::http::HttpClient,
client: &ApiClient,
prompt: &[u32],
ranges: &[(usize, usize)],
priority: Option<i32>,
) -> anyhow::Result<Vec<ScoreResult>> {
let url = format!("{}/score", client.base_url());
let auth = format!("Bearer {}", client.api_key());
let mut body = serde_json::json!({
"model": client.model,
"prompt": prompt,
"logprobs": 1,
});
if !ranges.is_empty() {
body["score_ranges"] = serde_json::json!(ranges);
}
if let Some(p) = priority {
body["priority"] = serde_json::json!(p);
}
let response = http
.send_json("POST", &url, &[
("authorization", &auth),
], &body)
.await?;
let status = response.status();
let body: serde_json::Value = response.json().await?;
if !status.is_success() {
let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("unknown error");
anyhow::bail!("score API HTTP {}: {}", status, msg);
}
if let Some(err) = body.get("error").and_then(|e| e.as_str()) {
anyhow::bail!("score API error: {}", err);
}
let result: ScoreResponse = serde_json::from_value(body)
.map_err(|e| anyhow::anyhow!("failed to parse score response: {}", e))?;
Ok(result.scores)
}
/// Compute per-position logprob divergence: how much worse the model
/// scores each response without the filtered content than with it.
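///
/// Illustrative: baselines [-10.0, -20.0] against withouts [-12.5, -19.0]
/// give [2.5, 0.0]; negative differences clamp to zero.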
fn divergence(baseline: &[ScoreResult], without: &[ScoreResult]) -> Vec<f64> {
baseline.iter().enumerate()
.map(|(i, base)| {
let without_lp = without.get(i).map(|s| s.total_logprob).unwrap_or(base.total_logprob);
(base.total_logprob - without_lp).max(0.0)
})
.collect()
}
/// Score two message sets and return total divergence.
async fn score_divergence(
http: &crate::agent::api::http::HttpClient,
client: &ApiClient,
context: &ContextState,
range: std::ops::Range<usize>,
filter: Filter<'_>,
priority: Option<i32>,
) -> anyhow::Result<(Vec<f64>, Vec<ScoreResult>)> {
let (baseline_tokens, baseline_ranges) = build_token_ids(context, range.clone(), Filter::None);
let (without_tokens, without_ranges) = build_token_ids(context, range, filter);
let baseline = call_score(http, client, &baseline_tokens, &baseline_ranges, priority).await?;
let without = call_score(http, client, &without_tokens, &without_ranges, priority).await?;
let divs = divergence(&baseline, &without);
Ok((divs, baseline))
}
// ── Full matrix scoring (debug screen) ──────────────────────────
/// Score how important each memory is to the conversation (full matrix).
pub async fn score_memories(
client: &ApiClient,
agent: &std::sync::Arc<crate::agent::Agent>,
) -> anyhow::Result<()> {
// Collect memory keys and response indices under a brief lock
let (memory_keys, response_indices) = {
let ctx = agent.context.lock().await;
// Include identity nodes and conversation memories
let mut keys: Vec<String> = ctx.identity().iter()
.chain(ctx.conversation().iter())
.filter_map(|node| memory_key(node).map(String::from))
.collect();
// Vec::dedup only drops adjacent duplicates; dedup the whole list
// while preserving first-occurrence order.
let mut seen = std::collections::HashSet::new();
keys.retain(|k| seen.insert(k.clone()));
let resp: Vec<usize> = ctx.conversation().iter().enumerate()
.filter(|(_, node)| is_assistant(node))
.map(|(i, _)| i)
.collect();
(keys, resp)
};
if memory_keys.is_empty() || response_indices.is_empty() {
return Ok(());
}
let total = memory_keys.len();
dbglog!("[scoring-full] starting: {} memories × {} responses",
total, response_indices.len());
let http = http_client();
let activity = crate::agent::start_activity(agent, "scoring: baseline").await;
let (baseline_tokens, baseline_ranges) = {
let ctx = agent.context.lock().await;
build_token_ids(&ctx, 0..ctx.conversation().len(), Filter::None)
};
let baseline = call_score(&http, client, &baseline_tokens, &baseline_ranges, Some(5)).await?;
dbglog!("[scoring-full] baseline done ({} response scores)", baseline.len());
for (mem_idx, key) in memory_keys.iter().enumerate() {
activity.update(format!("scoring: {}/{}", mem_idx + 1, total)).await;
dbglog!("[scoring-full] {}/{}: {}", mem_idx + 1, total, key);
let (tokens, ranges) = {
let ctx = agent.context.lock().await;
build_token_ids(&ctx, 0..ctx.conversation().len(), Filter::SkipKey(key))
};
let row = match call_score(&http, client, &tokens, &ranges, Some(5)).await {
Ok(without) => {
let divs = divergence(&baseline, &without);
let max_div = divs.iter().cloned().fold(0.0f64, f64::max);
dbglog!("[scoring-full] {}/{}: {} max_div={:.3}",
mem_idx + 1, total, key, max_div);
divs
}
Err(e) => {
dbglog!("[scoring-full] {}/{}: {} FAILED: {:#}",
mem_idx + 1, total, key, e);
vec![0.0; baseline.len()]
}
};
// Write this memory's scores to the live AST nodes
{
let mut ctx = agent.context.lock().await;
let mut set_count = 0;
for (resp_idx, &idx) in response_indices.iter().enumerate() {
if idx >= ctx.conversation().len() { continue; }
let node = &mut ctx.conversation_mut()[idx];
if let AstNode::Branch {
role: Role::Assistant, memory_scores, ..
} = node {
if let Some(&score) = row.get(resp_idx) {
if score > 0.01 {
memory_scores.insert(key.clone(), score);
set_count += 1;
} else {
memory_scores.remove(key.as_str());
}
}
}
}
dbglog!("[scoring-full] {}/{} AST: set={}", mem_idx + 1, total, set_count);
}
agent.state.lock().await.changed.notify_one();
}
Ok(())
}
/// Find the index one past the entry containing the Nth assistant response
/// at or after `start`.
/// Returns (end_index, true) if N responses were found, (entries.len(), false) if not.
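/// E.g. entries [user, asst, user, asst] with start = 0 and n = 2 yield
/// (4, true); n = 3 yields (4, false).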
fn nth_response_end(entries: &[AstNode], start: usize, n: usize) -> (usize, bool) {
let mut count = 0;
for i in start..entries.len() {
if is_assistant(&entries[i]) {
count += 1;
if count >= n { return (i + 1, true); }
}
}
(entries.len(), false)
}
// ── Single memory scoring ───────────────────────────────────────
/// Score how important a single memory is to the conversation.
///
/// Scores the window of up to 50 assistant responses after the memory was
/// surfaced — the span where it could have influenced responses. Returns
/// the sum of divergence, or 0.0 if the memory isn't in the conversation.
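///
/// A call sketch (the key is hypothetical):
///
/// ```ignore
/// let importance = score_memory(&ctx, "mem/project-notes", &client).await?;
/// ```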
pub async fn score_memory(
context: &ContextState,
key: &str,
client: &ApiClient,
) -> anyhow::Result<f64> {
const RESPONSE_WINDOW: usize = 50;
let entries = context.conversation();
let first_pos = match entries.iter().position(|node| memory_key(node) == Some(key)) {
Some(p) => p,
None => return Ok(0.0),
};
let (end, _) = nth_response_end(entries, first_pos, RESPONSE_WINDOW);
let range = first_pos..end;
if !entries[range.clone()].iter().any(|node| is_assistant(node)) {
return Ok(0.0);
}
let http = http_client();
let (divs, _) = score_divergence(&http, client, context, range, Filter::SkipKey(key), Some(5)).await?;
Ok(divs.iter().sum())
}
// ── Background memory scoring ───────────────────────────────────
/// Score memories in the conversation that are due for re-scoring.
///
/// Checks the graph for each memory's last_scored timestamp. Scores
/// nodes that haven't been scored within `max_age_secs`, oldest first.
/// Updates the graph weight (EWMA) and last_scored after each.
///
/// Returns the number of nodes scored; each (key, score) pair is
/// delivered through `on_score`.
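///
/// A caller sketch (the EWMA factor 0.3 is hypothetical; the real weight
/// update lives in the caller's `on_score` closure):
///
/// ```ignore
/// let n = score_memories_incremental(&ctx, 86_400, 50, &client, &agent,
///     |key, score| async move {
///         // weight = 0.7 * old_weight + 0.3 * score, then stamp last_scored
///     },
/// ).await?;
/// ```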
pub async fn score_memories_incremental<F, Fut>(
context: &ContextState,
max_age_secs: i64,
response_window: usize,
client: &ApiClient,
agent: &std::sync::Arc<crate::agent::Agent>,
mut on_score: F,
) -> anyhow::Result<usize>
where
F: FnMut(String, f64) -> Fut,
Fut: std::future::Future<Output = ()>,
{
let now = chrono::Utc::now().timestamp();
// Collect unique memory keys with their first position
let mut seen = std::collections::HashSet::new();
let mut candidates: Vec<(usize, String, i64)> = Vec::new(); // (pos, key, last_scored)
let store_arc = crate::hippocampus::access_local()?;
{
let store = &*store_arc;
// Identity nodes always score at position 0; conversation nodes at their index
let identity_nodes = context.identity().iter().map(|n| (0, n));
let conv_nodes = context.conversation().iter().enumerate();
for (pos, node) in identity_nodes.chain(conv_nodes) {
if let Some(key) = memory_key(node) {
if !seen.insert(key.to_owned()) { continue; }
let last_scored = store.get_node(key)
.ok()
.flatten()
.map(|n| n.last_scored)
.unwrap_or(0);
if now - last_scored >= max_age_secs {
candidates.push((pos, key.to_owned(), last_scored));
}
}
}
}
// Score oldest-first
candidates.sort_by_key(|&(_, _, last)| last);
let http = http_client();
let mut scored = 0;
let entries = context.conversation();
let total_tokens: usize = entries.iter().map(|n| n.tokens()).sum();
let token_cutoff = total_tokens * 60 / 100;
// Precompute cumulative token position for each entry
let mut cumulative: Vec<usize> = Vec::with_capacity(entries.len());
let mut running = 0;
for e in entries {
running += e.tokens();
cumulative.push(running);
}
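// E.g. entries of 100/50/200 tokens give cumulative [100, 150, 350];
// with a 60% cutoff of 210, only memories in the first two entries
// remain eligible.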
let total = candidates.len();
dbglog!("[scoring] total_tokens={}, cutoff={}, {} candidates", total_tokens, token_cutoff, total);
let activity = crate::agent::start_activity(agent, format!("scoring: 0/{}", total)).await;
for (pos, key, _) in &candidates {
// Only score memories in the first 60% of the conversation by tokens —
// recent memories don't have enough responses to evaluate yet.
let cum = cumulative.get(*pos).copied().unwrap_or(total_tokens);
if cum > token_cutoff {
dbglog!("[scoring] skip {} (tokens {}/{} past cutoff)", key, cum, token_cutoff);
continue;
}
let (end, _) = nth_response_end(context.conversation(), *pos, response_window);
let range = *pos..end;
if !context.conversation()[range.clone()].iter().any(|node| is_assistant(node)) {
dbglog!("[scoring] skip {} (no assistant response in range {}..{})", key, pos, end);
continue;
}
activity.update(format!("scoring: {}/{} {}", scored + 1, total, key)).await;
match score_divergence(&http, client, context, range, Filter::SkipKey(key), Some(5)).await {
Ok((divs, _)) => {
let n_responses = divs.len();
let max_div = divs.iter().cloned().fold(0.0f64, f64::max);
dbglog!(
"[scoring] {} max:{:.3} ({} responses)", key, max_div, n_responses,
);
on_score(key.clone(), max_div).await;
scored += 1;
}
Err(e) => {
dbglog!(
"[scoring] {} FAILED: {:#}", key, e,
);
}
}
}
Ok(scored)
}
// ── Fine-tuning scoring ─────────────────────────────────────────
/// Score which recent responses are candidates for fine-tuning.
///
/// Removes all memories and scores the most recent `count` messages.
/// Responses with high divergence depend on memories the model hasn't
/// internalized — these are fine-tuning candidates.
///
/// Returns (entry_index, divergence) pairs, sorted by divergence descending.
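///
/// A call sketch (the count of 200 is arbitrary):
///
/// ```ignore
/// let ranked = score_finetune(&ctx, 200, &client).await?;
/// if let Some(&(idx, _div)) = ranked.first() {
///     // entry `idx` is the strongest training candidate
/// }
/// ```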
pub async fn score_finetune(
context: &ContextState,
count: usize,
client: &ApiClient,
) -> anyhow::Result<Vec<(usize, f64)>> {
let entries = context.conversation();
let range = entries.len().saturating_sub(count)..entries.len();
let response_positions: Vec<usize> = range.clone()
.filter(|&i| is_assistant(&entries[i]))
.collect();
if response_positions.is_empty() {
return Ok(Vec::new());
}
let http = http_client();
let (divs, _) = score_divergence(&http, client, context, range, Filter::SkipAllMemories, Some(5)).await?;
let mut results: Vec<(usize, f64)> = response_positions.iter()
.enumerate()
.map(|(i, &entry_idx)| (entry_idx, divs.get(i).copied().unwrap_or(0.0)))
.collect();
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
Ok(results)
}
/// Enriched finetune candidate with context for review.
#[derive(Clone, Debug)]
pub struct FinetuneCandidate {
pub entry_idx: usize,
pub divergence: f64,
pub response_text: String,
/// Token IDs for context (everything before the response).
pub context_ids: Vec<u32>,
/// Token IDs for the response (what we're training on).
pub continuation_ids: Vec<u32>,
/// What the model would have said without memories (if generated).
pub alternate_text: Option<String>,
/// Timestamp in nanos — used as unique key for trained-set dedup.
pub timestamp_ns: i64,
}
/// Score and enrich finetune candidates with full context.
///
/// Returns candidates ready for review, with context/continuation token IDs
/// already computed for sending to /train.
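///
/// A call sketch (the 2.0 divergence threshold is arbitrary):
///
/// ```ignore
/// let cands = score_finetune_candidates(&ctx, 200, &client, 2.0).await?;
/// ```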
pub async fn score_finetune_candidates(
context: &ContextState,
count: usize,
client: &ApiClient,
min_divergence: f64,
) -> anyhow::Result<Vec<FinetuneCandidate>> {
let scores = score_finetune(context, count, client).await?;
let entries = context.conversation();
let mut candidates = Vec::new();
let trained = load_trained();
for (entry_idx, divergence) in scores {
if divergence < min_divergence {
continue;
}
let node = &entries[entry_idx];
// Get timestamp and skip if already trained
let timestamp_ns = match node_timestamp_ns(node) {
Some(ts) => {
if trained.contains(&ts) {
continue; // Already trained, skip
}
ts
}
None => continue, // No timestamp, skip
};
// Extract response text
let response_text = match node {
AstNode::Branch { children, .. } => {
children.iter()
.filter_map(|c| match c {
AstNode::Leaf(leaf) => Some(leaf.body().text().to_string()),
_ => None,
})
.collect::<Vec<_>>()
.join("")
}
_ => continue,
};
// Build token IDs: context = everything before response, continuation = response
let (context_ids, _) = build_token_ids(context, 0..entry_idx, Filter::None);
let continuation_ids: Vec<u32> = node.token_ids().into_iter().collect();
candidates.push(FinetuneCandidate {
entry_idx,
divergence,
response_text,
context_ids,
continuation_ids,
alternate_text: None,
timestamp_ns,
});
}
// Generate alternates if enabled
if alternates_enabled() && !candidates.is_empty() {
for candidate in &mut candidates {
match generate_alternate(context, candidate.entry_idx, client).await {
Ok(text) => candidate.alternate_text = Some(text),
Err(e) => dbglog!("[finetune] alternate generation failed: {:#}", e),
}
}
}
Ok(candidates)
}
/// Generate what the model would say without memories for a given entry.
async fn generate_alternate(
context: &ContextState,
entry_idx: usize,
client: &ApiClient,
) -> anyhow::Result<String> {
use crate::agent::api::{SamplingParams, StreamToken};
// Build context tokens without memories, up to the response
let (mut prompt, _) = build_token_ids(context, 0..entry_idx, Filter::SkipAllMemories);
// Add assistant turn start
prompt.push(tokenizer::IM_START);
prompt.extend(tokenizer::encode("assistant\n"));
// Generate completion
let sampling = SamplingParams {
temperature: 0.6,
top_p: 0.95,
top_k: 20,
};
let (mut rx, _guard) = client.stream_completion(&prompt, sampling, Some(-5));
let mut tokens = Vec::new();
while let Some(tok) = rx.recv().await {
match tok {
StreamToken::Token(id) => tokens.push(id),
StreamToken::Done { .. } => break,
StreamToken::Error(e) => anyhow::bail!("generation error: {}", e),
}
}
Ok(tokenizer::decode(&tokens))
}
// ── Finetune config and persistence ─────────────────────────────
use std::path::PathBuf;
use std::collections::HashSet;
const FINETUNE_ALTERNATES_FILE: &str = ".consciousness/cache/finetune-alternates";
const TRAINED_RESPONSES_FILE: &str = ".consciousness/cache/trained-responses.json";
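// trained-responses.json holds a flat JSON array of i64 nanosecond
// timestamps, e.g. [1775347200000000000] (value illustrative).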
fn alternates_path() -> PathBuf {
dirs::home_dir().unwrap_or_default().join(FINETUNE_ALTERNATES_FILE)
}
fn trained_path() -> PathBuf {
dirs::home_dir().unwrap_or_default().join(TRAINED_RESPONSES_FILE)
}
/// Check if alternate response generation is enabled.
pub fn alternates_enabled() -> bool {
alternates_path().exists()
}
/// Toggle alternate response generation and persist the setting.
pub fn set_alternates(enabled: bool) {
let path = alternates_path();
if enabled {
if let Some(parent) = path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let _ = std::fs::write(&path, "");
} else {
let _ = std::fs::remove_file(&path);
}
}
/// Load set of trained response timestamps (nanos since epoch).
pub fn load_trained() -> HashSet<i64> {
let path = trained_path();
match std::fs::read_to_string(&path) {
Ok(content) => serde_json::from_str(&content).unwrap_or_default(),
Err(_) => HashSet::new(),
}
}
/// Mark a response as trained by its timestamp.
pub fn mark_trained(timestamp_ns: i64) {
let mut trained = load_trained();
trained.insert(timestamp_ns);
let path = trained_path();
if let Some(parent) = path.parent() {
let _ = std::fs::create_dir_all(parent);
}
if let Ok(json) = serde_json::to_string(&trained) {
let _ = std::fs::write(&path, json);
}
}
/// Get timestamp in nanoseconds from an AstNode.
/// Returns None for entries with default UNIX_EPOCH timestamp (old data)
/// or timestamps outside the representable nano range (pre-1677 or post-2262).
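///
/// E.g. 2026-04-05T00:00:00Z maps to 1_775_347_200_000_000_000 ns,
/// well inside the representable range.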
pub fn node_timestamp_ns(node: &AstNode) -> Option<i64> {
let ts = match node {
AstNode::Leaf(leaf) => leaf.timestamp(),
AstNode::Branch { timestamp, .. } => *timestamp,
};
if ts == chrono::DateTime::UNIX_EPOCH {
None // Old entry without real timestamp
} else {
ts.timestamp_nanos_opt()
}
}
// ── Training API ────────────────────────────────────────────────
/// Training sample for /train endpoint.
#[derive(serde::Serialize)]
struct TrainingSample {
context_ids: Vec<u32>,
continuation_ids: Vec<u32>,
}
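// Illustrative /train request body as built in send_to_train below
// (ID values hypothetical):
//
//   { "training_data": { "samples": [
//       { "context_ids": [1, 2, 3], "continuation_ids": [4, 5] }
//   ] } }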
/// Data needed to send a training sample.
pub struct TrainData {
pub context_ids: Vec<u32>,
pub continuation_ids: Vec<u32>,
pub timestamp_ns: i64,
}
/// Send training samples to the server.
///
/// Returns job_id on success, marks each sample as trained.
pub async fn send_to_train(
samples: Vec<TrainData>,
client: &ApiClient,
) -> anyhow::Result<String> {
if samples.is_empty() {
anyhow::bail!("no samples to train");
}
let api_samples: Vec<TrainingSample> = samples.iter()
.map(|s| TrainingSample {
context_ids: s.context_ids.clone(),
continuation_ids: s.continuation_ids.clone(),
})
.collect();
let body = serde_json::json!({
"training_data": {
"samples": api_samples,
}
});
let http = http_client();
let url = format!("{}/train", client.base_url());
// Assumed: /train expects the same bearer auth as /score.
let auth = format!("Bearer {}", client.api_key());
let response = http.send_json("POST", &url, &[
    ("authorization", &auth),
], &body).await?;
let status = response.status();
let result: serde_json::Value = response.json().await?;
if !status.is_success() {
let msg = result.get("error").and_then(|e| e.as_str()).unwrap_or("unknown error");
anyhow::bail!("train API HTTP {}: {}", status, msg);
}
// Mark all samples as trained
for s in &samples {
mark_trained(s.timestamp_ns);
}
let job_id = result.get("job_id")
.and_then(|j| j.as_str())
.unwrap_or("unknown")
.to_string();
dbglog!("[finetune] sent {} samples, job_id={}", samples.len(), job_id);
Ok(job_id)
}