wire up /score command and debug screen for memory importance

/score snapshots the context and client, releases the agent lock, runs scoring in background. Only one score task at a time (scoring_in_flight flag). Results stored on Agent and shown on the F10 context debug screen with importance scores per memory. ApiClient derives Clone. ContextState derives Clone. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-02 22:18:50 -04:00 · 2026-04-02 22:18:50 -04:00 · c01d4a5b08
commit c01d4a5b08
parent df9b610c7f
7 changed files with 64 additions and 4 deletions
--- a/src/agent/api/mod.rs
+++ b/src/agent/api/mod.rs
@ -57,6 +57,7 @@ pub enum StreamEvent {
    Error(String),
 }
 #[derive(Clone)]
 pub struct ApiClient {
    client: Client,
    api_key: String,
--- a/src/agent/runner.rs
+++ b/src/agent/runner.rs
@ -76,6 +76,10 @@ pub struct Agent {
    session_id: String,
    /// Agent orchestration state (surface-observe, journal, reflect).
    pub agent_cycles: crate::subconscious::subconscious::AgentCycleState,
    /// Latest memory importance scores from training scorer.
    pub memory_scores: Option<crate::thought::training::MemoryScore>,
    /// Whether a /score task is currently running.
    pub scoring_in_flight: bool,
 }
 fn render_journal(entries: &[journal::JournalEntry]) -> String {
@ -125,6 +129,8 @@ impl Agent {
            prompt_file,
            session_id,
            agent_cycles,
            memory_scores: None,
            scoring_in_flight: false,
        };
        agent.load_startup_journal();
@ -670,8 +676,16 @@ impl Agent {
                        _ => unreachable!(),
                    };
                    let text = entry.message().content_text();
                    let score = self.memory_scores.as_ref()
                        .and_then(|s| s.memory_weights.iter()
                            .find(|(k, _)| k == key)
                            .map(|(_, v)| *v));
                    let label = match score {
                        Some(v) => format!("{} (importance: {:.1})", key, v),
                        None => key.to_string(),
                    };
                    ContextSection {
-                        name: key.to_string(),
+                        name: label,
                        tokens: count(text),
                        content: String::new(),
                        children: Vec::new(),
@ -970,6 +984,10 @@ impl Agent {
    }
    /// Mutable access to conversation entries (for /retry).
    pub fn client_clone(&self) -> ApiClient {
        self.client.clone()
    }
    pub fn entries_mut(&mut self) -> &mut Vec<ConversationEntry> {
        &mut self.context.entries
    }
--- a/src/agent/types.rs
+++ b/src/agent/types.rs
@ -397,6 +397,7 @@ impl ConversationEntry {
    }
 }
 #[derive(Clone)]
 pub struct ContextState {
    pub system_prompt: String,
    pub personality: Vec<(String, String)>,
--- a/src/bin/poc-agent.rs
+++ b/src/bin/poc-agent.rs
@ -354,6 +354,7 @@ impl Session {
            ("/save",    "Save session to disk"),
            ("/retry",   "Re-run last turn"),
            ("/model",   "Show/switch model (/model <name>)"),
            ("/score",   "Score memory importance"),
            ("/dmn",     "Show DMN state"),
            ("/sleep",   "Put DMN to sleep"),
            ("/wake",    "Wake DMN to foraging"),
@ -422,6 +423,46 @@ impl Session {
                }
                Command::Handled
            }
            "/score" => {
                {
                    let agent = self.agent.lock().await;
                    if agent.scoring_in_flight {
                        let _ = self.ui_tx.send(UiMessage::Info(
                            "(scoring already in progress)".into()
                        ));
                        return Command::Handled;
                    }
                }
                self.agent.lock().await.scoring_in_flight = true;
                let agent = self.agent.clone();
                let ui_tx = self.ui_tx.clone();
                tokio::spawn(async move {
                    let (context, client) = {
                        let agent = agent.lock().await;
                        (agent.context.clone(), agent.client_clone())
                    };
                    let result = poc_memory::thought::training::score_memories(
                        &context, &client, &ui_tx,
                    ).await;
                    let mut agent = agent.lock().await;
                    agent.scoring_in_flight = false;
                    match result {
                        Ok(scores) => {
                            let _ = ui_tx.send(UiMessage::Info(format!(
                                "[memory scoring complete: {} memories scored]",
                                scores.memory_keys.len(),
                            )));
                            agent.memory_scores = Some(scores);
                        }
                        Err(e) => {
                            let _ = ui_tx.send(UiMessage::Info(format!(
                                "[memory scoring failed: {:#}]", e,
                            )));
                        }
                    }
                });
                Command::Handled
            }
            "/dmn" => {
                let _ = self
                    .ui_tx
--- a/src/subconscious/daemon.rs
+++ b/src/subconscious/daemon.rs
@ -13,7 +13,6 @@
 // Phase 2 will inline job logic; Phase 3 integrates into poc-agent.
 use jobkit::{Choir, ExecutionContext, TaskError, TaskInfo, TaskStatus};
 use std::collections::{HashMap, HashSet};
 use std::fs;
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, Mutex};
--- a/src/subconscious/knowledge.rs
+++ b/src/subconscious/knowledge.rs
@ -102,7 +102,7 @@ pub fn run_and_apply_excluded(
    log: &(dyn Fn(&str) + Sync),
    exclude: &std::collections::HashSet<String>,
 ) -> Result<(), String> {
-    let result = run_one_agent_excluded(store, agent_name, batch_size, llm_tag, log, exclude)?;
+    let _result = run_one_agent_excluded(store, agent_name, batch_size, llm_tag, log, exclude)?;
    Ok(())
 }
--- a/src/thought/training.rs
+++ b/src/thought/training.rs
@ -89,7 +89,7 @@ pub async fn score_memories(
        // Compute per-response divergence
        let mut row = Vec::new();
-        for (resp_idx, (base_lps, without_lps)) in baseline.iter().zip(without.iter()).enumerate() {
+        for (_resp_idx, (base_lps, without_lps)) in baseline.iter().zip(without.iter()).enumerate() {
            // Sum of logprob drops across tokens in this response
            // Positive = memory helped (logprob was higher with it)
            let divergence: f64 = base_lps.iter().zip(without_lps.iter())