wire up /score command and debug screen for memory importance
/score snapshots the context and client, releases the agent lock, runs scoring in background. Only one score task at a time (scoring_in_flight flag). Results stored on Agent and shown on the F10 context debug screen with importance scores per memory. ApiClient derives Clone. ContextState derives Clone. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
df9b610c7f
commit
c01d4a5b08
7 changed files with 64 additions and 4 deletions
|
|
@ -57,6 +57,7 @@ pub enum StreamEvent {
|
|||
Error(String),
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ApiClient {
|
||||
client: Client,
|
||||
api_key: String,
|
||||
|
|
|
|||
|
|
@ -76,6 +76,10 @@ pub struct Agent {
|
|||
session_id: String,
|
||||
/// Agent orchestration state (surface-observe, journal, reflect).
|
||||
pub agent_cycles: crate::subconscious::subconscious::AgentCycleState,
|
||||
/// Latest memory importance scores from training scorer.
|
||||
pub memory_scores: Option<crate::thought::training::MemoryScore>,
|
||||
/// Whether a /score task is currently running.
|
||||
pub scoring_in_flight: bool,
|
||||
}
|
||||
|
||||
fn render_journal(entries: &[journal::JournalEntry]) -> String {
|
||||
|
|
@ -125,6 +129,8 @@ impl Agent {
|
|||
prompt_file,
|
||||
session_id,
|
||||
agent_cycles,
|
||||
memory_scores: None,
|
||||
scoring_in_flight: false,
|
||||
};
|
||||
|
||||
agent.load_startup_journal();
|
||||
|
|
@ -670,8 +676,16 @@ impl Agent {
|
|||
_ => unreachable!(),
|
||||
};
|
||||
let text = entry.message().content_text();
|
||||
let score = self.memory_scores.as_ref()
|
||||
.and_then(|s| s.memory_weights.iter()
|
||||
.find(|(k, _)| k == key)
|
||||
.map(|(_, v)| *v));
|
||||
let label = match score {
|
||||
Some(v) => format!("{} (importance: {:.1})", key, v),
|
||||
None => key.to_string(),
|
||||
};
|
||||
ContextSection {
|
||||
name: key.to_string(),
|
||||
name: label,
|
||||
tokens: count(text),
|
||||
content: String::new(),
|
||||
children: Vec::new(),
|
||||
|
|
@ -970,6 +984,10 @@ impl Agent {
|
|||
}
|
||||
|
||||
/// Mutable access to conversation entries (for /retry).
|
||||
pub fn client_clone(&self) -> ApiClient {
|
||||
self.client.clone()
|
||||
}
|
||||
|
||||
pub fn entries_mut(&mut self) -> &mut Vec<ConversationEntry> {
|
||||
&mut self.context.entries
|
||||
}
|
||||
|
|
|
|||
|
|
@ -397,6 +397,7 @@ impl ConversationEntry {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ContextState {
|
||||
pub system_prompt: String,
|
||||
pub personality: Vec<(String, String)>,
|
||||
|
|
|
|||
|
|
@ -354,6 +354,7 @@ impl Session {
|
|||
("/save", "Save session to disk"),
|
||||
("/retry", "Re-run last turn"),
|
||||
("/model", "Show/switch model (/model <name>)"),
|
||||
("/score", "Score memory importance"),
|
||||
("/dmn", "Show DMN state"),
|
||||
("/sleep", "Put DMN to sleep"),
|
||||
("/wake", "Wake DMN to foraging"),
|
||||
|
|
@ -422,6 +423,46 @@ impl Session {
|
|||
}
|
||||
Command::Handled
|
||||
}
|
||||
"/score" => {
|
||||
{
|
||||
let agent = self.agent.lock().await;
|
||||
if agent.scoring_in_flight {
|
||||
let _ = self.ui_tx.send(UiMessage::Info(
|
||||
"(scoring already in progress)".into()
|
||||
));
|
||||
return Command::Handled;
|
||||
}
|
||||
}
|
||||
self.agent.lock().await.scoring_in_flight = true;
|
||||
let agent = self.agent.clone();
|
||||
let ui_tx = self.ui_tx.clone();
|
||||
tokio::spawn(async move {
|
||||
let (context, client) = {
|
||||
let agent = agent.lock().await;
|
||||
(agent.context.clone(), agent.client_clone())
|
||||
};
|
||||
let result = poc_memory::thought::training::score_memories(
|
||||
&context, &client, &ui_tx,
|
||||
).await;
|
||||
let mut agent = agent.lock().await;
|
||||
agent.scoring_in_flight = false;
|
||||
match result {
|
||||
Ok(scores) => {
|
||||
let _ = ui_tx.send(UiMessage::Info(format!(
|
||||
"[memory scoring complete: {} memories scored]",
|
||||
scores.memory_keys.len(),
|
||||
)));
|
||||
agent.memory_scores = Some(scores);
|
||||
}
|
||||
Err(e) => {
|
||||
let _ = ui_tx.send(UiMessage::Info(format!(
|
||||
"[memory scoring failed: {:#}]", e,
|
||||
)));
|
||||
}
|
||||
}
|
||||
});
|
||||
Command::Handled
|
||||
}
|
||||
"/dmn" => {
|
||||
let _ = self
|
||||
.ui_tx
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@
|
|||
// Phase 2 will inline job logic; Phase 3 integrates into poc-agent.
|
||||
|
||||
use jobkit::{Choir, ExecutionContext, TaskError, TaskInfo, TaskStatus};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ pub fn run_and_apply_excluded(
|
|||
log: &(dyn Fn(&str) + Sync),
|
||||
exclude: &std::collections::HashSet<String>,
|
||||
) -> Result<(), String> {
|
||||
let result = run_one_agent_excluded(store, agent_name, batch_size, llm_tag, log, exclude)?;
|
||||
let _result = run_one_agent_excluded(store, agent_name, batch_size, llm_tag, log, exclude)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ pub async fn score_memories(
|
|||
|
||||
// Compute per-response divergence
|
||||
let mut row = Vec::new();
|
||||
for (resp_idx, (base_lps, without_lps)) in baseline.iter().zip(without.iter()).enumerate() {
|
||||
for (_resp_idx, (base_lps, without_lps)) in baseline.iter().zip(without.iter()).enumerate() {
|
||||
// Sum of logprob drops across tokens in this response
|
||||
// Positive = memory helped (logprob was higher with it)
|
||||
let divergence: f64 = base_lps.iter().zip(without_lps.iter())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue