forked from kent/consciousness
TurnResult: remove text field, simplify oneshot loop
- Remove TurnResult.text (was dead code - Agent::turn handles text internally)
- Simplify run_with_backend to just iterate over steps (Agent::turn loops for tool calls and handles empty responses internally)
- Change run/run_shared/run_forked_shared to return Result<(), String>
- Remove AgentResult.output field (no callers used it)
- Stub out legacy text-parsing code (audit, compare) that needs redesign
- Update digest.rs to not depend on text return
- Add level parameter to journal_new/journal_update for digest support

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
ef80398466
commit
f00532bdb7
11 changed files with 82 additions and 422 deletions
7
Cargo.lock
generated
7
Cargo.lock
generated
|
|
@ -555,7 +555,6 @@ dependencies = [
|
|||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"skillratings",
|
||||
"tokenizers",
|
||||
"tokio",
|
||||
"tokio-rustls",
|
||||
|
|
@ -2802,12 +2801,6 @@ version = "1.0.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
|
||||
|
||||
[[package]]
|
||||
name = "skillratings"
|
||||
version = "0.28.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a6ee7559737c1adcd9184f168a04dc360c84878907c3ecc5c33c2320be1d47a"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.12"
|
||||
|
|
|
|||
|
|
@ -62,7 +62,6 @@ capnp = "0.25"
|
|||
capnp-rpc = "0.25"
|
||||
|
||||
tokenizers = "0.21"
|
||||
skillratings = "0.28"
|
||||
|
||||
http = "1"
|
||||
hyper = { version = "1", features = ["client", "http1"] }
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ use std::sync::Arc;
|
|||
use anyhow::Result;
|
||||
|
||||
use api::ApiClient;
|
||||
use context::{AstNode, NodeBody, ContextState, Section, Ast, PendingToolCall, ResponseParser, Role};
|
||||
use context::{AstNode, ContextState, Section, Ast, PendingToolCall, ResponseParser, Role};
|
||||
|
||||
use crate::mind::log::ConversationLog;
|
||||
|
||||
|
|
@ -105,9 +105,6 @@ pub async fn start_activity(agent: &Arc<Agent>, label: impl Into<String>) -> Act
|
|||
|
||||
/// Result of a single agent turn.
|
||||
pub struct TurnResult {
|
||||
/// The text response (already sent through UI channel).
|
||||
#[allow(dead_code)]
|
||||
pub text: String,
|
||||
/// Whether the model called yield_to_user during this turn.
|
||||
pub yield_requested: bool,
|
||||
/// Whether any tools (other than yield_to_user) were called.
|
||||
|
|
@ -445,24 +442,12 @@ impl Agent {
|
|||
}
|
||||
|
||||
// Text-only response — extract text and return
|
||||
let text = {
|
||||
let ctx = agent.context.lock().await;
|
||||
let children = ctx.conversation()[branch_idx].children();
|
||||
children.iter()
|
||||
.filter_map(|c| c.leaf())
|
||||
.filter(|l| matches!(l.body(), NodeBody::Content(_)))
|
||||
.map(|l| l.body().text())
|
||||
.collect::<Vec<_>>()
|
||||
.join("")
|
||||
};
|
||||
|
||||
let mut st = agent.state.lock().await;
|
||||
if st.pending_yield { ds.yield_requested = true; st.pending_yield = false; }
|
||||
if st.pending_model_switch.is_some() { ds.model_switch = st.pending_model_switch.take(); }
|
||||
if st.pending_dmn_pause { ds.dmn_pause = true; st.pending_dmn_pause = false; }
|
||||
|
||||
return Ok(TurnResult {
|
||||
text,
|
||||
yield_requested: ds.yield_requested,
|
||||
had_tool_calls: ds.had_tool_calls,
|
||||
tool_errors: ds.tool_errors,
|
||||
|
|
|
|||
|
|
@ -244,7 +244,7 @@ impl AutoAgent {
|
|||
pub async fn run(
|
||||
&mut self,
|
||||
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
|
||||
) -> Result<String, String> {
|
||||
) -> Result<(), String> {
|
||||
let config = crate::config::get();
|
||||
let base_url = config.api_base_url.as_deref().unwrap_or("");
|
||||
let api_key = config.api_key.as_deref().unwrap_or("");
|
||||
|
|
@ -288,7 +288,7 @@ impl AutoAgent {
|
|||
pub async fn run_shared(
|
||||
&mut self,
|
||||
agent: &std::sync::Arc<Agent>,
|
||||
) -> Result<String, String> {
|
||||
) -> Result<(), String> {
|
||||
let mut backend = Backend(agent.clone());
|
||||
self.run_with_backend(&mut backend, None).await
|
||||
}
|
||||
|
|
@ -301,7 +301,7 @@ impl AutoAgent {
|
|||
memory_keys: &[String],
|
||||
state: &std::collections::BTreeMap<String, String>,
|
||||
recently_written: &[String],
|
||||
) -> Result<String, String> {
|
||||
) -> Result<(), String> {
|
||||
let resolved_steps: Vec<AutoStep> = self.steps.iter().map(|s| AutoStep {
|
||||
prompt: resolve_prompt(&s.prompt, memory_keys, state, recently_written),
|
||||
phase: s.phase.clone(),
|
||||
|
|
@ -347,67 +347,23 @@ impl AutoAgent {
|
|||
&mut self,
|
||||
backend: &mut Backend,
|
||||
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
|
||||
) -> Result<String, String> {
|
||||
) -> Result<(), String> {
|
||||
dbglog!("[auto] {} starting, {} steps", self.name, self.steps.len());
|
||||
self.turn = 0;
|
||||
self.current_phase = self.steps.first()
|
||||
.map(|s| s.phase.clone()).unwrap_or_default();
|
||||
let mut next_step = 0;
|
||||
|
||||
if next_step < self.steps.len() {
|
||||
backend.push_node(
|
||||
AstNode::system_msg(&self.steps[next_step].prompt)).await;
|
||||
next_step += 1;
|
||||
}
|
||||
for (i, step) in self.steps.iter().enumerate() {
|
||||
self.turn = i + 1;
|
||||
self.current_phase = step.phase.clone();
|
||||
|
||||
let max_turns = 50 * self.steps.len().max(1);
|
||||
|
||||
for _ in 0..max_turns {
|
||||
self.turn += 1;
|
||||
|
||||
let result = match Agent::turn(backend.0.clone()).await {
|
||||
Ok(r) => r,
|
||||
Err(e) if super::context::is_context_overflow(&e) => {
|
||||
dbglog!("[auto] {} context full, stopping gracefully", self.name);
|
||||
return Ok(String::new());
|
||||
}
|
||||
Err(e) => return Err(format!("{}: {}", self.name, e)),
|
||||
};
|
||||
|
||||
if result.had_tool_calls {
|
||||
continue;
|
||||
}
|
||||
|
||||
let text = result.text;
|
||||
if text.is_empty() {
|
||||
dbglog!("[auto] {} empty response, retrying", self.name);
|
||||
backend.push_node(AstNode::system_msg(
|
||||
"Your previous response was empty. \
|
||||
Please respond with text or use a tool."
|
||||
)).await;
|
||||
continue;
|
||||
}
|
||||
|
||||
dbglog!("[auto] {} response: {}",
|
||||
self.name, &text[..text.floor_char_boundary(text.len().min(200))]);
|
||||
|
||||
if next_step < self.steps.len() {
|
||||
if let Some(ref check) = bail_fn {
|
||||
check(next_step)?;
|
||||
}
|
||||
self.current_phase = self.steps[next_step].phase.clone();
|
||||
backend.push_node(
|
||||
AstNode::system_msg(&self.steps[next_step].prompt)).await;
|
||||
next_step += 1;
|
||||
dbglog!("[auto] {} step {}/{}",
|
||||
self.name, next_step, self.steps.len());
|
||||
continue;
|
||||
check(i)?;
|
||||
}
|
||||
|
||||
return Ok(text);
|
||||
backend.push_node(AstNode::system_msg(&step.prompt)).await;
|
||||
Agent::turn(backend.0.clone()).await
|
||||
.map_err(|e| format!("{}: {}", self.name, e))?;
|
||||
}
|
||||
|
||||
Err(format!("{}: exceeded {} tool turns", self.name, max_turns))
|
||||
Ok(())
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -418,7 +374,6 @@ impl AutoAgent {
|
|||
|
||||
/// Result of running a single agent.
|
||||
pub struct AgentResult {
|
||||
pub output: String,
|
||||
pub node_keys: Vec<String>,
|
||||
/// Directory containing output() files from the agent run.
|
||||
pub state_dir: PathBuf,
|
||||
|
|
@ -556,12 +511,11 @@ pub fn run_one_agent(
|
|||
Ok(())
|
||||
};
|
||||
|
||||
let output = call_api_with_tools_sync(
|
||||
call_api_with_tools_sync(
|
||||
agent_name, &prompts, &step_phases, def.temperature, def.priority,
|
||||
&effective_tools, Some(&bail_fn))?;
|
||||
|
||||
Ok(AgentResult {
|
||||
output,
|
||||
node_keys: agent_batch.node_keys,
|
||||
state_dir,
|
||||
})
|
||||
|
|
@ -581,7 +535,7 @@ pub async fn call_api_with_tools(
|
|||
priority: i32,
|
||||
tools: &[agent_tools::Tool],
|
||||
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
|
||||
) -> Result<String, String> {
|
||||
) -> Result<(), String> {
|
||||
let steps: Vec<AutoStep> = prompts.iter().zip(
|
||||
phases.iter().map(String::as_str)
|
||||
.chain(std::iter::repeat(""))
|
||||
|
|
@ -610,7 +564,7 @@ pub fn call_api_with_tools_sync(
|
|||
priority: i32,
|
||||
tools: &[agent_tools::Tool],
|
||||
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
|
||||
) -> Result<String, String> {
|
||||
) -> Result<(), String> {
|
||||
std::thread::scope(|s| {
|
||||
s.spawn(|| {
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
|
|
|
|||
|
|
@ -101,11 +101,27 @@ pub fn journal_tools() -> [super::Tool; 3] {
|
|||
}
|
||||
}"#,
|
||||
handler: Arc::new(|_a, v| Box::pin(async move { journal_tail(&v).await })) },
|
||||
Tool { name: "journal_new", description: "Start a new journal entry.",
|
||||
parameters_json: r#"{"type":"object","properties":{"name":{"type":"string","description":"Short node name (becomes the key)"},"title":{"type":"string","description":"Descriptive title"},"body":{"type":"string","description":"Entry body"}},"required":["name","title","body"]}"#,
|
||||
Tool { name: "journal_new", description: "Start a new journal/digest entry.",
|
||||
parameters_json: r#"{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string", "description": "Short node name (becomes the key)"},
|
||||
"title": {"type": "string", "description": "Descriptive title"},
|
||||
"body": {"type": "string", "description": "Entry body"},
|
||||
"level": {"type": "integer", "description": "0=journal, 1=daily, 2=weekly, 3=monthly", "default": 0}
|
||||
},
|
||||
"required": ["name", "title", "body"]
|
||||
}"#,
|
||||
handler: Arc::new(|a, v| Box::pin(async move { journal_new(&a, &v).await })) },
|
||||
Tool { name: "journal_update", description: "Append text to the most recent journal entry.",
|
||||
parameters_json: r#"{"type":"object","properties":{"body":{"type":"string","description":"Text to append"}},"required":["body"]}"#,
|
||||
Tool { name: "journal_update", description: "Append text to the most recent entry at a level.",
|
||||
parameters_json: r#"{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"body": {"type": "string", "description": "Text to append"},
|
||||
"level": {"type": "integer", "description": "0=journal, 1=daily, 2=weekly, 3=monthly", "default": 0}
|
||||
},
|
||||
"required": ["body"]
|
||||
}"#,
|
||||
handler: Arc::new(|a, v| Box::pin(async move { journal_update(&a, &v).await })) },
|
||||
]
|
||||
}
|
||||
|
|
@ -357,10 +373,20 @@ async fn journal_tail(args: &serde_json::Value) -> Result<String> {
|
|||
query(&serde_json::json!({"query": q, "format": format})).await
|
||||
}
|
||||
|
||||
fn level_to_node_type(level: i64) -> crate::store::NodeType {
|
||||
match level {
|
||||
1 => crate::store::NodeType::EpisodicDaily,
|
||||
2 => crate::store::NodeType::EpisodicWeekly,
|
||||
3 => crate::store::NodeType::EpisodicMonthly,
|
||||
_ => crate::store::NodeType::EpisodicSession,
|
||||
}
|
||||
}
|
||||
|
||||
async fn journal_new(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args: &serde_json::Value) -> Result<String> {
|
||||
let name = get_str(args, "name")?;
|
||||
let title = get_str(args, "title")?;
|
||||
let body = get_str(args, "body")?;
|
||||
let level = args.get("level").and_then(|v| v.as_i64()).unwrap_or(0);
|
||||
let ts = chrono::Local::now().format("%Y-%m-%dT%H:%M");
|
||||
let content = format!("## {} — {}\n\n{}", ts, title, body);
|
||||
|
||||
|
|
@ -386,7 +412,7 @@ async fn journal_new(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args:
|
|||
base_key.to_string()
|
||||
};
|
||||
let mut node = crate::store::new_node(&key, &content);
|
||||
node.node_type = crate::store::NodeType::EpisodicSession;
|
||||
node.node_type = level_to_node_type(level);
|
||||
node.provenance = get_provenance(agent).await;
|
||||
store.upsert_node(node).map_err(|e| anyhow::anyhow!("{}", e))?;
|
||||
store.save().map_err(|e| anyhow::anyhow!("{}", e))?;
|
||||
|
|
@ -396,14 +422,16 @@ async fn journal_new(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args:
|
|||
|
||||
async fn journal_update(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args: &serde_json::Value) -> Result<String> {
|
||||
let body = get_str(args, "body")?;
|
||||
let level = args.get("level").and_then(|v| v.as_i64()).unwrap_or(0);
|
||||
let node_type = level_to_node_type(level);
|
||||
let arc = cached_store().await?;
|
||||
let mut store = arc.lock().await;
|
||||
let latest_key = store.nodes.values()
|
||||
.filter(|n| n.node_type == crate::store::NodeType::EpisodicSession)
|
||||
.filter(|n| n.node_type == node_type)
|
||||
.max_by_key(|n| n.created_at)
|
||||
.map(|n| n.key.clone());
|
||||
let Some(key) = latest_key else {
|
||||
anyhow::bail!("no journal entry to update — use journal_new first");
|
||||
anyhow::bail!("no entry at level {} to update — use journal_new first", level);
|
||||
};
|
||||
let existing = store.nodes.get(&key).unwrap().content.clone();
|
||||
let new_content = format!("{}\n\n{}", existing.trim_end(), body);
|
||||
|
|
|
|||
262
src/cli/agent.rs
262
src/cli/agent.rs
|
|
@ -119,265 +119,3 @@ pub fn cmd_digest_links(do_apply: bool) -> Result<(), String> {
|
|||
println!("\nApplied: {} ({} file-level fallbacks) Skipped: {}", applied, fallbacks, skipped);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Stub for the removed `journal-enrich` command; always reports removal.
pub fn cmd_journal_enrich(_jsonl_path: &str, _entry_text: &str, _grep_line: usize) -> Result<(), String> {
    // Feature was replaced by the tool-driven observation agent.
    Err(String::from("journal-enrich has been removed — use the observation agent instead."))
}
|
||||
|
||||
/// Stub for the removed `apply-consolidation` command; always reports removal.
pub fn cmd_apply_consolidation(_do_apply: bool, _report_file: Option<&str>) -> Result<(), String> {
    // Agents now apply their own changes through tool calls.
    Err(String::from("apply-consolidation has been removed — agents now apply changes via tool calls directly."))
}
|
||||
|
||||
/// Stub for the removed `knowledge-loop` command; always reports removal.
pub fn cmd_knowledge_loop(_max_cycles: usize, _batch_size: usize, _window: usize, _max_depth: i32) -> Result<(), String> {
    // Superseded by tool-calling agents run via `poc-memory agent run`.
    Err(String::from("knowledge-loop has been removed — agents now use tool calls directly. Use `poc-memory agent run` instead."))
}
|
||||
|
||||
/// Stub for the removed `fact-mine` command; always reports removal.
pub fn cmd_fact_mine(_path: &str, _batch: bool, _dry_run: bool, _output_file: Option<&str>, _min_messages: usize) -> Result<(), String> {
    // Fact extraction now happens through the observation agent.
    Err(String::from("fact-mine has been removed — use the observation agent instead."))
}
|
||||
|
||||
/// Stub for the removed `fact-mine-store` command; always reports removal.
pub fn cmd_fact_mine_store(_path: &str) -> Result<(), String> {
    // Direct fact storage was folded into the observation agent.
    Err(String::from("fact-mine-store has been removed — use the observation agent instead."))
}
|
||||
|
||||
/// Sample recent actions from each agent type, sort by quality using
|
||||
/// LLM pairwise comparison, report per-type rankings.
|
||||
/// Elo ratings file path
|
||||
fn elo_path() -> std::path::PathBuf {
|
||||
crate::config::get().data_dir.join("agent-elo.json")
|
||||
}
|
||||
|
||||
/// Load persisted Elo ratings, or initialize at 1000.0
|
||||
fn load_elo_ratings(agent_types: &[&str]) -> std::collections::HashMap<String, f64> {
|
||||
let path = elo_path();
|
||||
let mut ratings: std::collections::HashMap<String, f64> = std::fs::read_to_string(&path)
|
||||
.ok()
|
||||
.and_then(|s| serde_json::from_str(&s).ok())
|
||||
.unwrap_or_default();
|
||||
for t in agent_types {
|
||||
ratings.entry(t.to_string()).or_insert(1000.0);
|
||||
}
|
||||
ratings
|
||||
}
|
||||
|
||||
fn save_elo_ratings(ratings: &std::collections::HashMap<String, f64>) {
|
||||
let path = elo_path();
|
||||
if let Ok(json) = serde_json::to_string_pretty(ratings) {
|
||||
let _ = std::fs::write(path, json);
|
||||
}
|
||||
}
|
||||
|
||||
/// Rank agent types by quality via LLM pairwise comparison and Elo updates.
///
/// Samples recent consolidation reports per agent type, runs `matchups`
/// random pairwise comparisons through `llm_compare`, folds the outcomes
/// into persisted Elo ratings, and prints a ranked bar chart.
/// With `dry_run`, prints one example comparison prompt and exits.
/// `model` is forwarded to `llm_compare` (which currently ignores it).
pub fn cmd_evaluate_agents(matchups: usize, model: &str, dry_run: bool) -> Result<(), String> {
    use skillratings::elo::{elo, EloConfig, EloRating};
    use skillratings::Outcomes;

    let store = store::Store::load()?;

    let agent_types: Vec<&str> = vec![
        "linker", "organize", "distill", "separator",
        "split", "rename",
    ];

    // Load agent prompt files — prefer the in-repo agents/ dir when present.
    let prompts_dir = {
        let repo = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("agents");
        if repo.is_dir() { repo } else { crate::store::memory_dir().join("agents") }
    };

    // Collect recent actions per agent type: map type -> [(report key, comparison context)].
    let mut actions: std::collections::HashMap<String, Vec<(String, String)>> = std::collections::HashMap::new();

    for agent_type in &agent_types {
        // First line of the .agent file is skipped (presumably a header — TODO confirm).
        let prompt_file = prompts_dir.join(format!("{}.agent", agent_type));
        let agent_prompt = std::fs::read_to_string(&prompt_file)
            .unwrap_or_default()
            .lines().skip(1).collect::<Vec<_>>().join("\n");
        let agent_prompt = crate::util::truncate(&agent_prompt, 500, "...");

        // Report nodes for this type are keyed "_consolidate-<type>…"; keep the
        // 20 most recent by timestamp.
        let prefix = format!("_consolidate-{}", agent_type);
        let mut keys: Vec<(String, i64)> = store.nodes.iter()
            .filter(|(k, _)| k.starts_with(&prefix))
            .map(|(k, n)| (k.clone(), n.timestamp))
            .collect();
        keys.sort_by(|a, b| b.1.cmp(&a.1));
        keys.truncate(20); // pool of recent actions to sample from

        let mut type_actions = Vec::new();
        for (key, _) in &keys {
            let report = store.nodes.get(key)
                .map(|n| n.content.clone())
                .unwrap_or_default();

            // Scan report words for node keys it mentions, and pull a short
            // preview of each affected node (capped around 1500 chars total).
            let mut target_content = String::new();
            let mut seen = std::collections::HashSet::new();
            for word in report.split_whitespace() {
                let clean = word.trim_matches(|c: char| !c.is_alphanumeric() && c != '-' && c != '_');
                if clean.len() > 10 && seen.insert(clean.to_string()) && store.nodes.contains_key(clean)
                    && let Some(node) = store.nodes.get(clean) {
                    let preview = crate::util::truncate(&node.content, 200, "...");
                    target_content.push_str(&format!("\n### {}\n{}\n", clean, preview));
                    if target_content.len() > 1500 { break; }
                }
            }

            // Context layout matters: build_compare_prompt splits on
            // "## Report output" to factor out shared instructions.
            let context = format!(
                "## Agent instructions\n{}\n\n## Report output\n{}\n\n## Affected nodes\n{}",
                agent_prompt,
                crate::util::truncate(&report, 1000, "..."),
                if target_content.is_empty() { "(none found)".into() } else { target_content }
            );
            type_actions.push((key.clone(), context));
        }
        actions.insert(agent_type.to_string(), type_actions);
    }

    // Filter to types that have at least 1 action
    let active_types: Vec<&str> = agent_types.iter()
        .filter(|t| actions.get(**t).map(|a| !a.is_empty()).unwrap_or(false))
        .copied()
        .collect();

    if active_types.len() < 2 {
        return Err("Need at least 2 agent types with actions".into());
    }

    eprintln!("Evaluating {} agent types with {} matchups (model={})",
        active_types.len(), matchups, model);

    if dry_run {
        // Show one example prompt (first vs last active type) without calling the LLM.
        let t1 = active_types[0];
        let t2 = active_types[active_types.len() - 1];
        let a1 = &actions[t1][0];
        let a2 = &actions[t2][0];
        let sample_a = (t1.to_string(), a1.0.clone(), a1.1.clone());
        let sample_b = (t2.to_string(), a2.0.clone(), a2.1.clone());
        println!("=== DRY RUN: Example comparison ===\n");
        println!("{}", build_compare_prompt(&sample_a, &sample_b));
        return Ok(());
    }

    // Load persisted ratings
    let mut ratings = load_elo_ratings(&agent_types);
    let config = EloConfig { k: 32.0 };
    // Simple but adequate RNG: xorshift32
    // (`| 1` guarantees a nonzero seed, which xorshift requires).
    let mut rng = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH).unwrap().subsec_nanos() | 1;
    let mut next_rng = || -> usize {
        rng ^= rng << 13;
        rng ^= rng >> 17;
        rng ^= rng << 5;
        rng as usize
    };

    for i in 0..matchups {
        // Pick two different random agent types
        let idx_a = next_rng() % active_types.len();
        let mut idx_b = next_rng() % active_types.len();
        if idx_b == idx_a { idx_b = (idx_b + 1) % active_types.len(); }

        let type_a = active_types[idx_a];
        let type_b = active_types[idx_b];

        // Pick random recent action from each
        let acts_a = &actions[type_a];
        let acts_b = &actions[type_b];
        let act_a = &acts_a[next_rng() % acts_a.len()];
        let act_b = &acts_b[next_rng() % acts_b.len()];

        let sample_a = (type_a.to_string(), act_a.0.clone(), act_a.1.clone());
        let sample_b = (type_b.to_string(), act_b.0.clone(), act_b.1.clone());

        let result = llm_compare(&sample_a, &sample_b, model);

        let rating_a = EloRating { rating: ratings[type_a] };
        let rating_b = EloRating { rating: ratings[type_b] };

        // Less = A preferred, Greater = B preferred; comparison errors count
        // as a win for A (matches llm_compare's default).
        let outcome = match result {
            Ok(std::cmp::Ordering::Less) => Outcomes::WIN, // A wins
            Ok(std::cmp::Ordering::Greater) => Outcomes::LOSS, // B wins
            _ => Outcomes::WIN, // default to A
        };

        let (new_a, new_b) = elo(&rating_a, &rating_b, &outcome, &config);
        ratings.insert(type_a.to_string(), new_a.rating);
        ratings.insert(type_b.to_string(), new_b.rating);

        eprint!(" matchup {}/{}: {} vs {} → {}\r",
            i + 1, matchups, type_a, type_b,
            if matches!(outcome, Outcomes::WIN) { type_a } else { type_b });
    }
    eprintln!();

    // Save updated ratings
    save_elo_ratings(&ratings);

    // Print rankings
    let mut ranked: Vec<_> = ratings.iter().collect();
    ranked.sort_by(|a, b| b.1.total_cmp(a.1));

    println!("\nAgent Elo Ratings (after {} matchups):\n", matchups);
    for (agent_type, rating) in &ranked {
        // Bar scaled from a floor of 800 rating, 10 points per '#', capped at 40.
        let bar_len = ((*rating - 800.0) / 10.0).max(0.0) as usize;
        let bar = "#".repeat(bar_len.min(40));
        println!(" {:12} {:7.1} {}", agent_type, rating, bar);
    }

    Ok(())
}
|
||||
|
||||
/// Build the pairwise-comparison prompt shown to the judging model.
///
/// Each sample is `(agent_type, action_key, context)`. When both samples
/// come from the same agent type, the shared instruction preamble
/// (everything before "## Report output") is shown once and only the two
/// reports differ; otherwise each action is shown with its full context.
fn build_compare_prompt(
    a: &(String, String, String),
    b: &(String, String, String),
) -> String {
    const MARKER: &str = "## Report output";

    if a.0 != b.0 {
        // Different agent types: include each action's full context verbatim.
        return format!(
            "Compare these two memory graph agent actions. Which one was better \
             for building a useful, well-organized knowledge graph?\n\n\
             ## Action A ({} agent)\n{}\n\n\
             ## Action B ({} agent)\n{}\n\n\
             Say which is better and why in 1-2 sentences, then end with:\n\
             BETTER: A or BETTER: B\n\
             You must pick one. No ties.",
            a.0, a.2, b.0, b.2
        );
    }

    // Same agent type: factor out the shared instructions so they appear once.
    // A missing marker leaves the whole context as the preamble and an empty report.
    let (shared_prompt, report_a) = a.2.split_once(MARKER).unwrap_or((a.2.as_str(), ""));
    let (_, report_b) = b.2.split_once(MARKER).unwrap_or(("", ""));
    format!(
        "Compare two actions from the same {} agent. Which was better?\n\n\
         {}\n\n\
         ## Action A\n## Report output{}\n\n\
         ## Action B\n## Report output{}\n\n\
         Say which is better and why in 1-2 sentences, then end with:\n\
         BETTER: A or BETTER: B\n\
         You must pick one. No ties.",
        a.0, shared_prompt, report_a, report_b
    )
}
|
||||
|
||||
fn llm_compare(
|
||||
a: &(String, String, String),
|
||||
b: &(String, String, String),
|
||||
model: &str,
|
||||
) -> Result<std::cmp::Ordering, String> {
|
||||
let prompt = build_compare_prompt(a, b);
|
||||
|
||||
let _ = model; // model selection handled by API backend config
|
||||
let response = crate::agent::oneshot::call_api_with_tools_sync(
|
||||
"compare", &[prompt], &[], None, 10, &[], None)?;
|
||||
let response = response.trim().to_uppercase();
|
||||
|
||||
if response.contains("BETTER: B") {
|
||||
Ok(std::cmp::Ordering::Greater)
|
||||
} else {
|
||||
// Default to A (includes "BETTER: A" and any unparseable response)
|
||||
Ok(std::cmp::Ordering::Less)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
50
src/main.rs
50
src/main.rs
|
|
@ -466,36 +466,6 @@ enum AgentCmd {
|
|||
#[arg(long)]
|
||||
apply: bool,
|
||||
},
|
||||
/// Mine conversation for experiential moments to journal
|
||||
#[command(name = "experience-mine")]
|
||||
ExperienceMine {
|
||||
/// Path to JSONL transcript (default: most recent)
|
||||
jsonl_path: Option<String>,
|
||||
},
|
||||
/// Extract atomic facts from conversation transcripts
|
||||
#[command(name = "fact-mine")]
|
||||
FactMine {
|
||||
/// Path to JSONL transcript or directory (with --batch)
|
||||
path: String,
|
||||
/// Process all .jsonl files in directory
|
||||
#[arg(long)]
|
||||
batch: bool,
|
||||
/// Show chunks without calling model
|
||||
#[arg(long)]
|
||||
dry_run: bool,
|
||||
/// Write JSON to file (default: stdout)
|
||||
#[arg(long, short)]
|
||||
output: Option<String>,
|
||||
/// Skip transcripts with fewer messages
|
||||
#[arg(long, default_value_t = 10)]
|
||||
min_messages: usize,
|
||||
},
|
||||
/// Extract facts from a transcript and store directly
|
||||
#[command(name = "fact-mine-store")]
|
||||
FactMineStore {
|
||||
/// Path to JSONL transcript
|
||||
path: String,
|
||||
},
|
||||
/// Run a single agent by name
|
||||
Run {
|
||||
/// Agent name (e.g. observation, linker, distill)
|
||||
|
|
@ -526,19 +496,6 @@ enum AgentCmd {
|
|||
#[arg(long, default_value_t = 10)]
|
||||
count: usize,
|
||||
},
|
||||
/// Evaluate agent quality by LLM-sorted ranking
|
||||
#[command(name = "evaluate")]
|
||||
Evaluate {
|
||||
/// Number of pairwise matchups to run
|
||||
#[arg(long, default_value_t = 30)]
|
||||
matchups: usize,
|
||||
/// Model to use for comparison (haiku or sonnet)
|
||||
#[arg(long, default_value = "haiku")]
|
||||
model: String,
|
||||
/// Show example comparison prompt without calling LLM
|
||||
#[arg(long)]
|
||||
dry_run: bool,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
|
|
@ -790,16 +747,9 @@ impl Run for AgentCmd {
|
|||
=> cli::agent::cmd_apply_consolidation(apply, report.as_deref()),
|
||||
Self::Digest { level } => cmd_digest(level),
|
||||
Self::DigestLinks { apply } => cli::agent::cmd_digest_links(apply),
|
||||
Self::ExperienceMine { .. }
|
||||
=> Err("experience-mine has been removed — use the observation agent instead.".into()),
|
||||
Self::FactMine { path, batch, dry_run, output, min_messages }
|
||||
=> cli::agent::cmd_fact_mine(&path, batch, dry_run, output.as_deref(), min_messages),
|
||||
Self::FactMineStore { path } => cli::agent::cmd_fact_mine_store(&path),
|
||||
Self::Run { agent, count, target, query, dry_run, local, state_dir }
|
||||
=> cli::agent::cmd_run_agent(&agent, count, &target, query.as_deref(), dry_run, local, state_dir.as_deref()),
|
||||
Self::ReplayQueue { count } => cli::agent::cmd_replay_queue(count),
|
||||
Self::Evaluate { matchups, model, dry_run }
|
||||
=> cli::agent::cmd_evaluate_agents(matchups, &model, dry_run),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -319,7 +319,7 @@ struct SubconsciousAgent {
|
|||
forked_agent: Option<Arc<crate::agent::Agent>>,
|
||||
/// Entry index where the fork diverged from the conscious agent.
|
||||
fork_point: usize,
|
||||
handle: Option<tokio::task::JoinHandle<(AutoAgent, Result<String, String>)>>,
|
||||
handle: Option<tokio::task::JoinHandle<(AutoAgent, Result<(), String>)>>,
|
||||
}
|
||||
|
||||
impl SubconsciousAgent {
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ struct UnconsciousAgent {
|
|||
name: String,
|
||||
enabled: bool,
|
||||
auto: AutoAgent,
|
||||
handle: Option<tokio::task::JoinHandle<(AutoAgent, Result<String, String>)>>,
|
||||
handle: Option<tokio::task::JoinHandle<(AutoAgent, Result<(), String>)>>,
|
||||
/// Shared agent handle — UI locks to read context live.
|
||||
pub agent: Option<std::sync::Arc<crate::agent::Agent>>,
|
||||
last_run: Option<Instant>,
|
||||
|
|
|
|||
|
|
@ -2,11 +2,12 @@
|
|||
//
|
||||
// Each batch of links gets reviewed by Sonnet, which returns per-link actions:
|
||||
// KEEP, DELETE, RETARGET, WEAKEN, STRENGTHEN. Batches run in parallel via rayon.
|
||||
//
|
||||
// TODO: Redesign to use tool-based agent instead of text parsing.
|
||||
|
||||
use crate::store::{self, Store, new_relation};
|
||||
|
||||
use std::collections::HashSet;
|
||||
use crate::store::Store;
|
||||
|
||||
#[allow(dead_code)]
|
||||
struct LinkInfo {
|
||||
rel_idx: usize,
|
||||
source_key: String,
|
||||
|
|
@ -26,6 +27,7 @@ pub struct AuditStats {
|
|||
pub errors: usize,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn build_audit_prompt(batch: &[LinkInfo], batch_num: usize, total_batches: usize) -> String {
|
||||
let mut prompt = format!(
|
||||
"You are auditing memory graph links for quality (batch {}/{}).\n\n\
|
||||
|
|
@ -63,6 +65,7 @@ fn build_audit_prompt(batch: &[LinkInfo], batch_num: usize, total_batches: usize
|
|||
prompt
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn parse_audit_response(response: &str, batch_size: usize) -> Vec<(usize, AuditAction)> {
|
||||
let mut actions = Vec::new();
|
||||
|
||||
|
|
@ -109,6 +112,7 @@ fn parse_audit_response(response: &str, batch_size: usize) -> Vec<(usize, AuditA
|
|||
actions
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
enum AuditAction {
|
||||
Keep,
|
||||
Delete,
|
||||
|
|
@ -118,7 +122,11 @@ enum AuditAction {
|
|||
}
|
||||
|
||||
/// Run a full link audit: walk every link, batch to Sonnet, apply results.
|
||||
pub fn link_audit(store: &mut Store, apply: bool) -> Result<AuditStats, String> {
|
||||
pub fn link_audit(_store: &mut Store, _apply: bool) -> Result<AuditStats, String> {
|
||||
// TODO: Reimplement to use tool-based agent instead of text parsing
|
||||
Err("link_audit disabled: needs redesign to use tool-based agent".to_string())
|
||||
|
||||
/*
|
||||
// Collect all non-deleted relations with their info
|
||||
let mut links: Vec<LinkInfo> = Vec::new();
|
||||
|
||||
|
|
@ -330,4 +338,5 @@ pub fn link_audit(store: &mut Store, apply: bool) -> Result<AuditStats, String>
|
|||
}
|
||||
|
||||
Ok(stats)
|
||||
*/
|
||||
}
|
||||
|
|
|
|||
|
|
@ -284,14 +284,13 @@ fn generate_digest(
|
|||
.filter(|t| def.tools.iter().any(|w| w == &t.name))
|
||||
.collect()
|
||||
};
|
||||
let digest = crate::agent::oneshot::call_api_with_tools_sync(
|
||||
// Agent writes digest via memory_write tool - we just run it
|
||||
crate::agent::oneshot::call_api_with_tools_sync(
|
||||
&def.agent, &prompts, &phases, def.temperature, def.priority,
|
||||
&tools, None)?;
|
||||
|
||||
let key = digest_node_key(level.name, label);
|
||||
store.upsert_provenance(&key, &digest, "digest:write")?;
|
||||
|
||||
// Structural links: connect all source entries to this digest
|
||||
let key = digest_node_key(level.name, label);
|
||||
let mut linked = 0;
|
||||
for source_key in source_keys {
|
||||
// Skip if link already exists
|
||||
|
|
@ -299,10 +298,17 @@ fn generate_digest(
|
|||
!r.deleted && r.source_key == *source_key && r.target_key == key);
|
||||
if exists { continue; }
|
||||
|
||||
// Reload store to pick up agent's writes
|
||||
*store = Store::load().map_err(|e| format!("reload: {}", e))?;
|
||||
|
||||
let source_uuid = store.nodes.get(source_key)
|
||||
.map(|n| n.uuid).unwrap_or([0u8; 16]);
|
||||
let target_uuid = store.nodes.get(&key)
|
||||
.map(|n| n.uuid).unwrap_or([0u8; 16]);
|
||||
if target_uuid == [0u8; 16] {
|
||||
println!(" Warning: digest key {} not found after agent run", key);
|
||||
continue;
|
||||
}
|
||||
let mut rel = new_relation(
|
||||
source_uuid, target_uuid,
|
||||
store::RelationType::Link, 0.8,
|
||||
|
|
@ -314,12 +320,10 @@ fn generate_digest(
|
|||
}
|
||||
if linked > 0 {
|
||||
println!(" Linked {} source entries → {}", linked, key);
|
||||
store.save()?;
|
||||
}
|
||||
|
||||
store.save()?;
|
||||
println!(" Stored: {}", key);
|
||||
|
||||
println!(" Done: {} lines", digest.lines().count());
|
||||
println!(" Done");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue