Agent identity, parallel scheduling, memory-search fixes, stemmer optimization

- Agent identity injection: prepend core-personality to all agent prompts so agents dream as me, not as generic graph workers. Include instructions to walk the graph and connect new nodes to core concepts.
- Parallel agent scheduling: sequential within type, parallel across types. Different agent types (linker, organize, replay) run concurrently.
- Linker prompt: graph walking instead of keyword search for connections. "Explore the local topology and walk the graph until you find the best connections."
- memory-search fixes: format_results no longer truncates to 5 results, pipeline default raised to 50, returned file cleared on compaction, --seen and --seen-full merged, compaction timestamp in --seen output, max_entries=3 per prompt for steady memory drip.
- Stemmer optimization: strip_suffix now works in-place on a single String buffer instead of allocating 18 new Strings per word. Note for future: reversed-suffix trie for O(suffix_len) instead of O(n_rules).
- Transcript: add compaction_timestamp() for --seen display.
- Agent budget configurable (default 4000 from config).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 12:49:10 -04:00 · 2026-03-15 12:49:10 -04:00 · 5d6b2021f8
commit 5d6b2021f8
parent 7b1d6b8ad0
8 changed files with 190 additions and 71 deletions
--- a/poc-memory/src/bin/memory-search.rs
+++ b/poc-memory/src/bin/memory-search.rs
@@ -27,16 +27,12 @@ struct Args {
    #[arg(short, long)]
    debug: bool,

-    /// Show the seen set and returned memories for this session
+    /// Show session state: seen set, returned memories, compaction info
    #[arg(long)]
    seen: bool,

-    /// Show full seen set (list all keys)
-    #[arg(long)]
-    seen_full: bool,
-
-    /// Max results to return
-    #[arg(long, default_value = "5")]
+    /// Max results from search pipeline (filtered by seen set before injection)
+    #[arg(long, default_value = "50")]
    max_results: usize,

    /// Search query (bypasses stashed input, uses this as the prompt)
@@ -60,7 +56,7 @@ fn main() {

    let args = Args::parse();

-    if args.seen || args.seen_full {
+    if args.seen {
        show_seen();
        return;
    }
@@ -114,9 +110,11 @@ fn main() {
    let is_first = !cookie_path.exists();

    if is_first || is_compaction {
-        // Reset seen set to keys that load-context will inject
+        // Reset seen set and returned list
        let seen_path = state_dir.join(format!("seen-{}", session_id));
+        let returned_path = state_dir.join(format!("returned-{}", session_id));
        fs::remove_file(&seen_path).ok();
+        fs::remove_file(&returned_path).ok();
    }

    if debug {
@@ -287,8 +285,7 @@ fn main() {
        }
    }

-    let max_results = if debug { args.max_results.max(25) } else { args.max_results };
-    let raw_results = search::run_pipeline(&pipeline, seeds, &graph, &store, debug, max_results);
+    let raw_results = search::run_pipeline(&pipeline, seeds, &graph, &store, debug, args.max_results);

    let results: Vec<search::SearchResult> = raw_results.into_iter()
        .map(|(key, activation)| {
@@ -319,7 +316,7 @@ fn main() {

    let mut result_output = String::new();
    let mut count = 0;
-    let max_entries = 5;
+    let max_entries = 3;

    for line in search_output.lines() {
        if count >= max_entries { break; }
@@ -666,19 +663,41 @@ fn show_seen() {
        return;
    }

+    let transcript_path = json["transcript_path"].as_str().unwrap_or("");
+
    println!("Session: {}", session_id);

    let cookie_path = state_dir.join(format!("cookie-{}", session_id));
    if let Ok(cookie) = fs::read_to_string(&cookie_path) {
-        println!("Cookie: {}", cookie.trim());
+        println!("Cookie:  {}", cookie.trim());
    }

-    let returned = load_returned(&state_dir, session_id);
-    if !returned.is_empty() {
-        println!("\nReturned by search ({}):", returned.len());
-        for key in &returned {
-            println!("  {}", key);
+    // Show last compaction info
+    let compaction_path = state_dir.join(format!("compaction-{}", session_id));
+    match fs::read_to_string(&compaction_path) {
+        Ok(offset_str) => {
+            let offset: u64 = offset_str.trim().parse().unwrap_or(0);
+            // Try to get a timestamp from the compaction offset in the transcript
+            let ts = if !transcript_path.is_empty() && offset > 0 {
+                poc_memory::transcript::compaction_timestamp(transcript_path, offset)
+            } else {
+                None
+            };
+            match ts {
+                Some(t) => println!("Last compaction: offset {} ({})", offset, t),
+                None => println!("Last compaction: offset {}", offset),
+            }
        }
+        Err(_) => println!("Last compaction: none detected"),
+    }
+
+    // Pending chunks
+    let chunks_dir = state_dir.join(format!("chunks-{}", session_id));
+    let pending = fs::read_dir(&chunks_dir).ok()
+        .map(|d| d.flatten().count())
+        .unwrap_or(0);
+    if pending > 0 {
+        println!("Pending chunks: {}", pending);
    }

    // Read seen file in insertion order (append-only file)
@@ -689,20 +708,46 @@ fn show_seen() {
        .filter(|s| !s.is_empty())
        .map(|s| s.to_string())
        .collect();
-    let returned_set: HashSet<_> = returned.iter().cloned().collect();
-    let pre_seeded = seen_lines.len().saturating_sub(returned.len());
-    println!("\nSeen set ({} total, {} pre-seeded):", seen_lines.len(), pre_seeded);

-    if Args::parse().seen_full {
-        for line in &seen_lines {
-            let key = parse_seen_line(line);
-            let marker = if returned_set.contains(key) { "→ " } else { "  " };
-            // Show timestamp if present, otherwise just key
-            if let Some((ts, k)) = line.split_once('\t') {
-                println!("  {} {}{}", ts, marker, k);
-            } else {
-                println!("  (no ts) {}{}", marker, line);
-            }
+    let returned = load_returned(&state_dir, session_id);
+    let returned_set: HashSet<_> = returned.iter().cloned().collect();
+
+    // Count context-loaded vs search-returned
+    let context_keys: Vec<_> = seen_lines.iter()
+        .map(|l| parse_seen_line(l).to_string())
+        .filter(|k| !returned_set.contains(k))
+        .collect();
+    let search_keys: Vec<_> = seen_lines.iter()
+        .map(|l| parse_seen_line(l).to_string())
+        .filter(|k| returned_set.contains(k))
+        .collect();
+
+    println!("\nSeen set ({} total):", seen_lines.len());
+    if !context_keys.is_empty() {
+        println!("  Context-loaded ({}):", context_keys.len());
+        for key in &context_keys {
+            println!("    {}", key);
+        }
+    }
+    if !search_keys.is_empty() {
+        println!("  Search-returned ({}):", search_keys.len());
+        for key in &search_keys {
+            println!("    {}", key);
+        }
+    }
+
+    // Show returned keys that aren't in the seen set (bug indicator)
+    let seen_key_set: HashSet<_> = seen_lines.iter()
+        .map(|l| parse_seen_line(l).to_string())
+        .collect();
+    let orphan_returned: Vec<_> = returned.iter()
+        .filter(|k| !seen_key_set.contains(k.as_str()))
+        .collect();
+    if !orphan_returned.is_empty() {
+        println!("\n  WARNING: {} returned keys not in seen set (pre-compaction?):",
+            orphan_returned.len());
+        for key in &orphan_returned {
+            println!("    {}", key);
        }
    }
 }