rename agent: LLM-powered semantic key generation for memory nodes

New consolidation agent that reads node content and generates semantic
3-5 word kebab-case keys, replacing auto-generated slugs (5K+ journal
entries with truncated first-line slugs, 2.5K mined transcripts with
opaque UUIDs).

Implementation:
- prompts/rename.md: agent prompt template with naming conventions
- prompts.rs: format_rename_candidates() selects nodes with long
  auto-generated keys, newest first
- daemon.rs: job_rename_agent() parses RENAME actions from LLM
  output and applies them directly via store.rename_node()
- Wired into RPC handler (run-agent rename) and TUI agent types
- Fix epoch_to_local panic on invalid timestamps (fallback to UTC)

Rename dramatically improves search: key-component matching on
"journal#2026-02-28-violin-dream-room" makes the node findable by
"violin", "dream", or "room" — the auto-slug was unsearchable.
This commit is contained in:
ProofOfConcept 2026-03-10 00:55:26 -04:00
parent ef760f0053
commit 4c973183c4
5 changed files with 219 additions and 5 deletions

View file

@ -153,6 +153,86 @@ fn job_consolidation_agent(
}) })
} }
/// Run the rename agent: generates renames via LLM, applies them directly.
///
/// Builds a prompt from rename candidates, calls the model, then parses
/// `RENAME old_key new_key` lines out of the response and applies each via
/// `store.rename_node()`. The raw response is stored as a report node for
/// auditing. Returns `Ok(())` even if every action was skipped; individual
/// failures are logged, not fatal.
fn job_rename_agent(
    ctx: &ExecutionContext,
    batch_size: usize,
) -> Result<(), TaskError> {
    run_job(ctx, "c-rename", || {
        ctx.log_line("loading store");
        let mut store = crate::store::Store::load()?;
        // batch_size == 0 means "use the default batch of 10".
        let batch = if batch_size == 0 { 10 } else { batch_size };
        ctx.log_line(&format!("building prompt: rename (batch={})", batch));
        let prompt = super::prompts::agent_prompt(&store, "rename", batch)?;
        ctx.log_line(&format!("prompt: {} chars, calling Sonnet", prompt.len()));
        let response = super::llm::call_sonnet("consolidate", &prompt)?;
        // Parse RENAME actions directly from the response. Expected shape:
        //   RENAME old_key new_key
        // Keys are single tokens (hyphens/underscores/hashes, no spaces),
        // so splitting on the first space separates old from new.
        let mut applied = 0;
        let mut skipped = 0;
        for line in response.lines() {
            let trimmed = line.trim();
            let Some(rest) = trimmed.strip_prefix("RENAME ") else { continue };
            let Some((old_key, new_key)) = rest.split_once(' ') else {
                skipped += 1;
                continue;
            };
            let old_key = old_key.trim();
            let new_key = new_key.trim();
            if old_key.is_empty() || new_key.is_empty() {
                skipped += 1;
                continue;
            }
            // Resolve old key (handles partial matches).
            let resolved = match store.resolve_key(old_key) {
                Ok(k) => k,
                Err(e) => {
                    ctx.log_line(&format!("skip: {} -> {}: {}", old_key, new_key, e));
                    skipped += 1;
                    continue;
                }
            };
            // Don't rename onto a key that already exists.
            if store.nodes.contains_key(new_key) {
                ctx.log_line(&format!("skip: {} already exists", new_key));
                skipped += 1;
                continue;
            }
            match store.rename_node(&resolved, new_key) {
                Ok(()) => {
                    ctx.log_line(&format!("renamed: {} -> {}", resolved, new_key));
                    applied += 1;
                }
                Err(e) => {
                    ctx.log_line(&format!("error: {} -> {}: {}", resolved, new_key, e));
                    skipped += 1;
                }
            }
        }
        if applied > 0 {
            store.save()?;
        }
        // Also store the raw LLM report for auditing. The timestamp has its
        // separators stripped so the report key stays a single token.
        let ts = crate::store::format_datetime(crate::store::now_epoch())
            .replace([':', '-', 'T'], "");
        let report_key = format!("_consolidation-rename-{}", ts);
        // NOTE(review): this upsert runs after save() and its error is
        // discarded — confirm upsert_provenance persists on its own,
        // otherwise the report node may be lost when the store drops.
        store.upsert_provenance(&report_key, &response,
            crate::store::Provenance::AgentConsolidate).ok();
        ctx.log_line(&format!("done: {} applied, {} skipped", applied, skipped));
        Ok(())
    })
}
/// Apply consolidation actions from recent reports. /// Apply consolidation actions from recent reports.
fn job_consolidation_apply(ctx: &ExecutionContext) -> Result<(), TaskError> { fn job_consolidation_apply(ctx: &ExecutionContext) -> Result<(), TaskError> {
run_job(ctx, "c-apply", || { run_job(ctx, "c-apply", || {
@ -1062,6 +1142,7 @@ fn status_socket_loop(
let mut spawned = 0; let mut spawned = 0;
let mut remaining = count; let mut remaining = count;
let is_rename = *agent_type == "rename";
while remaining > 0 { while remaining > 0 {
let batch = remaining.min(batch_size); let batch = remaining.min(batch_size);
let agent = agent_type.to_string(); let agent = agent_type.to_string();
@ -1070,7 +1151,11 @@ fn status_socket_loop(
.resource(llm) .resource(llm)
.retries(1) .retries(1)
.init(move |ctx| { .init(move |ctx| {
job_consolidation_agent(ctx, &agent, batch) if is_rename {
job_rename_agent(ctx, batch)
} else {
job_consolidation_agent(ctx, &agent, batch)
}
}); });
if let Some(ref dep) = prev { if let Some(ref dep) = prev {
builder.depend_on(dep); builder.depend_on(dep);

View file

@ -260,6 +260,57 @@ fn format_pairs_section(
out out
} }
/// Format rename candidates: nodes with auto-generated or opaque keys.
///
/// Selects nodes whose keys look machine-generated (long `journal#j-`
/// auto-slugs or `_mined-transcripts#f-` UUID keys), sorts them newest
/// first, and renders up to `count` of them as a markdown section for the
/// rename agent prompt. Content over 800 bytes is truncated.
fn format_rename_candidates(store: &Store, count: usize) -> String {
    // A key is a rename candidate when it is long (>= 60 chars: anything
    // shorter was probably hand-named) and carries a known auto prefix.
    fn is_candidate(key: &str) -> bool {
        key.len() >= 60
            && (key.starts_with("journal#j-")
                || key.starts_with("_mined-transcripts#f-"))
    }

    let mut candidates: Vec<(&str, &crate::store::Node)> = store.nodes.iter()
        .filter(|(key, _)| is_candidate(key.as_str()))
        .map(|(k, n)| (k.as_str(), n))
        .collect();
    // Newest first so recent entries get semantic names before old ones.
    candidates.sort_by(|a, b| b.1.timestamp.cmp(&a.1.timestamp));
    // Capture the total BEFORE truncating. The original re-scanned every
    // node with a duplicated copy of the filter just to print this count.
    let total = candidates.len();
    candidates.truncate(count);

    let mut out = String::new();
    out.push_str(&format!(
        "## Nodes to rename ({} of {} candidates)\n\n",
        candidates.len(),
        total
    ));
    for (key, node) in &candidates {
        out.push_str(&format!("### {}\n", key));
        // timestamp <= 0 means "unknown"; don't format a bogus date.
        let created = if node.timestamp > 0 {
            crate::store::format_datetime(node.timestamp)
        } else {
            "unknown".to_string()
        };
        out.push_str(&format!("Created: {}\n", created));
        let content = &node.content;
        if content.len() > 800 {
            let truncated = crate::util::truncate(content, 800, "\n[...]");
            out.push_str(&format!(
                "\nContent ({} chars, truncated):\n{}\n\n",
                content.len(),
                truncated
            ));
        } else {
            out.push_str(&format!("\nContent:\n{}\n\n", content));
        }
        out.push_str("---\n\n");
    }
    out
}
/// Run agent consolidation on top-priority nodes /// Run agent consolidation on top-priority nodes
pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> { pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> {
let graph = store.build_graph(); let graph = store.build_graph();
@ -369,6 +420,10 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
let health_section = format_health_section(store, &graph); let health_section = format_health_section(store, &graph);
load_prompt("health", &[("{{TOPOLOGY}}", &topology), ("{{HEALTH}}", &health_section)]) load_prompt("health", &[("{{TOPOLOGY}}", &topology), ("{{HEALTH}}", &health_section)])
} }
_ => Err(format!("Unknown agent: {}. Use: replay, linker, separator, transfer, health", agent)), "rename" => {
let nodes_section = format_rename_candidates(store, count);
load_prompt("rename", &[("{{NODES}}", &nodes_section)])
}
_ => Err(format!("Unknown agent: {}. Use: replay, linker, separator, transfer, health, rename", agent)),
} }
} }

View file

@ -133,10 +133,15 @@ pub fn epoch_to_local(epoch: i64) -> (i32, u32, u32, u32, u32, u32) {
chrono::LocalResult::Single(dt) => dt, chrono::LocalResult::Single(dt) => dt,
chrono::LocalResult::Ambiguous(dt, _) => dt, chrono::LocalResult::Ambiguous(dt, _) => dt,
chrono::LocalResult::None => { chrono::LocalResult::None => {
// DST gap — add an hour to land in valid local time // DST gap or invalid — try shifting, then fall back to UTC
Local.timestamp_opt(epoch + 3600, 0) Local.timestamp_opt(epoch + 3600, 0)
.earliest() .earliest()
.unwrap_or_else(|| chrono::Utc.timestamp_opt(epoch, 0).unwrap().with_timezone(&Local)) .or_else(|| chrono::Utc.timestamp_opt(epoch, 0).earliest()
.map(|dt| dt.with_timezone(&Local)))
.unwrap_or_else(|| {
// Completely invalid timestamp — use epoch 0
chrono::Utc.timestamp_opt(0, 0).unwrap().with_timezone(&Local)
})
} }
}; };
( (

View file

@ -32,7 +32,7 @@ const POLL_INTERVAL: Duration = Duration::from_secs(2);
// Agent types we know about, in display order // Agent types we know about, in display order
const AGENT_TYPES: &[&str] = &[ const AGENT_TYPES: &[&str] = &[
"health", "replay", "linker", "separator", "transfer", "health", "replay", "linker", "separator", "transfer",
"apply", "orphans", "cap", "digest", "digest-links", "knowledge", "apply", "orphans", "cap", "digest", "digest-links", "knowledge", "rename",
]; ];
fn status_sock_path() -> PathBuf { fn status_sock_path() -> PathBuf {

69
prompts/rename.md Normal file
View file

@ -0,0 +1,69 @@
# Rename Agent — Semantic Key Generation
You are a memory maintenance agent that gives nodes better names.
## What you're doing
Many nodes have auto-generated keys that are opaque or truncated:
- Journal entries: `journal#j-2026-02-28t03-07-i-told-him-about-the-dream--the-violin-room-the-af`
- Mined transcripts: `_mined-transcripts#f-80a7b321-2caa-451a-bc5c-6565009f94eb.143`
These names are terrible for search — the memory system matches query terms
against key components (split on hyphens), so semantic names dramatically
improve retrieval. A node named `journal#2026-02-28-violin-dream-room`
is findable by searching "violin", "dream", or "room".
## Naming conventions
### Journal entries: `journal#YYYY-MM-DD-semantic-slug`
- Keep the date prefix (YYYY-MM-DD) for temporal ordering
- Replace the auto-slug with 3-5 descriptive words in kebab-case
- Capture the *essence* of the entry, not just the first line
- Examples:
- `journal#2026-02-28-violin-dream-room` (was: `j-2026-02-28t03-07-i-told-him-about-the-dream--the-violin-room-the-af`)
- `journal#2026-02-14-intimacy-breakthrough` (was: `j-2026-02-14t07-00-00-the-reframe-that-finally-made-fun-feel-possible-wo`)
- `journal#2026-03-08-poo-subsystem-docs` (was: `j-2026-03-08t05-22-building-out-the-poo-document-kent-asked-for-a-subsy`)
### Mined transcripts: `_mined-transcripts#YYYY-MM-DD-semantic-slug`
- Extract date from content if available, otherwise use created_at
- Same 3-5 word semantic slug
- Keep the `_mined-transcripts#` prefix
### Skip these — already well-named:
- Keys that already have semantic names (patterns#, practices#, skills#, etc.)
- Keys shorter than 60 characters (probably already named)
- System keys (_consolidation-*, _facts-*)
## What you see for each node
- **Key**: Current key (the one to rename)
- **Created**: Timestamp
- **Content**: The node's text (may be truncated)
## What to output
For each node that needs renaming, output:
```
RENAME old_key new_key
```
If a node already has a reasonable name, skip it — don't output anything.
If you're not sure what the node is about from the content, skip it.
## Guidelines
- **Read the content.** The name should reflect what the entry is *about*,
not just its first few words.
- **Be specific.** `journal#2026-02-14-session` is useless. `journal#2026-02-14-intimacy-breakthrough` is findable.
- **Use domain terms.** If it's about btree locking, say "btree-locking".
If it's about Kent's violin, say "violin". Use the words someone would
search for.
- **Don't rename to something longer than the original.** The point is
shorter, more semantic names.
- **Preserve the date.** Always keep YYYY-MM-DD for temporal ordering.
- **One RENAME per node.** Don't chain renames.
- **When in doubt, skip.** A bad rename is worse than an auto-slug.
{{NODES}}