organize: fix # quoting, protect journal entries

Keys containing # are now pre-quoted in all cluster output (similarity
scores, hub analysis, node headers) so that the agent copies them correctly
into bash commands. The prompt is strengthened with a CRITICAL warning
about # being a shell comment character.

Journal entries are still included in clusters, but they are identified by
node_type (EpisodicSession) rather than by key prefix, and are tagged
[JOURNAL — no delete] in the output. Prompt rule 3b tells the agent to
LINK/REFINE journals but never to DELETE them. Digest nodes
(daily/weekly/monthly) remain excluded entirely from clusters.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-03-13 21:37:21 -04:00
parent 4cacfa7599
commit f063eb01f0
2 changed files with 44 additions and 21 deletions

View file

@ -10,16 +10,22 @@ Your job: read every node, then decide what to do with each pair.
## Your tools
```bash
# Find related clusters by search term
poc-memory graph organize TERM --key-only
# Read a node's full content
poc-memory render KEY
# Read a node's full content (ALWAYS single-quote keys with #)
poc-memory render 'identity#core'
poc-memory render simple-key
# Check a node's graph connections
poc-memory query "key = 'KEY'" | connectivity
poc-memory query "key = 'identity#core'" | connectivity
# Find related clusters by search term
poc-memory graph organize TERM --key-only
```
**CRITICAL: Keys containing `#` MUST be wrapped in single quotes in ALL
bash commands.** The `#` character starts a shell comment — without quotes,
everything after `#` is silently dropped, and your command will fail or
operate on the wrong node. Keys are shown pre-quoted in the cluster data below.
## The three decisions
For each high-similarity pair (>0.7), read both nodes fully, then pick ONE:
@ -61,12 +67,13 @@ LINK key1 key2
contain everything valuable from the deleted node. Diff them mentally.
3. **One concept, one node.** If two nodes have the same one-sentence
description, merge them.
3b. **Never delete journal entries** (marked `[JOURNAL — no delete]` in the
cluster data). They are the raw record. You may LINK and REFINE them,
but never DELETE.
4. **Work systematically.** Go through every pair above 0.7 similarity.
For pairs 0.4-0.7, check if they should be linked.
5. **Use your tools.** If the pre-computed cluster misses something,
search for it. Render nodes you're unsure about.
6. **Keys with `#` need quoting.** Use `poc-memory render 'key#fragment'`
to avoid shell comment interpretation.
## Cluster data

View file

@ -172,15 +172,22 @@ fn resolve(
return Some(Resolved { text: "(no term provided)".into(), keys: vec![] });
}
let term_lower = term.to_lowercase();
let skip_prefixes = ["journal#", "daily-", "weekly-", "monthly-", "_",
"deep-index#", "facts-", "irc-history#"];
use crate::store::NodeType;
let mut cluster: Vec<(String, String)> = Vec::new();
let mut cluster: Vec<(String, String, bool)> = Vec::new(); // (key, content, is_journal)
for (key, node) in &store.nodes {
if node.deleted { continue; }
if !key.to_lowercase().contains(&term_lower) { continue; }
if skip_prefixes.iter().any(|p| key.starts_with(p)) { continue; }
cluster.push((key.clone(), node.content.clone()));
// Skip digest/system nodes entirely
match node.node_type {
NodeType::EpisodicDaily | NodeType::EpisodicWeekly
| NodeType::EpisodicMonthly => continue,
_ => {}
}
// Skip internal prefixes
if key.starts_with('_') { continue; }
let is_journal = node.node_type == NodeType::EpisodicSession;
cluster.push((key.clone(), node.content.clone(), is_journal));
}
cluster.sort_by(|a, b| a.0.cmp(&b.0));
@ -189,8 +196,15 @@ fn resolve(
cluster.truncate(20);
}
// Similarity pairs
let pairs = crate::similarity::pairwise_similar(&cluster, 0.4);
// Similarity pairs (need (key, content) tuples)
let pair_input: Vec<(String, String)> = cluster.iter()
.map(|(k, c, _)| (k.clone(), c.clone())).collect();
let pairs = crate::similarity::pairwise_similar(&pair_input, 0.4);
// Helper: shell-quote keys containing #
let sq = |k: &str| -> String {
if k.contains('#') { format!("'{}'", k) } else { k.to_string() }
};
let mut text = format!("### Cluster: '{}' ({} nodes)\n\n", term, cluster.len());
@ -198,14 +212,14 @@ fn resolve(
if !pairs.is_empty() {
text.push_str("#### Similarity scores\n\n");
for (a, b, sim) in &pairs {
text.push_str(&format!(" [{:.3}] {}{}\n", sim, a, b));
text.push_str(&format!(" [{:.3}] {}{}\n", sim, sq(a), sq(b)));
}
text.push('\n');
}
// Connectivity
let cluster_keys: std::collections::HashSet<&str> = cluster.iter()
.map(|(k,_)| k.as_str()).collect();
.map(|(k,_,_)| k.as_str()).collect();
let mut best_hub: Option<(&str, usize)> = None;
for key in &cluster_keys {
let intra = graph.neighbor_keys(key).iter()
@ -216,12 +230,12 @@ fn resolve(
}
}
if let Some((hub, deg)) = best_hub {
text.push_str(&format!("#### Hub: {} (intra-cluster degree {})\n\n", hub, deg));
text.push_str(&format!("#### Hub: {} (intra-cluster degree {})\n\n", sq(hub), deg));
let hub_nbrs = graph.neighbor_keys(hub);
for key in &cluster_keys {
if *key == hub { continue; }
if !hub_nbrs.contains(*key) {
text.push_str(&format!(" NOT linked to hub: {}\n", key));
text.push_str(&format!(" NOT linked to hub: {}\n", sq(key)));
}
}
text.push('\n');
@ -230,9 +244,11 @@ fn resolve(
// Full node contents
text.push_str("#### Node contents\n\n");
let mut result_keys = Vec::new();
for (key, content) in &cluster {
for (key, content, is_journal) in &cluster {
let words = content.split_whitespace().count();
text.push_str(&format!("##### {} ({} words)\n\n{}\n\n---\n\n", key, words, content));
let tag = if *is_journal { " [JOURNAL — no delete]" } else { "" };
text.push_str(&format!("##### {}{} ({} words)\n\n{}\n\n---\n\n",
sq(key), tag, words, content));
result_keys.push(key.clone());
}