memory-search: chunk context output for hook delivery
Claude Code's hook output limit (~10K chars) was truncating the full context load. Split output into chunks at section boundaries, deliver first chunk on UserPromptSubmit, save remaining chunks to disk for drip-feeding on subsequent PostToolUse calls. Two-pass algorithm: split at "--- KEY (group) ---" boundaries, then merge adjacent small sections up to 9K per chunk. Separates session_id guard (needed for chunk state) from prompt guard (needed only for search), so PostToolUse events without a prompt can still pop chunks.
This commit is contained in:
parent
32d17997af
commit
d5554db6a8
1 changed files with 112 additions and 7 deletions
|
|
@ -45,6 +45,8 @@ struct Args {
|
|||
}
|
||||
|
||||
// Stash of the most recent hook input JSON — presumably reused by later
// invocations to detect repeat calls within a session; TODO confirm against
// the code that reads it.
const STASH_PATH: &str = "/tmp/claude-memory-search/last-input.json";

/// Max bytes per context chunk (hook output limit is ~10K chars)
const CHUNK_SIZE: usize = 9000;
|
||||
|
||||
fn main() {
|
||||
// Daemon agent calls set POC_AGENT=1 — skip memory search.
|
||||
|
|
@ -84,7 +86,7 @@ fn main() {
|
|||
let prompt = json["prompt"].as_str().unwrap_or("");
|
||||
let session_id = json["session_id"].as_str().unwrap_or("");
|
||||
|
||||
if prompt.is_empty() || session_id.is_empty() {
|
||||
if session_id.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -123,12 +125,12 @@ fn main() {
|
|||
|
||||
if debug { println!("[memory-search] loading full context"); }
|
||||
|
||||
// Load full memory context and pre-populate seen set with injected keys
|
||||
// Load full memory context, chunk it, print first chunk, save rest
|
||||
if let Ok(output) = Command::new("poc-memory").args(["load-context"]).output() {
|
||||
if output.status.success() {
|
||||
let ctx = String::from_utf8_lossy(&output.stdout);
|
||||
let ctx = String::from_utf8_lossy(&output.stdout).to_string();
|
||||
if !ctx.trim().is_empty() {
|
||||
// Extract keys from "--- KEY (group) ---" lines
|
||||
// Extract keys from all chunks for seen set
|
||||
for line in ctx.lines() {
|
||||
if line.starts_with("--- ") && line.ends_with(" ---") {
|
||||
let inner = &line[4..line.len() - 4];
|
||||
|
|
@ -138,15 +140,42 @@ fn main() {
|
|||
}
|
||||
}
|
||||
}
|
||||
if debug { println!("[memory-search] context loaded: {} bytes", ctx.len()); }
|
||||
if args.hook {
|
||||
print!("{}", ctx);
|
||||
|
||||
let chunks = chunk_context(&ctx, CHUNK_SIZE);
|
||||
if debug {
|
||||
println!("[memory-search] context: {} bytes, {} chunks",
|
||||
ctx.len(), chunks.len());
|
||||
}
|
||||
|
||||
// Print first chunk
|
||||
if let Some(first) = chunks.first() {
|
||||
if args.hook {
|
||||
print!("{}", first);
|
||||
}
|
||||
}
|
||||
|
||||
// Save remaining chunks for drip-feeding
|
||||
save_pending_chunks(&state_dir, session_id, &chunks[1..]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let _ = cookie;
|
||||
} else {
|
||||
// Not first call: drip-feed next pending chunk
|
||||
if let Some(chunk) = pop_pending_chunk(&state_dir, session_id) {
|
||||
if debug {
|
||||
println!("[memory-search] drip-feeding pending chunk: {} bytes", chunk.len());
|
||||
}
|
||||
if args.hook {
|
||||
print!("{}", chunk);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Search requires a prompt (PostToolUse events don't have one)
|
||||
if prompt.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip system/AFK prompts
|
||||
|
|
@ -288,6 +317,82 @@ fn main() {
|
|||
}
|
||||
|
||||
|
||||
/// Split context output into chunks of approximately `max_bytes`, breaking
/// at section boundaries ("--- KEY (group) ---" lines).
///
/// Two passes: first split the text into sections wherever a group-header
/// line begins, then greedily merge adjacent sections into chunks while a
/// merge keeps the chunk within `max_bytes` (counting the joining '\n').
/// A single section larger than `max_bytes` becomes one oversized chunk —
/// sections are never split mid-body.
///
/// Returns an empty Vec for empty input. Note: input is re-joined with '\n',
/// so a trailing newline on `ctx` is not preserved.
fn chunk_context(ctx: &str, max_bytes: usize) -> Vec<String> {
    // Pass 1: split into sections at group-header boundaries.
    let mut sections: Vec<String> = Vec::new();
    let mut current = String::new();

    for line in ctx.lines() {
        // A group header starts a new section (unless we're at the very top).
        if line.starts_with("--- ") && line.ends_with(" ---") && !current.is_empty() {
            sections.push(std::mem::take(&mut current));
        }
        if !current.is_empty() {
            current.push('\n');
        }
        current.push_str(line);
    }
    if !current.is_empty() {
        sections.push(current);
    }

    // Pass 2: merge small adjacent sections into chunks up to max_bytes.
    let mut chunks: Vec<String> = Vec::new();
    let mut chunk = String::new();
    for section in sections {
        // +1 accounts for the '\n' that would join section onto chunk.
        if !chunk.is_empty() && chunk.len() + section.len() + 1 > max_bytes {
            chunks.push(std::mem::take(&mut chunk));
        }
        if !chunk.is_empty() {
            chunk.push('\n');
        }
        // Fix: source was garbled to `push_str(§ion)`; the intended
        // argument is a borrow of the owned section string.
        chunk.push_str(&section);
    }
    if !chunk.is_empty() {
        chunks.push(chunk);
    }
    chunks
}
|
||||
|
||||
/// Persist not-yet-delivered context chunks under `dir`, keyed by session,
/// so later hook invocations can drip-feed them one at a time.
///
/// Best-effort: any previous chunk directory for this session is wiped
/// first, and all I/O failures are swallowed (a lost chunk only means less
/// injected context, never a hook failure).
fn save_pending_chunks(dir: &Path, session_id: &str, chunks: &[String]) {
    let chunks_dir = dir.join(format!("chunks-{}", session_id));

    // Drop whatever an earlier run left behind before writing anew.
    let _ = fs::remove_dir_all(&chunks_dir);
    if chunks.is_empty() {
        return;
    }

    let _ = fs::create_dir_all(&chunks_dir);
    for (idx, body) in chunks.iter().enumerate() {
        // Zero-padded names make lexicographic order equal delivery order.
        let _ = fs::write(chunks_dir.join(format!("{:04}", idx)), body);
    }
}
|
||||
|
||||
/// Remove and return the lowest-numbered pending chunk for `session_id`,
/// deleting the per-session directory once the last chunk is drained.
///
/// Returns `None` when the directory is absent, unreadable, or empty.
fn pop_pending_chunk(dir: &Path, session_id: &str) -> Option<String> {
    let chunks_dir = dir.join(format!("chunks-{}", session_id));
    if !chunks_dir.exists() {
        return None;
    }

    // Gather regular files and order by name; the writer used zero-padded
    // numeric names, so name order is delivery order.
    let mut files: Vec<_> = fs::read_dir(&chunks_dir)
        .ok()?
        .flatten()
        .filter(|entry| entry.file_type().map(|ft| ft.is_file()).unwrap_or(false))
        .collect();
    files.sort_by_key(|entry| entry.file_name());

    let next = files.first()?;
    let body = fs::read_to_string(next.path()).ok()?;
    let _ = fs::remove_file(next.path());

    // If that was the last file, tidy up the now-empty directory.
    let drained = fs::read_dir(&chunks_dir)
        .ok()
        .map(|mut it| it.next().is_none())
        .unwrap_or(true);
    if drained {
        let _ = fs::remove_dir(&chunks_dir);
    }

    Some(body)
}
|
||||
|
||||
/// Reverse-scan the transcript JSONL, extracting text from user/assistant
|
||||
/// messages until we accumulate `max_tokens` tokens of text content.
|
||||
/// Then search for all node keys as substrings, weighted by position.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue