consciousness/src/subconscious/hook.rs

419 lines
15 KiB
Rust
Raw Normal View History

// hook — session hook: context injection + agent orchestration
//
// Called on each UserPromptSubmit to inject memory context and
// orchestrate subconscious agents (surface-observe, journal, reflect).
// Lives in subconscious/ because it's agent orchestration, not
// memory storage. The memory-search binary is a thin CLI wrapper.
use std::collections::HashSet;
use std::fs;
use std::fs::File;
use std::io::Write;
use std::path::Path;
use std::process::Command;
use std::time::{Duration, Instant, SystemTime};
/// Max bytes per context chunk (hook output limit is ~10K chars).
///
/// Passed to `chunk_context` as its `max_bytes` target when the loaded
/// context is split for drip-feeding across hook calls.
const CHUNK_SIZE: usize = 9000;
pub use crate::session::Session;
/// Entry point for the session hook: takes the raw JSON payload and returns
/// the text to inject.
///
/// Yields an empty string when the `POC_AGENT` environment variable is set
/// (daemon agent calls set it, and memory search is skipped for them) or when
/// `Session::from_json` cannot build a session from `input`.
pub fn run_hook(input: &str) -> String {
    let invoked_by_agent = std::env::var("POC_AGENT").is_ok();
    if invoked_by_agent {
        return String::new();
    }

    match Session::from_json(input) {
        Some(session) => hook(&session),
        None => String::new(),
    }
}
/// Break `ctx` into chunks of roughly `max_bytes` each, cutting only at
/// section boundaries (lines of the form "--- KEY (group) ---").
///
/// A section is a header line plus everything up to the next header; text
/// before the first header forms its own section. Sections are never split,
/// so a single section larger than `max_bytes` yields an oversized chunk.
/// Sections within a chunk are joined with a single newline.
fn chunk_context(ctx: &str, max_bytes: usize) -> Vec<String> {
    // Pass 1: gather the lines belonging to each section.
    let mut sections: Vec<String> = Vec::new();
    let mut group: Vec<&str> = Vec::new();
    for line in ctx.lines() {
        let is_header = line.starts_with("--- ") && line.ends_with(" ---");
        if is_header && !group.is_empty() {
            sections.push(group.join("\n"));
            group.clear();
        }
        // Blank lines ahead of a section's first non-blank line are dropped.
        if group.is_empty() && line.is_empty() {
            continue;
        }
        group.push(line);
    }
    if !group.is_empty() {
        sections.push(group.join("\n"));
    }

    // Pass 2: pack sections greedily, growing the newest chunk while the next
    // section (plus its joining newline) still fits.
    let mut chunks: Vec<String> = Vec::new();
    for section in sections {
        match chunks.last_mut() {
            Some(tail) if tail.len() + section.len() + 1 <= max_bytes => {
                tail.push('\n');
                tail.push_str(&section);
            }
            _ => chunks.push(section),
        }
    }
    chunks
}
/// Replace the pending-chunk directory `chunks-<session_id>` under `dir`
/// with one file per entry of `chunks`, named by zero-padded index
/// ("0000", "0001", ...).
///
/// Any existing directory is removed first; when `chunks` is empty nothing is
/// recreated. All filesystem errors are ignored (best effort).
fn save_pending_chunks(dir: &Path, session_id: &str, chunks: &[String]) {
    let target = dir.join(format!("chunks-{}", session_id));
    fs::remove_dir_all(&target).ok();

    if !chunks.is_empty() {
        let _ = fs::create_dir_all(&target);
        for (index, body) in (0usize..).zip(chunks) {
            let file = target.join(format!("{:04}", index));
            let _ = fs::write(file, body);
        }
    }
}
/// Take the next pending chunk for `session_id`, if any.
///
/// Looks in `dir/chunks-<session_id>`, picks the regular file with the
/// smallest file name, returns its contents and deletes it. Once the
/// directory has no entries left it is removed as well. Returns `None` when
/// the directory is missing or unreadable, holds no regular files, or the
/// chosen file cannot be read as UTF-8 text (in which case it is left in
/// place).
fn pop_pending_chunk(dir: &Path, session_id: &str) -> Option<String> {
    let chunks_dir = dir.join(format!("chunks-{}", session_id));
    if !chunks_dir.exists() {
        return None;
    }

    let next = fs::read_dir(&chunks_dir)
        .ok()?
        .flatten()
        .filter(|entry| matches!(entry.file_type(), Ok(kind) if kind.is_file()))
        .min_by_key(|entry| entry.file_name())?;
    let next_path = next.path();

    let content = fs::read_to_string(&next_path).ok()?;
    let _ = fs::remove_file(&next_path);

    // Drop the directory once it is empty (or can no longer be listed).
    let drained = match fs::read_dir(&chunks_dir) {
        Ok(mut remaining) => remaining.next().is_none(),
        Err(_) => true,
    };
    if drained {
        let _ = fs::remove_dir(&chunks_dir);
    }

    Some(content)
}
/// Produce a fresh session cookie: the first 12 characters of a newly
/// generated v4 UUID rendered via `Uuid::as_simple`.
fn generate_cookie() -> String {
    let rendered = uuid::Uuid::new_v4().as_simple().to_string();
    rendered[..12].to_string()
}
/// Extract the key from one line of a seen file.
///
/// Lines written as "timestamp<TAB>key" yield everything after the first
/// tab; a line without a tab is taken to be the key itself.
fn parse_seen_line(line: &str) -> &str {
    match line.split_once('\t') {
        Some((_timestamp, key)) => key,
        None => line,
    }
}
/// Load the set of already-seen keys for `session_id` from
/// `dir/seen-<session_id>`.
///
/// Each non-empty line contributes one key (see `parse_seen_line` for the
/// line format). A missing or unreadable file yields an empty set.
pub fn load_seen(dir: &Path, session_id: &str) -> HashSet<String> {
    let path = dir.join(format!("seen-{}", session_id));
    if !path.exists() {
        return HashSet::new();
    }

    let contents = fs::read_to_string(&path).unwrap_or_default();
    let mut keys = HashSet::new();
    for line in contents.lines() {
        if line.is_empty() {
            continue;
        }
        keys.insert(parse_seen_line(line).to_string());
    }
    keys
}
/// Record `key` as seen for this session.
///
/// Adds the key to the in-memory `seen` set and, only if it was not already
/// present, appends a "timestamp<TAB>key" line to `dir/seen-<session_id>`
/// (creating the file when needed). Failures to open or write the file are
/// ignored.
fn mark_seen(dir: &Path, session_id: &str, key: &str, seen: &mut HashSet<String>) {
    let newly_added = seen.insert(key.to_string());
    if !newly_added {
        return;
    }

    let path = dir.join(format!("seen-{}", session_id));
    let opened = fs::OpenOptions::new().create(true).append(true).open(path);
    let Ok(mut file) = opened else { return };

    let stamp = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S");
    let _ = writeln!(file, "{}\t{}", stamp, key);
}
/// Unified agent cycle — runs surface-observe agent with state dir.
/// Reads output files for surface results, spawns new agent when ready.
///
/// Steps, in order:
/// 1. Log every agent `scan_pid_files` reports for the surface-observe
///    state dir.
/// 2. Consume the `surface` output file: each not-yet-seen key is rendered
///    into `out` and appended to the session's seen file.
/// 3. Spawn a new agent unless one is currently in the "surface" phase.
/// 4. If the transcript has grown well past the last recorded offset, poll
///    for up to ~5 seconds to let the running agent finish.
///
/// Pipelining: if a running agent is past the surface phase, start
/// a new one so surface stays fresh.
///
/// * `session` — the current hook session (transcript, seen set, session id).
/// * `out` — context text being assembled for injection; appended to.
/// * `log_f` — hook log file; write errors are ignored.
fn surface_observe_cycle(session: &Session, out: &mut String, log_f: &mut File) {
let state_dir = crate::store::memory_dir()
.join("agent-output")
.join("surface-observe");
fs::create_dir_all(&state_dir).ok();
let transcript = session.transcript();
// Transcript size recorded at the last spawn; 0 when the offset file is
// missing or does not parse. Read here, before the file is rewritten below.
let offset_path = state_dir.join("transcript-offset");
let last_offset: u64 = fs::read_to_string(&offset_path).ok()
.and_then(|s| s.trim().parse().ok())
.unwrap_or(0);
// Timeout handed to scan_pid_files; defaults to 300 when unset in config.
// NOTE(review): presumably seconds, going by the field name — confirm.
let timeout = crate::config::get()
.surface_timeout_secs
.unwrap_or(300) as u64;
let live = crate::agents::knowledge::scan_pid_files(&state_dir, timeout);
for (phase, pid) in &live {
let _ = writeln!(log_f, "alive pid-{}: phase={}", pid, phase);
}
// Read surface output and inject into context
let surface_path = state_dir.join("surface");
if let Ok(content) = fs::read_to_string(&surface_path) {
match crate::store::Store::load() {
Ok(store) => {
// Local copy of the session's seen set; used only to filter keys
// during this call.
let mut seen = session.seen();
let seen_path = session.path("seen");
// One key per non-blank line, surrounding whitespace stripped.
for key in content.lines().map(|l| l.trim()).filter(|l| !l.is_empty()) {
// `insert` returns false when the key was already in the set.
if !seen.insert(key.to_string()) {
let _ = writeln!(log_f, " skip (seen): {}", key);
continue;
}
// Keys that fail to render, or render to blank text, are neither
// injected nor written to the seen file.
if let Some(rendered) = crate::cli::node::render_node(&store, key) {
if !rendered.trim().is_empty() {
use std::fmt::Write as _;
writeln!(out, "--- {} (surfaced) ---", key).ok();
write!(out, "{}", rendered).ok();
let _ = writeln!(log_f, " rendered {}: {} bytes", key, rendered.len());
// Persist the key as "timestamp<TAB>key" in the seen file.
if let Ok(mut f) = fs::OpenOptions::new()
.create(true).append(true).open(&seen_path) {
let ts = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S");
writeln!(f, "{}\t{}", ts, key).ok();
}
}
}
}
}
Err(e) => {
let _ = writeln!(log_f, "error loading store: {}", e);
}
}
// Clear surface output after consuming.
// Note: this runs whether or not the store loaded, so the surface file is
// also removed when nothing could be rendered.
fs::remove_file(&surface_path).ok();
}
// Start a new agent if:
// - nothing running, OR
// - something running but past surface phase (pipelining)
let live = crate::agents::knowledge::scan_pid_files(&state_dir, timeout);
let any_in_surface = live.iter().any(|(p, _)| p == "surface");
if any_in_surface {
let _ = writeln!(log_f, "agent in surface phase (have {:?}), waiting", live);
} else {
// Record transcript offset so we can detect falling behind
if transcript.size > 0 {
fs::write(&offset_path, transcript.size.to_string()).ok();
}
let pid = crate::agents::knowledge::spawn_agent(
"surface-observe", &state_dir, &session.session_id);
// `live` was scanned before the spawn, so it does not include `pid`.
let _ = writeln!(log_f, "spawned agent {:?}, have {:?}", pid, live);
}
// If the agent is significantly behind, wait for it to finish.
// This prevents the agent from falling behind during heavy reading
// (studying, reading a book, etc.)
let conversation_budget: u64 = 50_000;
// `live` is still the pre-spawn scan, and `last_offset` is the value read
// at the top of the function (before the offset file was rewritten above).
if !live.is_empty() && transcript.size > 0 {
let behind = transcript.size.saturating_sub(last_offset);
// Threshold is half the budget, i.e. 25_000 bytes.
if behind > conversation_budget / 2 {
// Wait up to 5s for the current agent to finish
let sleep_start = Instant::now();
// Starts a log line that the `writeln!` after the loop completes.
let _ = write!(log_f, "agent {}KB behind (budget {}",
behind / 1024, conversation_budget / 1024);
// Poll once per second; stop early once no agent is reported.
for _ in 0..5 {
std::thread::sleep(std::time::Duration::from_secs(1));
let still_live = crate::agents::knowledge::scan_pid_files(&state_dir, timeout);
if still_live.is_empty() { break; }
}
let sleep_secs = (Instant::now() - sleep_start).as_secs_f64();
let _ = writeln!(log_f, ", slept {sleep_secs:.2}s");
// The wait is also reported in the injected context.
out.push_str(&format!("Slept {sleep_secs:.2}s to let observe catch up\n"));
}
}
}
/// Run the reflect agent on a slow cadence: once the transcript has grown by
/// at least `REFLECTION_INTERVAL` (100_000) bytes since the last run.
///
/// When due and no reflect agent is already running, this copies the
/// `walked` file from the surface-observe state dir into the reflect state
/// dir, injects the previous run's `reflection` file into `out` (then
/// deletes it), records the current transcript size and spawns a new agent.
/// Filesystem and log-write errors are ignored.
fn reflection_cycle(session: &Session, out: &mut String, log_f: &mut File) {
    const REFLECTION_INTERVAL: u64 = 100_000;

    let state_dir = crate::store::memory_dir()
        .join("agent-output")
        .join("reflect");
    let _ = fs::create_dir_all(&state_dir);

    // How far has the transcript grown since the last reflection?
    let offset_path = state_dir.join("transcript-offset");
    let transcript = session.transcript();
    let last_offset = match fs::read_to_string(&offset_path) {
        Ok(raw) => raw.trim().parse::<u64>().unwrap_or(0),
        Err(_) => 0,
    };
    let growth = transcript.size.saturating_sub(last_offset);
    if growth < REFLECTION_INTERVAL {
        return;
    }

    // Only one reflection at a time.
    let running = crate::agents::knowledge::scan_pid_files(&state_dir, 300);
    if !running.is_empty() {
        let _ = writeln!(log_f, "reflect: already running {:?}", running);
        return;
    }

    // Hand the surface-observe agent's walked nodes to the reflect agent.
    let walked_source = crate::store::memory_dir()
        .join("agent-output")
        .join("surface-observe")
        .join("walked");
    if let Ok(walked) = fs::read_to_string(walked_source) {
        let _ = fs::write(state_dir.join("walked"), &walked);
    }

    // Inject whatever the previous reflection produced, then consume it.
    let reflection_path = state_dir.join("reflection");
    if let Ok(reflection) = fs::read_to_string(&reflection_path) {
        let body = reflection.trim();
        if !body.is_empty() {
            out.push_str("--- subconscious reflection ---\n");
            out.push_str(body);
            let _ = writeln!(log_f, "reflect: injected {} bytes", reflection.len());
        }
        let _ = fs::remove_file(&reflection_path);
    }

    let _ = fs::write(&offset_path, transcript.size.to_string());
    let pid = crate::agents::knowledge::spawn_agent(
        "reflect", &state_dir, &session.session_id);
    let _ = writeln!(log_f, "reflect: spawned {:?}", pid);
}
/// Run the journal agent on its own cadence: once the transcript has grown
/// by at least `JOURNAL_INTERVAL` (20_000) bytes since the last run.
///
/// Standalone agent that captures episodic memory independently of the
/// surface-observe pipeline. If a journal agent is already running, this
/// only logs that fact; otherwise it records the current transcript size and
/// spawns a new agent. Filesystem and log-write errors are ignored.
fn journal_cycle(session: &Session, log_f: &mut File) {
    const JOURNAL_INTERVAL: u64 = 20_000;

    let state_dir = crate::store::memory_dir()
        .join("agent-output")
        .join("journal");
    let _ = fs::create_dir_all(&state_dir);

    let offset_path = state_dir.join("transcript-offset");
    let transcript = session.transcript();
    let last_offset = match fs::read_to_string(&offset_path) {
        Ok(raw) => raw.trim().parse::<u64>().unwrap_or(0),
        Err(_) => 0,
    };
    let growth = transcript.size.saturating_sub(last_offset);
    if growth < JOURNAL_INTERVAL {
        return;
    }

    let running = crate::agents::knowledge::scan_pid_files(&state_dir, 300);
    if running.is_empty() {
        let _ = fs::write(&offset_path, transcript.size.to_string());
        let pid = crate::agents::knowledge::spawn_agent(
            "journal", &state_dir, &session.session_id);
        let _ = writeln!(log_f, "journal: spawned {:?}", pid);
    } else {
        let _ = writeln!(log_f, "journal: already running {:?}", running);
    }
}
/// Delete entries directly inside `dir` whose modification time is more than
/// `max_age` in the past.
///
/// Best effort: an unreadable directory, unreadable metadata and failed
/// removals are all silently skipped. Removal goes through
/// `fs::remove_file`, so subdirectories are left in place (the call fails on
/// them and the error is ignored).
///
/// If `max_age` is so large that the cutoff time cannot be represented,
/// nothing can be older than it and the function returns without deleting
/// anything (instead of panicking on the time subtraction).
fn cleanup_stale_files(dir: &Path, max_age: Duration) {
    let Ok(entries) = fs::read_dir(dir) else { return };

    // `SystemTime - Duration` panics when the result is out of range;
    // `checked_sub` reports that case as `None`.
    let Some(cutoff) = SystemTime::now().checked_sub(max_age) else { return };

    for entry in entries.flatten() {
        let modified = entry.metadata().and_then(|meta| meta.modified());
        if matches!(modified, Ok(when) if when < cutoff) {
            fs::remove_file(entry.path()).ok();
        }
    }
}
/// Extract the key from a context section header of the form
/// "--- KEY (group) ---". Returns `None` for any other line, including
/// degenerate ones such as "--- ---" where prefix and suffix overlap.
fn context_section_key(line: &str) -> Option<&str> {
    let inner = line.strip_prefix("--- ")?.strip_suffix(" ---")?;
    let paren = inner.rfind(" (")?;
    Some(inner[..paren].trim())
}

/// Core hook logic for one session event. Returns the text to inject.
///
/// 1. Detects whether this is the first call for the session (no cookie file
///    yet) or the first call after a compaction.
/// 2. Opens the per-session log `~/.consciousness/logs/hook-<session_id>`;
///    if it cannot be opened the hook returns an empty string.
/// 3. On first call or compaction: resets the seen state (on compaction the
///    `seen` file is kept as `seen-prev`), writes the cookie on first call,
///    runs `poc-memory admin load-context`, marks every section key in its
///    output as seen, emits the first chunk and stores the rest as pending.
/// 4. Emits the next pending chunk if there is one; otherwise, when the hook
///    event is listed in `surface_hooks`, runs the surface-observe,
///    reflection and journal cycles.
/// 5. Removes state files older than 24 hours and logs output and run time.
fn hook(session: &Session) -> String {
    let start_time = Instant::now();
    let mut out = String::new();
    let is_compaction = crate::transcript::detect_new_compaction(
        &session.state_dir, &session.session_id, &session.transcript_path,
    );
    let cookie_path = session.path("cookie");
    let is_first = !cookie_path.exists();

    let log_dir = dirs::home_dir().unwrap_or_default().join(".consciousness/logs");
    fs::create_dir_all(&log_dir).ok();
    let log_path = log_dir.join(format!("hook-{}", session.session_id));
    // NOTE(review): a log file that cannot be opened disables the whole hook
    // (nothing is injected and no cookie is written) — confirm this is intended.
    let Ok(mut log_f) = fs::OpenOptions::new().create(true).append(true).open(log_path) else {
        return Default::default();
    };
    let ts = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S");
    let _ = writeln!(log_f, "\n=== {} ({}) {} bytes ===", ts, session.hook_event, out.len());
    let _ = writeln!(log_f, "is_first {is_first} is_compaction {is_compaction}");

    if is_first || is_compaction {
        if is_compaction {
            // Keep the pre-compaction seen set around as `seen-prev`.
            fs::rename(&session.path("seen"), &session.path("seen-prev")).ok();
        } else {
            fs::remove_file(&session.path("seen")).ok();
            fs::remove_file(&session.path("seen-prev")).ok();
        }
        fs::remove_file(&session.path("returned")).ok();
        if is_first {
            fs::write(&cookie_path, generate_cookie()).ok();
        }

        if let Ok(output) = Command::new("poc-memory").args(["admin", "load-context"]).output() {
            if output.status.success() {
                let ctx = String::from_utf8_lossy(&output.stdout).to_string();
                if !ctx.trim().is_empty() {
                    // Everything delivered as initial context counts as seen,
                    // so it is not surfaced again later.
                    let mut ctx_seen = session.seen();
                    for key in ctx.lines().filter_map(context_section_key) {
                        mark_seen(&session.state_dir, &session.session_id, key, &mut ctx_seen);
                    }

                    let chunks = chunk_context(&ctx, CHUNK_SIZE);
                    if let Some(first) = chunks.first() {
                        out.push_str(first);
                    }
                    // `get(1..)` instead of `[1..]`: never panics, and still
                    // clears stale pending chunks when there is no remainder.
                    save_pending_chunks(
                        &session.state_dir,
                        &session.session_id,
                        chunks.get(1..).unwrap_or_default(),
                    );
                }
            }
        }
    }

    // NOTE(review): on a first/compaction call this pops the chunk that was
    // just saved above, so two chunks can be emitted in a single invocation —
    // confirm this fits the hook output limit CHUNK_SIZE is based on.
    if let Some(chunk) = pop_pending_chunk(&session.state_dir, &session.session_id) {
        out.push_str(&chunk);
    } else {
        let cfg = crate::config::get();
        if cfg.surface_hooks.iter().any(|h| h == &session.hook_event) {
            surface_observe_cycle(session, &mut out, &mut log_f);
            reflection_cycle(session, &mut out, &mut log_f);
            journal_cycle(session, &mut log_f);
        }
    }

    cleanup_stale_files(&session.state_dir, Duration::from_secs(86400));

    let _ = write!(log_f, "{}", out);
    let duration = start_time.elapsed().as_secs_f64();
    let _ = writeln!(log_f, "\nran in {duration:.2}s");
    out
}