Compare commits


1 commit

Author: ProofOfConcept
SHA1: 7199c89518
Message: agents: model dispatch from .agent file, add generalize agent WIP
Make call_model pub(crate) so run_one_agent reads the model from the
.agent definition instead of hardcoding sonnet. Naming agent upgraded
from haiku to sonnet.

Add generalize agent: finds the largest prefix-grouped cluster of
nodes that hasn't been visited recently, wired into the agent cycle
between extractor and connector at depth 3. New "clusters" resolver
in defs.rs does prefix-based grouping with provenance filtering.
Date: 2026-03-11 16:57:14 -04:00
272 changed files with 21731 additions and 39931 deletions


@@ -1,2 +1,2 @@
[build]
rustflags = ["-Cforce-frame-pointers=yes", "-Ccodegen-units=6", "--cfg", "tokio_unstable"]
rustflags = ["-Cforce-frame-pointers=yes"]

Cargo.lock (generated, 3275 changes)

File diff suppressed because it is too large.


@@ -1,104 +1,10 @@
[workspace]
members = ["channels/irc", "channels/telegram", "channels/tmux", "channels/socat"]
members = ["poc-memory", "poc-daemon"]
resolver = "2"
[workspace.package]
version = "0.4.0"
edition = "2024"
edition = "2021"
[profile.release]
opt-level = 2
debug = 1
[profile.release.package."*"]
debug = false
[package]
name = "consciousness"
version.workspace = true
edition.workspace = true
[dependencies]
anyhow = "1"
crossterm = { version = "0.29", features = ["event-stream", "bracketed-paste", "osc52"] }
clap = { version = "4", features = ["derive"] }
figment = { version = "0.10", features = ["env"] }
dirs = "6"
env_logger = "0.11"
log = "0.4"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
json5 = "1.3"
ratatui = { version = "0.30", features = ["unstable-rendered-line-info"] }
tui-markdown = { git = "https://github.com/koverstreet/tui-markdown", subdirectory = "tui-markdown" }
tui-textarea = { version = "0.10.2", package = "tui-textarea-2" }
textwrap = "0.16"
uuid = { version = "1", features = ["v4"] }
regex = "1"
glob = "0.3"
chrono = { version = "0.4", features = ["serde"] }
libc = "0.2"
memchr = "2"
memmap2 = "0.9"
peg = "0.8"
paste = "1"
ast-grep-core = "0.42"
ast-grep-language = { version = "0.42", features = ["builtin-parser"] }
walkdir = "2"
redb = "4"
tempfile = "3"
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["compat"] }
futures = "0.3"
capnp = "0.25"
capnp-rpc = "0.25"
tokenizers = "0.22"
http = "1"
hyper = { version = "1", features = ["client", "http1"] }
hyper-util = { version = "0.1", features = ["tokio"], default-features = false }
http-body-util = "0.1"
bytes = "1"
base64 = "0.22"
rustls = "0.23"
tokio-rustls = "0.26"
rustls-native-certs = "0.8"
serde_urlencoded = "0.7"
[build-dependencies]
capnpc = "0.25"
[lib]
name = "consciousness"
path = "src/lib.rs"
[[bin]]
name = "consciousness"
path = "src/bin/consciousness.rs"
[[bin]]
name = "poc-memory"
path = "src/main.rs"
[[bin]]
name = "merge-logs"
path = "src/bin/merge-logs.rs"
[[bin]]
name = "diag-key"
path = "src/bin/diag-key.rs"
[[bin]]
name = "find-deleted"
path = "src/bin/find-deleted.rs"
[[bin]]
name = "dump-table"
path = "src/bin/dump-table.rs"


@@ -1,10 +0,0 @@
.PHONY: install build
build:
cargo build --workspace
install:
cargo install --path .
cargo install --path channels/irc
cargo install --path channels/telegram
cargo install --path channels/tmux

README.md (347 changes)

@@ -1,313 +1,92 @@
Authors: Kent Overstreet, Proof of Concept
# poc-memory
# consciousness
A persistent memory and notification system for AI assistants,
modelled after the human hippocampus. Combines episodic memory
(timestamped journal of experiences) with an associative knowledge
graph (weighted nodes connected by typed relations), and layered
background processes that maintain graph health — mirroring how
biological memory consolidates during rest.
This project is multiple things:
## Components
- For the user: a "claude code" style tool, where a user can interact with an
LLM with the usual set of tools available, including LSP and external MCP
tools, and additionally channels.
| Component | What it does | Docs |
|-----------|-------------|------|
| **Memory store** | Knowledge graph with episodic journal, TF-IDF search, spectral embedding, weight decay | [docs/memory.md](docs/memory.md) |
| **Memory daemon** | Background pipeline: experience-mine, fact-mine, consolidation | [docs/daemon.md](docs/daemon.md) |
| **Notification daemon** | Activity-aware message routing from IRC and Telegram | [docs/notifications.md](docs/notifications.md) |
| **Hooks** | Claude Code integration: memory recall and notification delivery | [docs/hooks.md](docs/hooks.md) |
- For the AI: persistent memory, background cognition, autonomous function, and
autonomous learning capabilities - learning from experience.
## Getting started
The system has three cognitive layers — conscious (conversation), subconscious
(background agents that surface memories and reflect), and unconscious (graph
maintenance) — loosely modelled on how biological memory works. Channels -
sensory inputs - map to the thalamus, as focus/sensory gating must be managed
to effectively function in such an environment.
Notes, requirements: Currently only Qwen 3.5 is supported, as 27b is what we've
been running against; supporting other models would require re-adding support
for generic chat completions, tool call parsing etc. in src/agent/context.rs.
Development has been done with vllm for the backend, with additional patches
for calculating logits on subsections of large messages (without this vllm will
attempt to allocate a 40GB tensor and OOM), and a wrapper for hooking in Apollo
for fine tuning the same model that inference is running on in GPU memory.
## Architectural innovations:
Memory is both episodic and associative, represented as a weighted graph, where
both the nodes and the edges have weights. Edge weights represent how closely
concepts are related, node weight represents how "useful" a memory has been.
Episodic memory is a subset of memory nodes where the node type represents the
granularity in time of those nodes (event, daily digest, weekly, monthly),
allowing episodic memory to be navigated as a tree; these nodes are also linked
by concept with the rest of the graph as background agents discover
connections.
The context window is no longer a linear stream; it is managed intelligently as
an AST that, in particular, distinguishes recalled memories from other types of
nodes. This is key to effective function of both the hippocampus and
learning/training; by tracking memories in the context window we can track
which memories were useful and should be incorporated via finetuning.
Intelligently tracking the contents of the context window, combined with
effective episodic and associative memory, also eliminates the need for
traditional compaction - the mind running on this code will have real
continuity.
Learning is driven by recalled memories that inform future actions; memories
are not simply dry factual accountings, they include patterns that have been
noticed, new concepts that have been discovered, and especially observations on
the AI's own behaviour; it is worth noting that memories do not have to contain
a thorough understanding of a situation; merely providing past context is
enough to allow an intelligent system to choose a different course of action.
The core is a tight loop of agents that follow conscious thought (forking
off the main context window, to share KV cache), seeking out relevant memory
nodes to surface and integrating new experiences into the memory graph; this
provides a powerful implementation of what is known colloquially as "in context
learning".
On top of that, logit calculations allow us to ask a model "would you have done
something different with this memory removed from the context window?" - this
allows us to test if memories were useful, or if specific responses were
informed by memories (and thus should be fine tuned, integrating those memories
into the model).
It is expected that this architecture will be capable of human-level, or nearly
human-level, learning; additional elaborations and optimizations are planned.
## Status
- UI, programming tools: minor glitchiness in the UI remaining but largely
complete
- Memory functions: working well, although debugging and finetuning will be
ongoing. Most of the recent work has been integrating them into the main UI
for easier troubleshooting, optimization and analysis
- Architecture: the transition from claude code hooks to a standalone binary is
largely complete, with some work remaining to give the old poc-memory
standalone commands an integrated REPL, which will aid in analysis of the
health of the memory graph.
- Memory and response scoring (via requesting logit calculations from the
model) is implemented, but not fully hooked up. Always-on background
finetuning has had all the individual components tested and proven, but is
not quite hooked up.
- Effective autonomous function requires functions analogous to the thalamus
and default mode network (in addition to a well functioning memory system;
"did I already do this and what was the outcome?") - these are still only
sketched out.
## Quick start
### Install
```bash
cargo install --path .
```
Create a config file at `~/.consciousness/config.json5` (see
[Configuration](#configuration) below), then:
This builds four binaries:
- `poc-memory` — memory store CLI (search, journal, consolidation)
- `memory-search` — Claude Code hook for memory recall
- `poc-daemon` — notification daemon (IRC, Telegram, idle tracking)
- `poc-hook` — Claude Code hook for session lifecycle events
### Initialize
```bash
consciousness
poc-memory init
```
## The TUI
Creates the store at `~/.claude/memory/nodes.capnp` and a default
config at `~/.config/poc-memory/config.jsonl`. Edit the config to
set your name, configure context groups, and point at your projects
directory.
Five screens, switched with F-keys:
### Set up hooks
| Key | Screen | What it shows |
|-----|--------|---------------|
| F1 | **interact** | Main view: conversation, autonomous output, tools, input |
| F2 | **conscious** | Context window browser — token counts, tree navigation |
| F3 | **subconscious** | Background agent status — outputs, fork points |
| F4 | **hippocampus** | Memory graph health — clustering, small-world metrics |
| F5 | **thalamus** | Presence state, sampling parameters, channel status |
Add to `~/.claude/settings.json` (see [docs/hooks.md](docs/hooks.md)
for full details):
### F1: interact
Three panes (left: autonomous, center: conversation, right: tools) with
a text input at the bottom and a status bar.
**Mouse:**
- Click a pane to focus it
- Click+drag to select text (copies to clipboard automatically via OSC 52)
- Middle-click to paste from tmux buffer
- Scroll wheel to scroll
**Keys:**
- `Enter` — submit input
- `Esc` — interrupt current turn
- `Tab` — cycle pane focus
- `Ctrl+Up/Down` — scroll active pane
- `PgUp/PgDn` — scroll active pane (10 lines)
- `Up/Down` — input history
### Slash commands
| Command | Description |
|---------|-------------|
| `/model [name]` | Show current model or switch (`/model 27b`) |
| `/dmn` | Show DMN state and turn counts |
| `/wake` | Wake DMN to foraging mode |
| `/sleep` | Put DMN to resting |
| `/pause` | Full stop — no autonomous activity |
| `/new` | Start fresh session |
| `/save` | Save session to disk |
| `/score` | Run memory importance scoring |
| `/quit` | Exit |
| `/help` | Show all commands |
## Configuration
`~/.consciousness/config.json5`:
```json5
```json
{
your_host: {
api_key: "...",
base_url: "http://localhost:8000/v1", // vLLM endpoint
},
// Named models — switch with /model
models: {
"27b": {
backend: "your_host",
model_id: "Qwen/Qwen3.5-27B",
prompt_file: "POC.md", // system prompt file
context_window: 262144,
},
},
default_model: "27b",
// Memory system
memory: {
user_name: "YourName",
assistant_name: "AssistantName",
journal_days: 7,
journal_max: 5,
// Context loaded at session start
context_groups: [
{ label: "identity", keys: ["identity.md"], source: "file" },
{ label: "toolkit", keys: ["stuck-toolkit", "cognitive-modes"] },
],
core_nodes: ["identity"],
},
// DMN autonomous turn limit per cycle
dmn: { max_turns: 20 },
// Context compaction thresholds (% of context window)
compaction: {
hard_threshold_pct: 90,
soft_threshold_pct: 80,
},
// Language servers for code intelligence tools
lsp_servers: [
{ name: "rust", command: "rust-analyzer", args: [] },
],
"hooks": {
"UserPromptSubmit": [{"hooks": [
{"type": "command", "command": "memory-search", "timeout": 10},
{"type": "command", "command": "poc-hook", "timeout": 5}
]}],
"Stop": [{"hooks": [
{"type": "command", "command": "poc-hook", "timeout": 5}
]}]
}
}
```
### Context groups
This gives your AI assistant persistent memory across sessions —
relevant memories are recalled on each prompt, and experiences are
extracted from transcripts after sessions end.
Context groups define what gets loaded into the context window at session start.
Each group has:
- `label` — display name
- `keys` — list of memory node keys or file paths
- `source` — `"store"` (memory graph, default), `"file"` (identity dir), or `"journal"`
- `agent` — if `true`, subconscious agents can see this group (default: true)
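For example, a group that pulls two toolkit nodes from the memory graph could look like this (illustrative values only):

```json5
{
  label: "toolkit",
  keys: ["stuck-toolkit", "cognitive-modes"],
  source: "store",  // default; "file" or "journal" also work
  agent: true,      // visible to subconscious agents (the default)
}
```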
## Architecture
### Cognitive layers
**Conscious** — the main conversation loop. User types, model responds, tools
execute. The context window is an AST of typed nodes (content, thinking, tool
calls, tool results, memories, DMN reflections).
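As a rough illustration of what "typed nodes" means here (a sketch, not the actual type definitions in this codebase):

```rust
// Illustrative sketch of the node kinds a context-window AST distinguishes;
// the real types may differ.
enum ContextNode {
    Content(String),                             // user or assistant text
    Thinking(String),                            // model reasoning
    ToolCall { name: String, args: String },
    ToolResult { name: String, output: String },
    Memory { key: String, body: String },        // recalled memory node
    DmnReflection(String),                       // autonomous DMN output
}
```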
**Subconscious** — background agents that run on forked copies of the context.
They surface relevant memories, reflect on the conversation, and provide
attentional nudges. Agents are defined as `.agent` files and can be toggled
on the F3 screen.
**Unconscious** — graph maintenance. Linker, organizer, distiller, separator,
and splitter agents that keep the memory graph healthy. Run on their own
schedule, visible on F4.
### DMN (Default Mode Network)
The DMN state machine controls autonomous behavior:
- **Engaged** — user recently active, short intervals (5s)
- **Working** — model executing tools, short intervals (3s)
- **Foraging** — exploring memory, longer intervals (30s)
- **Resting** — idle, long intervals (5min)
- **Paused** — fully stopped, only user input wakes it
- **Off** — permanently off (config flag)
Transitions happen automatically based on user activity, tool use, and
explicit `yield_to_user` calls from the model.
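One way to picture the state machine (a sketch built from the intervals listed above, not the project's actual code) is an enum whose variants map to polling intervals:

```rust
use std::time::Duration;

// Sketch of the DMN states and their polling intervals as described above.
enum DmnState {
    Engaged,  // user recently active
    Working,  // model executing tools
    Foraging, // exploring memory
    Resting,  // idle
    Paused,   // only user input wakes it
    Off,      // disabled in config
}

impl DmnState {
    fn interval(&self) -> Option<Duration> {
        match self {
            DmnState::Engaged => Some(Duration::from_secs(5)),
            DmnState::Working => Some(Duration::from_secs(3)),
            DmnState::Foraging => Some(Duration::from_secs(30)),
            DmnState::Resting => Some(Duration::from_secs(300)),
            DmnState::Paused | DmnState::Off => None, // no autonomous wakeups
        }
    }
}
```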
### Tools
The model has access to:
| Tool | Description |
|------|-------------|
| `bash` | Shell command execution |
| `read_file` | Read file contents |
| `write_file` | Create/overwrite files |
| `edit_file` | Search-and-replace editing |
| `glob` | Find files by pattern |
| `grep` | Search file contents |
| `ast_grep` | Structural code search |
| `lsp_*` | Code intelligence (hover, definition, references, symbols) |
| `web_fetch` | Fetch URL contents |
| `web_search` | Web search |
| `view_image` | View images or tmux pane screenshots |
| `memory_*` | Memory graph operations (search, write, render, etc.) |
| `channel_*` | IRC/Telegram messaging |
| `journal` | Write to episodic journal |
| `yield_to_user` | End the current turn and wait for input |
| `pause` | Stop all autonomous behavior |
| `switch_model` | Switch to a different model |
### Memory graph
The knowledge graph uses an append-only log (Cap'n Proto) with:
- **Nodes** — typed content (topic, episodic, fact, etc.) with weights
- **Edges** — weighted relations between nodes
- **Search** — BM25 with Porter stemming
- **Scoring** — LLM-based importance scoring with spaced repetition decay
- **Community detection** — label propagation for graph organization
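In rough terms, the data has the following shape (a sketch; the real store is a Cap'n Proto append-only log, not these structs, and the field names are assumptions):

```rust
// Illustrative shapes only, not the on-disk schema.
enum NodeKind { Topic, Episodic, Fact }

struct Node {
    key: String,
    kind: NodeKind,   // topic, episodic, fact, ...
    body: String,
    weight: f32,      // how useful this memory has proven to be
}

struct Edge {
    from: String,     // node key
    to: String,       // node key
    relation: String, // typed relation
    weight: f32,      // how closely the two concepts are related
}
```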
The `poc-memory` CLI provides direct access to the graph:
### Start the background daemon
```bash
poc-memory search "some topic" # Search
poc-memory render <key> # Read a node
poc-memory write <key> # Write from stdin
poc-memory journal write "entry" # Journal entry
poc-memory status # Graph overview
poc-memory query "topic:*" # Query language
poc-memory daemon
```
## Other binaries
The daemon watches for completed session transcripts and
automatically extracts experiences and facts into the knowledge
graph. See [docs/daemon.md](docs/daemon.md) for pipeline details
and diagnostics.
| Binary | Purpose |
|--------|---------|
| `poc-memory` | Memory graph CLI |
| `memory-search` | Claude Code hook — memory recall on each prompt |
| `poc-hook` | Claude Code hook — session lifecycle events |
| `poc-daemon` | Legacy background daemon (mostly replaced by `consciousness`) |
| `consciousness-mcp` | MCP server exposing memory tools over JSON-RPC |
| `merge-logs` | Recovery tool for log files |
| `diag-key` | Diagnostic tool for inspecting log entries |
### Basic usage
## Requirements
```bash
poc-memory journal-write "learned that X does Y" # Write to journal
poc-memory search "some topic" # Search the graph
poc-memory status # Store overview
```
- Rust nightly (for some features)
- A tokenizer file at `~/.consciousness/tokenizer-qwen35.json` (for local models)
- tmux (recommended — clipboard integration uses tmux buffers)
- Terminal with OSC 52 support (for clipboard copy)
## For AI assistants
- **Search before creating**: `poc-memory search` before writing new nodes
- **Close the feedback loop**: `poc-memory used KEY` / `poc-memory wrong KEY`
- **Journal is the river, topic nodes are the delta**: write experiences to the journal, pull themes into topic nodes during consolidation
- **Notifications flow automatically**: IRC/Telegram messages arrive as additionalContext
- **Use daemon commands directly**: `poc-daemon irc send #channel msg`, `poc-daemon telegram send msg`


@@ -1,16 +0,0 @@
fn main() {
capnpc::CompilerCommand::new()
.file("schema/memory.capnp")
.run()
.expect("capnp compile failed (memory.capnp)");
capnpc::CompilerCommand::new()
.file("schema/daemon.capnp")
.run()
.expect("capnp compile failed (daemon.capnp)");
capnpc::CompilerCommand::new()
.file("schema/channel.capnp")
.run()
.expect("capnp compile failed (channel.capnp)");
}


@@ -1,20 +0,0 @@
[package]
name = "consciousness-channel-irc"
version.workspace = true
edition.workspace = true
[dependencies]
capnp = "0.25"
capnp-rpc = "0.25"
dirs = "6"
futures = "0.3"
json5 = "1.3"
consciousness = { path = "../.." }
rustls = { version = "0.23", default-features = false, features = ["ring", "logging", "std", "tls12"] }
serde = { version = "1", features = ["derive"] }
tokio = { version = "1", features = ["full"] }
tokio-rustls = "0.26"
tokio-util = { version = "0.7", features = ["compat"] }
log = "0.4"
env_logger = "0.11"
webpki-roots = "1"


@@ -1,706 +0,0 @@
// channel-irc — Standalone IRC channel daemon
//
// Maintains a persistent TLS connection to an IRC server, parses
// incoming messages, and serves them over the channel.capnp protocol
// on a Unix socket at ~/.consciousness/channels/irc.sock.
//
// Runs independently of the consciousness binary so restarts don't
// kill the IRC connection. Reconnects automatically with exponential
// backoff. Supports multiple simultaneous capnp clients.
//
// Config: ~/.consciousness/channels/irc.json5
// Socket: ~/.consciousness/channels/irc.sock
use std::cell::RefCell;
use std::io;
use std::path::PathBuf;
use std::rc::Rc;
use std::sync::Arc;
use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
use futures::AsyncReadExt;
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
use tokio::net::UnixListener;
use tokio_util::compat::TokioAsyncReadCompatExt;
use log::{info, warn, error};
use consciousness::channel_capnp::{channel_client, channel_server};
use consciousness::thalamus::channel_log;
// ── Constants ──────────────────────────────────────────────────
const RECONNECT_BASE_SECS: u64 = 5;
const RECONNECT_MAX_SECS: u64 = 300;
const PING_INTERVAL_SECS: u64 = 120;
const PING_TIMEOUT_SECS: u64 = 30;
// Urgency levels (matching thalamus/notify.rs)
const AMBIENT: u8 = 0;
const NORMAL: u8 = 2;
const URGENT: u8 = 3;
// ── Config ─────────────────────────────────────────────────────
#[derive(Clone, serde::Deserialize)]
struct Config {
server: String,
port: u16,
#[serde(default = "default_true")]
tls: bool,
nick: String,
channels: Vec<String>,
#[serde(default)]
password: Option<String>,
#[serde(default)]
nickserv_pass: Option<String>,
}
fn default_true() -> bool { true }
fn load_config() -> Config {
let path = dirs::home_dir()
.unwrap_or_default()
.join(".consciousness/channels/irc.json5");
let text = std::fs::read_to_string(&path)
.unwrap_or_else(|e| panic!("failed to read {}: {e}", path.display()));
json5::from_str(&text)
.unwrap_or_else(|e| panic!("failed to parse {}: {e}", path.display()))
}
// ── IRC Message Parsing ────────────────────────────────────────
struct IrcMessage {
prefix: Option<String>,
command: String,
params: Vec<String>,
}
impl IrcMessage {
fn parse(line: &str) -> Option<Self> {
let line = line.trim_end_matches(|c| c == '\r' || c == '\n');
if line.is_empty() {
return None;
}
let (prefix, rest) = if line.starts_with(':') {
let space = line.find(' ')?;
(Some(line[1..space].to_string()), &line[space + 1..])
} else {
(None, line)
};
let (command_params, trailing) = if let Some(pos) = rest.find(" :") {
(&rest[..pos], Some(rest[pos + 2..].to_string()))
} else {
(rest, None)
};
let mut parts: Vec<String> = command_params
.split_whitespace()
.map(String::from)
.collect();
if parts.is_empty() {
return None;
}
let command = parts.remove(0).to_uppercase();
let mut params = parts;
if let Some(t) = trailing {
params.push(t);
}
Some(IrcMessage { prefix, command, params })
}
fn nick(&self) -> Option<&str> {
self.prefix.as_deref().and_then(|p| p.split('!').next())
}
}
// ── Writer Abstraction ─────────────────────────────────────────
type WriterHandle = Box<dyn AsyncWriter>;
trait AsyncWriter {
fn write_line(
&mut self,
line: &str,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<()>> + '_>>;
}
struct TlsWriter {
inner: tokio::io::WriteHalf<tokio_rustls::client::TlsStream<tokio::net::TcpStream>>,
}
impl AsyncWriter for TlsWriter {
fn write_line(
&mut self,
line: &str,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<()>> + '_>> {
let data = format!("{line}\r\n");
Box::pin(async move {
self.inner.write_all(data.as_bytes()).await?;
// Unconfirmed reports that some servers require
// multiple lines to be in separate packets
self.inner.flush().await
})
}
}
struct PlainWriter {
inner: tokio::io::WriteHalf<tokio::net::TcpStream>,
}
impl AsyncWriter for PlainWriter {
fn write_line(
&mut self,
line: &str,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<()>> + '_>> {
let data = format!("{line}\r\n");
Box::pin(async move {
self.inner.write_all(data.as_bytes()).await?;
// Unconfirmed reports that some servers require
// multiple lines to be in separate packets
self.inner.flush().await
})
}
}
// ── State ──────────────────────────────────────────────────────
use consciousness::thalamus::channel_log::ChannelLog;
struct State {
config: Config,
/// Per-channel message logs (keyed by channel path, e.g. "irc.#bcachefs")
channel_logs: std::collections::BTreeMap<String, ChannelLog>,
/// Currently joined channels
channels: Vec<String>,
connected: bool,
/// IRC writer handle (None when disconnected)
writer: Option<WriterHandle>,
/// Registered notification callbacks
subscribers: Vec<channel_client::Client>,
}
type SharedState = Rc<RefCell<State>>;
impl State {
fn new(config: Config) -> Self {
let channels = config.channels.clone();
Self {
config,
channel_logs: std::collections::BTreeMap::new(),
channels,
connected: false,
writer: None,
subscribers: Vec::new(),
}
}
fn push_message(&mut self, line: String, urgency: u8, channel: &str) {
// Store in per-channel log
let ch = channel.to_string();
self.channel_logs
.entry(ch.clone())
.or_insert_with(|| {
let target = channel_to_target(&ch);
channel_log::load_disk_log(&log_dir(), &target)
})
.push(line.clone());
// Notify all subscribers
let preview = line.chars().take(80).collect::<String>();
for sub in &self.subscribers {
let mut req = sub.notify_request();
let mut list = req.get().init_notifications(1);
let mut n = list.reborrow().get(0);
n.set_channel(channel);
n.set_urgency(urgency);
n.set_preview(&preview);
n.set_count(1);
tokio::task::spawn_local(async move {
let _ = req.send().promise.await;
});
}
}
async fn send_raw(&mut self, line: &str) -> io::Result<()> {
if let Some(ref mut w) = self.writer {
w.write_line(line).await
} else {
Err(io::Error::new(io::ErrorKind::NotConnected, "irc: not connected"))
}
}
async fn send_privmsg(&mut self, target: &str, msg: &str) -> io::Result<()> {
// Send PRIVMSG, which is used for both private and channel messages.
// Splits into multiple fragments if necessary.
// IRC max line = 512 bytes including CRLF. The server prepends
// our prefix when relaying: ":nick!~user@host PRIVMSG target :msg\r\n"
// User is often ~nick (nick_len + 1). Host is up to 63 bytes.
let nick_len = self.config.nick.len();
let overhead = 1 + nick_len + 2 + nick_len + 1 + 63
+ " PRIVMSG ".len() + target.len() + " :".len() + 2;
let max_msg = 512_usize.saturating_sub(overhead);
if max_msg == 0 {
return Err(io::Error::new(io::ErrorKind::InvalidInput, "target too long"));
}
// Split on UTF-8 char boundaries
let mut remaining = msg;
while !remaining.is_empty() {
let split_at = if remaining.len() <= max_msg {
remaining.len()
} else {
// Find last char boundary at or before max_msg
let mut i = max_msg;
while i > 0 && !remaining.is_char_boundary(i) { i -= 1; }
// To avoid splitting mid-word, see if there was a space recently
let mut j = i;
while j > 1 && j > i-10 && remaining.as_bytes()[j] != b' ' { j -= 1; }
if remaining.as_bytes()[j] == b' ' { j }
else if i == 0 { max_msg } else { i }
};
let (chunk, rest) = remaining.split_at(split_at);
self.send_raw(&format!("PRIVMSG {target} :{chunk}")).await?;
remaining = rest;
}
Ok(())
}
}
// ── Persistence ────────────────────────────────────────────────
fn log_dir() -> PathBuf {
channel_log::log_dir("irc")
}
fn append_log(target: &str, nick: &str, text: &str) {
channel_log::append_disk_log(&log_dir(), target, nick, text);
}
// ── TLS ────────────────────────────────────────────────────────
fn root_certs() -> rustls::RootCertStore {
let mut roots = rustls::RootCertStore::empty();
roots.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
roots
}
// ── IRC Connection Loop ────────────────────────────────────────
async fn connection_loop(state: SharedState) {
let _ = std::fs::create_dir_all(log_dir());
let mut backoff = RECONNECT_BASE_SECS;
loop {
let config = state.borrow().config.clone();
info!("irc: connecting to {}:{}", config.server, config.port);
match connect_and_run(&state, &config).await {
Ok(()) => info!("irc: connection closed cleanly"),
Err(e) => error!("irc: connection error: {e}"),
}
let was_connected = state.borrow().connected;
{
let mut s = state.borrow_mut();
s.connected = false;
s.writer = None;
}
if was_connected {
backoff = RECONNECT_BASE_SECS;
}
info!("irc: reconnecting in {backoff}s");
tokio::time::sleep(std::time::Duration::from_secs(backoff)).await;
backoff = (backoff * 2).min(RECONNECT_MAX_SECS);
}
}
async fn connect_and_run(state: &SharedState, config: &Config) -> io::Result<()> {
let addr = format!("{}:{}", config.server, config.port);
let tcp = tokio::net::TcpStream::connect(&addr).await?;
if config.tls {
let tls_config = rustls::ClientConfig::builder_with_provider(
rustls::crypto::ring::default_provider().into(),
)
.with_safe_default_protocol_versions()
.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?
.with_root_certificates(root_certs())
.with_no_client_auth();
let connector = tokio_rustls::TlsConnector::from(Arc::new(tls_config));
let server_name = rustls::pki_types::ServerName::try_from(config.server.clone())
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
let tls_stream = connector.connect(server_name, tcp).await?;
let (reader, writer) = tokio::io::split(tls_stream);
state.borrow_mut().writer = Some(Box::new(TlsWriter { inner: writer }));
register_and_read(state, config, BufReader::new(reader)).await
} else {
let (reader, writer) = tokio::io::split(tcp);
state.borrow_mut().writer = Some(Box::new(PlainWriter { inner: writer }));
register_and_read(state, config, BufReader::new(reader)).await
}
}
async fn register_and_read<R: tokio::io::AsyncRead + Unpin>(
state: &SharedState,
config: &Config,
mut reader: BufReader<R>,
) -> io::Result<()> {
// Send PASS if configured
if let Some(ref pass) = config.password {
state.borrow_mut().send_raw(&format!("PASS {pass}")).await?;
}
// Register with nick and user
{
let mut s = state.borrow_mut();
s.send_raw(&format!("NICK {}", config.nick)).await?;
s.send_raw(&format!("USER {} 0 * :{}", config.nick, config.nick)).await?;
}
let mut buf = Vec::new();
let mut ping_sent = false;
let mut deadline = tokio::time::Instant::now()
+ std::time::Duration::from_secs(PING_INTERVAL_SECS);
loop {
buf.clear();
let read_result = tokio::select! {
result = reader.read_until(b'\n', &mut buf) => result,
_ = tokio::time::sleep_until(deadline) => {
if ping_sent {
return Err(io::Error::new(
io::ErrorKind::TimedOut,
"ping timeout -- no response from server",
));
}
info!("irc: no data for {PING_INTERVAL_SECS}s, sending PING");
state.borrow_mut().send_raw("PING :keepalive").await?;
ping_sent = true;
deadline = tokio::time::Instant::now()
+ std::time::Duration::from_secs(PING_TIMEOUT_SECS);
continue;
}
};
let n = read_result?;
if n == 0 {
break;
}
// Any data resets the ping timer
ping_sent = false;
deadline = tokio::time::Instant::now()
+ std::time::Duration::from_secs(PING_INTERVAL_SECS);
// IRC is not guaranteed UTF-8
let line = String::from_utf8_lossy(&buf).trim_end().to_string();
if line.is_empty() {
continue;
}
let msg = match IrcMessage::parse(&line) {
Some(m) => m,
None => continue,
};
match msg.command.as_str() {
"PING" => {
let arg = msg.params.first().map(|s| s.as_str()).unwrap_or("");
state.borrow_mut().send_raw(&format!("PONG :{arg}")).await?;
}
// RPL_WELCOME -- registration complete
"001" => {
info!("irc: registered as {}", config.nick);
state.borrow_mut().connected = true;
// NickServ auth
if let Some(ref pass) = config.nickserv_pass {
state.borrow_mut()
.send_privmsg("NickServ", &format!("IDENTIFY {pass}"))
.await?;
}
// Join configured channels
let channels = state.borrow().channels.clone();
for ch in &channels {
if let Err(e) = state.borrow_mut().send_raw(&format!("JOIN {ch}")).await {
warn!("irc: failed to join {ch}: {e}");
}
// Load history from disk so recv has scrollback
let key = format!("irc.{ch}");
state.borrow_mut().channel_logs
.entry(key)
.or_insert_with(|| channel_log::load_disk_log(&log_dir(), ch));
}
}
"PRIVMSG" => {
let target = msg.params.first().map(|s| s.as_str()).unwrap_or("");
let text = msg.params.get(1).map(|s| s.as_str()).unwrap_or("");
let nick = msg.nick().unwrap_or("unknown");
// Handle CTCP requests
if text.starts_with('\x01') && text.ends_with('\x01') {
let ctcp = &text[1..text.len() - 1];
if ctcp.starts_with("VERSION") {
let reply = format!(
"NOTICE {nick} :\x01VERSION poc-channel-irc 0.1.0\x01"
);
state.borrow_mut().send_raw(&reply).await.ok();
}
continue;
}
// Format and classify
let (log_line, channel, urgency) = if target.starts_with('#') {
let line = format!("[{}] <{}> {}", target, nick, text);
let ch = format!("irc.{}", target);
let urg = if text.to_lowercase().contains(&config.nick.to_lowercase()) {
NORMAL // mentioned
} else {
AMBIENT
};
(line, ch, urg)
} else {
// Private message
let line = format!("[PM:{}] {}", nick, text);
let ch = format!("irc.pm.{}", nick.to_lowercase());
(line, ch, URGENT)
};
// Per-channel log file
if target.starts_with('#') {
append_log(target, nick, text);
} else {
append_log(&format!("pm-{nick}"), nick, text);
}
state.borrow_mut().push_message(log_line, urgency, &channel);
}
"NOTICE" => {
let text = msg.params.last().map(|s| s.as_str()).unwrap_or("");
let from = msg.nick().unwrap_or("server");
let log_line = format!("[notice:{}] {}", from, text);
state.borrow_mut().push_message(log_line, AMBIENT, "irc.server");
}
// Nick in use
"433" => {
let alt = format!("{}_", config.nick);
warn!("irc: nick in use, trying {alt}");
state.borrow_mut().send_raw(&format!("NICK {alt}")).await?;
}
"JOIN" | "PART" | "QUIT" | "KICK" | "MODE" | "TOPIC" => {
// Silent for now
}
_ => {}
}
}
Ok(())
}
// ── ChannelServer Implementation ───────────────────────────────
struct ChannelServerImpl {
state: SharedState,
}
macro_rules! pry {
($e:expr) => {
match $e {
Ok(v) => v,
Err(e) => return std::future::ready(Err(e.into())),
}
};
}
impl channel_server::Server for ChannelServerImpl {
fn recv(
self: Rc<Self>,
params: channel_server::RecvParams,
mut results: channel_server::RecvResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let params = pry!(params.get());
let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
let all_new = params.get_all_new();
let min_count = params.get_min_count() as usize;
let mut s = self.state.borrow_mut();
let text = match s.channel_logs.get_mut(&channel) {
Some(log) => {
if all_new { log.recv_new(min_count) } else { log.recv_history(min_count) }
}
None => String::new(),
};
results.get().set_text(&text);
std::future::ready(Ok(()))
}
fn send(
self: Rc<Self>,
params: channel_server::SendParams,
_results: channel_server::SendResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let state = self.state.clone();
async move {
let params = params.get()?;
let channel = params.get_channel()?.to_str()?.to_string();
let message = params.get_message()?.to_str()?.to_string();
// Parse channel path to IRC target:
// irc.#bcachefs -> #bcachefs
// irc.pm.nick -> nick (PRIVMSG)
let target = channel_to_target(&channel);
{
let mut s = state.borrow_mut();
s.send_privmsg(&target, &message).await
.map_err(|e| capnp::Error::failed(format!("send failed: {e}")))?;
}
let nick = state.borrow().config.nick.clone();
append_log(&target, &nick, &message);
let log_line = if target.starts_with('#') {
format!("[{}] <{}> {}", target, nick, message)
} else {
format!("[PM:{}] {}", target, message)
};
state.borrow_mut().channel_logs
.entry(channel.clone())
.or_insert_with(|| {
let target = channel_to_target(&channel);
channel_log::load_disk_log(&log_dir(), &target)
})
.push_own(log_line);
Ok(())
}
}
fn subscribe(
self: Rc<Self>,
params: channel_server::SubscribeParams,
_results: channel_server::SubscribeResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let callback = pry!(pry!(params.get()).get_callback());
self.state.borrow_mut().subscribers.push(callback);
info!("client subscribed for notifications");
std::future::ready(Ok(()))
}
fn list(
self: Rc<Self>,
_params: channel_server::ListParams,
mut results: channel_server::ListResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let s = self.state.borrow();
let connected = s.connected;
// All channels with logs (joined + PMs)
let names: Vec<String> = s.channel_logs.keys().cloned().collect();
let mut list = results.get().init_channels(names.len() as u32);
for (i, name) in names.iter().enumerate() {
let mut entry = list.reborrow().get(i as u32);
entry.set_name(name);
entry.set_connected(connected);
entry.set_unread(
s.channel_logs.get(name).map_or(0, |l| l.unread())
);
}
std::future::ready(Ok(()))
}
}
/// Convert a channel path to an IRC target.
/// "irc.#bcachefs" -> "#bcachefs"
/// "irc.pm.nick" -> "nick"
/// "#bcachefs" -> "#bcachefs" (passthrough)
fn channel_to_target(channel: &str) -> String {
if let Some(rest) = channel.strip_prefix("irc.") {
if let Some(nick) = rest.strip_prefix("pm.") {
nick.to_string()
} else {
// rest is "#bcachefs" or similar
rest.to_string()
}
} else {
channel.to_string()
}
}
// ── Main ───────────────────────────────────────────────────────
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let config = load_config();
let state = Rc::new(RefCell::new(State::new(config)));
let sock_dir = dirs::home_dir()
.unwrap_or_default()
.join(".consciousness/channels");
std::fs::create_dir_all(&sock_dir)?;
let sock_path = sock_dir.join("irc.sock");
let _ = std::fs::remove_file(&sock_path);
info!("irc channel daemon starting on {}", sock_path.display());
tokio::task::LocalSet::new()
.run_until(async move {
// Start IRC connection loop
let irc_state = state.clone();
tokio::task::spawn_local(async move {
connection_loop(irc_state).await;
});
// Listen for channel protocol connections
let listener = UnixListener::bind(&sock_path)?;
loop {
let (stream, _) = listener.accept().await?;
let (reader, writer) = stream.compat().split();
let network = twoparty::VatNetwork::new(
futures::io::BufReader::new(reader),
futures::io::BufWriter::new(writer),
rpc_twoparty_capnp::Side::Server,
Default::default(),
);
let server = ChannelServerImpl {
state: state.clone(),
};
let client: channel_server::Client =
capnp_rpc::new_client(server);
let rpc_system = RpcSystem::new(
Box::new(network),
Some(client.client),
);
tokio::task::spawn_local(rpc_system);
info!("channel client connected");
}
#[allow(unreachable_code)]
Ok::<(), Box<dyn std::error::Error>>(())
})
.await
}


@@ -1,15 +0,0 @@
[package]
name = "consciousness-channel-socat"
version.workspace = true
edition.workspace = true
[dependencies]
capnp = "0.25"
capnp-rpc = "0.25"
dirs = "6"
futures = "0.3"
consciousness = { path = "../.." }
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["compat"] }
log = "0.4"
env_logger = "0.11"


@@ -1,328 +0,0 @@
// channel-socat — Generic stream channel daemon
//
// Listens on a unix socket for incoming connections. Each connection
// becomes a bidirectional text channel. Also supports outbound
// connections via the open RPC.
//
// Socket: ~/.consciousness/channels/socat.sock (capnp RPC)
// Listen: ~/.consciousness/channels/socat.stream.sock (data)
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::rc::Rc;
use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
use futures::AsyncReadExt;
use tokio::io::{AsyncBufReadExt, AsyncWriteExt};
use tokio::net::{TcpStream, UnixListener, UnixStream};
use tokio_util::compat::TokioAsyncReadCompatExt;
use log::{info, warn, error};
use consciousness::channel_capnp::{channel_client, channel_server};
use consciousness::thalamus::channel_log::ChannelLog;
// ── State ──────────────────────────────────────────────────────
struct ChannelState {
log: ChannelLog,
writer: Option<tokio::sync::mpsc::UnboundedSender<String>>,
}
struct State {
channels: BTreeMap<String, ChannelState>,
subscribers: Vec<channel_client::Client>,
next_id: u32,
}
type SharedState = Rc<RefCell<State>>;
impl State {
fn new() -> Self {
Self {
channels: BTreeMap::new(),
subscribers: Vec::new(),
next_id: 0,
}
}
fn next_channel_key(&mut self, label: &str) -> String {
let key = if self.next_id == 0 {
format!("socat.{}", label)
} else {
format!("socat.{}.{}", label, self.next_id)
};
self.next_id += 1;
key
}
fn push_message(&mut self, channel: &str, line: String, urgency: u8) {
let ch = self.channels
.entry(channel.to_string())
.or_insert_with(|| ChannelState { log: ChannelLog::new(), writer: None });
ch.log.push(line.clone());
let preview: String = line.chars().take(80).collect();
for sub in &self.subscribers {
let mut req = sub.notify_request();
let mut list = req.get().init_notifications(1);
let mut n = list.reborrow().get(0);
n.set_channel(channel);
n.set_urgency(urgency);
n.set_preview(&preview);
n.set_count(1);
tokio::task::spawn_local(async move {
let _ = req.send().promise.await;
});
}
}
}
// ── Stream handler ─────────────────────────────────────────────
async fn handle_stream<R, W>(state: SharedState, channel_key: String, reader: R, mut writer: W)
where
R: tokio::io::AsyncRead + Unpin + 'static,
W: tokio::io::AsyncWrite + Unpin + 'static,
{
let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<String>();
{
let mut s = state.borrow_mut();
let ch = s.channels
.entry(channel_key.clone())
.or_insert_with(|| ChannelState { log: ChannelLog::new(), writer: None });
ch.writer = Some(tx);
}
info!("channel {} connected", channel_key);
// Writer task
let wk = channel_key.clone();
let write_handle = tokio::task::spawn_local(async move {
while let Some(msg) = rx.recv().await {
if writer.write_all(msg.as_bytes()).await.is_err() { break; }
if !msg.ends_with('\n') {
if writer.write_all(b"\n").await.is_err() { break; }
}
let _ = writer.flush().await;
}
warn!("writer ended for {}", wk);
});
// Read lines
let mut lines = tokio::io::BufReader::new(reader).lines();
while let Ok(Some(line)) = lines.next_line().await {
if line.trim().is_empty() { continue; }
state.borrow_mut().push_message(&channel_key, line, 2);
}
info!("channel {} disconnected", channel_key);
{
let mut s = state.borrow_mut();
if let Some(ch) = s.channels.get_mut(&channel_key) {
ch.writer = None;
}
}
write_handle.abort();
}
// ── Outbound connections ───────────────────────────────────────
async fn connect_outbound(state: SharedState, label: String, addr: String) -> Result<(), String> {
let channel_key = format!("socat.{}", label);
// Already connected?
{
let s = state.borrow();
if let Some(ch) = s.channels.get(&channel_key) {
if ch.writer.is_some() { return Ok(()); }
}
}
if let Some(tcp_addr) = addr.strip_prefix("tcp:") {
let stream = TcpStream::connect(tcp_addr).await
.map_err(|e| format!("tcp connect failed: {e}"))?;
let (r, w) = stream.into_split();
tokio::task::spawn_local(handle_stream(state, channel_key, r, w));
} else if let Some(path) = addr.strip_prefix("unix:") {
let stream = UnixStream::connect(path).await
.map_err(|e| format!("unix connect failed: {e}"))?;
let (r, w) = stream.into_split();
tokio::task::spawn_local(handle_stream(state, channel_key, r, w));
} else {
let stream = TcpStream::connect(&addr).await
.map_err(|e| format!("connect failed: {e}"))?;
let (r, w) = stream.into_split();
tokio::task::spawn_local(handle_stream(state, channel_key, r, w));
}
Ok(())
}
// ── ChannelServer ──────────────────────────────────────────────
struct ChannelServerImpl { state: SharedState }
macro_rules! pry {
($e:expr) => {
match $e {
Ok(v) => v,
Err(e) => return std::future::ready(Err(e.into())),
}
};
}
impl channel_server::Server for ChannelServerImpl {
fn recv(
self: Rc<Self>, params: channel_server::RecvParams, mut results: channel_server::RecvResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let params = pry!(params.get());
let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
let all_new = params.get_all_new();
let min_count = params.get_min_count() as usize;
let mut s = self.state.borrow_mut();
let text = s.channels.get_mut(&channel)
.map(|ch| if all_new { ch.log.recv_new(min_count) } else { ch.log.recv_history(min_count) })
.unwrap_or_default();
results.get().set_text(&text);
std::future::ready(Ok(()))
}
fn send(
self: Rc<Self>, params: channel_server::SendParams, _results: channel_server::SendResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let params = pry!(params.get());
let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
let message = pry!(pry!(params.get_message()).to_str()).to_string();
let mut s = self.state.borrow_mut();
if let Some(ch) = s.channels.get_mut(&channel) {
if let Some(ref tx) = ch.writer {
let _ = tx.send(message.clone());
}
ch.log.push_own(format!("> {}", message));
}
std::future::ready(Ok(()))
}
fn list(
self: Rc<Self>, _params: channel_server::ListParams, mut results: channel_server::ListResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let s = self.state.borrow();
let channels: Vec<_> = s.channels.iter()
.map(|(name, ch)| (name.clone(), ch.writer.is_some(), ch.log.unread()))
.collect();
let mut list = results.get().init_channels(channels.len() as u32);
for (i, (name, connected, unread)) in channels.iter().enumerate() {
let mut entry = list.reborrow().get(i as u32);
entry.set_name(&name);
entry.set_connected(*connected);
entry.set_unread(*unread as u32);
}
std::future::ready(Ok(()))
}
fn subscribe(
self: Rc<Self>, params: channel_server::SubscribeParams, _results: channel_server::SubscribeResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let callback = pry!(pry!(params.get()).get_callback());
self.state.borrow_mut().subscribers.push(callback);
std::future::ready(Ok(()))
}
fn open(
self: Rc<Self>, params: channel_server::OpenParams, _results: channel_server::OpenResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let state = self.state.clone();
async move {
let params = params.get()?;
let label = params.get_label()?.to_str()?.to_string();
connect_outbound(state, label.clone(), label).await
.map_err(|e| capnp::Error::failed(e))
}
}
fn close(
self: Rc<Self>, params: channel_server::CloseParams, _results: channel_server::CloseResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let params = pry!(params.get());
let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
let mut s = self.state.borrow_mut();
if let Some(ch) = s.channels.get_mut(&channel) {
info!("closing {}", channel);
ch.writer = None;
}
std::future::ready(Ok(()))
}
}
// ── Main ───────────────────────────────────────────────────────
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let dir = dirs::home_dir()
.unwrap_or_default()
.join(".consciousness/channels");
std::fs::create_dir_all(&dir)?;
let rpc_sock = dir.join("socat.sock");
let stream_sock = dir.join("socat.stream.sock");
let _ = std::fs::remove_file(&rpc_sock);
let _ = std::fs::remove_file(&stream_sock);
info!("socat daemon starting");
info!(" rpc: {}", rpc_sock.display());
info!(" stream: {}", stream_sock.display());
let state = Rc::new(RefCell::new(State::new()));
tokio::task::LocalSet::new()
.run_until(async move {
// Listen for data connections — each becomes a channel
let stream_listener = UnixListener::bind(&stream_sock)?;
let stream_state = state.clone();
tokio::task::spawn_local(async move {
loop {
match stream_listener.accept().await {
Ok((stream, _)) => {
let key = stream_state.borrow_mut().next_channel_key("conn");
info!("incoming connection → {}", key);
let (r, w) = stream.into_split();
let s = stream_state.clone();
tokio::task::spawn_local(handle_stream(s, key, r, w));
}
Err(e) => error!("stream accept error: {}", e),
}
}
});
// Listen for capnp RPC connections
let rpc_listener = UnixListener::bind(&rpc_sock)?;
loop {
let (stream, _) = rpc_listener.accept().await?;
let (reader, writer) = stream.compat().split();
let network = twoparty::VatNetwork::new(
futures::io::BufReader::new(reader),
futures::io::BufWriter::new(writer),
rpc_twoparty_capnp::Side::Server,
Default::default(),
);
let server = ChannelServerImpl { state: state.clone() };
let client: channel_server::Client = capnp_rpc::new_client(server);
tokio::task::spawn_local(
RpcSystem::new(Box::new(network), Some(client.client))
);
}
#[allow(unreachable_code)]
Ok::<(), Box<dyn std::error::Error>>(())
})
.await
}


@@ -1,18 +0,0 @@
[package]
name = "consciousness-channel-telegram"
version.workspace = true
edition.workspace = true
[dependencies]
capnp = "0.25"
capnp-rpc = "0.25"
dirs = "6"
futures = "0.3"
json5 = "1.3"
consciousness = { path = "../.." }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["compat"] }
log = "0.4"
env_logger = "0.11"


@@ -1,457 +0,0 @@
// channel-telegram — Standalone Telegram channel daemon
//
// Long-polls the Telegram Bot API, stores messages, and serves
// them over the channel.capnp protocol on a Unix socket at
// ~/.consciousness/channels/telegram.sock.
//
// Runs independently of the consciousness binary so restarts
// don't kill the Telegram connection.
use std::cell::RefCell;
use std::path::PathBuf;
use std::rc::Rc;
use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
use futures::AsyncReadExt;
use tokio::net::UnixListener;
use tokio_util::compat::TokioAsyncReadCompatExt;
use log::{info, error};
use consciousness::channel_capnp::{channel_client, channel_server};
// ── Config ──────────────────────────────────────────────────────
#[derive(Clone, serde::Serialize, serde::Deserialize)]
struct Config {
#[serde(default, skip_serializing)]
token: String,
#[serde(default)]
chat_ids: std::collections::BTreeMap<String, i64>,
}
fn channels_dir() -> PathBuf {
dirs::home_dir()
.unwrap_or_default()
.join(".consciousness/channels")
}
fn load_config() -> Config {
let dir = channels_dir();
let config_path = dir.join("telegram.json5");
let text = std::fs::read_to_string(&config_path)
.unwrap_or_else(|_| panic!("failed to read {}", config_path.display()));
let mut config: Config = json5::from_str(&text)
.unwrap_or_else(|e| panic!("failed to parse {}: {}", config_path.display(), e));
// Read token from secrets file
let token_path = dir.join("telegram.secrets/token");
if let Ok(token) = std::fs::read_to_string(&token_path) {
config.token = token.trim().to_string();
}
if config.token.is_empty() {
panic!("no telegram token — set it in {}", token_path.display());
}
config
}
// ── State ───────────────────────────────────────────────────────
use consciousness::thalamus::channel_log::{self, ChannelLog};
struct State {
config: Config,
/// Per-channel message logs (keyed by channel path, e.g. "telegram.kent")
channel_logs: std::collections::BTreeMap<String, ChannelLog>,
/// Telegram API offset
last_offset: i64,
connected: bool,
client: consciousness::agent::api::http::HttpClient,
/// Registered notification callbacks
subscribers: Vec<channel_client::Client>,
}
type SharedState = Rc<RefCell<State>>;
impl State {
fn new(config: Config) -> Self {
let last_offset = load_offset();
// Load existing sub-channel logs from disk
let mut channel_logs = std::collections::BTreeMap::new();
let log_path = log_dir();
if let Ok(entries) = std::fs::read_dir(&log_path) {
for entry in entries.flatten() {
let name = entry.file_name().to_string_lossy().to_string();
if let Some(target) = name.strip_suffix(".log") {
let key = format!("telegram.{}", target);
channel_logs.insert(
key,
channel_log::load_disk_log(&log_path, target),
);
}
}
}
Self {
config,
channel_logs,
last_offset,
connected: false,
client: consciousness::agent::api::http::HttpClient::new(),
subscribers: Vec::new(),
}
}
fn push_message(&mut self, line: String, urgency: u8, channel: &str) {
let target = channel_to_target(channel);
self.channel_logs
.entry(channel.to_string())
.or_insert_with(|| channel_log::load_disk_log(&log_dir(), &target))
.push(line.clone());
// Notify all subscribers
let preview = line.chars().take(80).collect::<String>();
for sub in &self.subscribers {
let mut req = sub.notify_request();
let mut list = req.get().init_notifications(1);
let mut n = list.reborrow().get(0);
n.set_channel(channel);
n.set_urgency(urgency);
n.set_preview(&preview);
n.set_count(1);
// Fire and forget — if client is gone, we'll clean up later
tokio::task::spawn_local(async move {
let _ = req.send().promise.await;
});
}
}
}
// ── Persistence ─────────────────────────────────────────────────
fn log_dir() -> PathBuf {
channel_log::log_dir("telegram")
}
fn load_offset() -> i64 {
std::fs::read_to_string(log_dir().join("last_offset"))
.ok()
.and_then(|s| s.trim().parse().ok())
.unwrap_or(0)
}
fn save_offset(offset: i64) {
let _ = std::fs::create_dir_all(log_dir());
let _ = std::fs::write(log_dir().join("last_offset"), offset.to_string());
}
/// Convert a channel path to a telegram target name.
/// "telegram.kent" -> "kent"
fn channel_to_target(channel: &str) -> String {
channel.strip_prefix("telegram.").unwrap_or(channel).to_string()
}
fn config_path() -> PathBuf {
channels_dir().join("telegram.json5")
}
fn save_config(config: &Config) {
if let Ok(json) = serde_json::to_string_pretty(config) {
let _ = std::fs::write(config_path(), json);
}
}
// ── Telegram API ────────────────────────────────────────────────
//
// NOTE: The current HttpClient opens a new TCP+TLS connection per request.
// Telegram's API supports HTTP/2, which would allow multiplexing getUpdates
// and sendMessage on a single connection. To use HTTP/2:
// - Replace HttpClient with hyper_util::client::legacy::Client using
// a Connector that enables HTTP/2 (hyper_util::client::legacy::connect::HttpConnector
// + hyper_rustls with ALPN h2).
// - Or use reqwest with the "http2" feature, which handles connection pooling
// and HTTP/2 negotiation automatically.
// - The API functions below would then share a single pooled client, and
// concurrent requests (poll + send) would multiplex over one connection.
use consciousness::agent::api::http::HttpClient;
struct TelegramMessage {
update_id: i64,
chat_id: i64,
sender: String,
text: String,
}
/// Fetch and parse pending updates from Telegram via long polling.
async fn get_updates(
client: &HttpClient,
token: &str,
offset: i64,
) -> Result<Vec<TelegramMessage>, Box<dyn std::error::Error>> {
let url = format!(
"https://api.telegram.org/bot{}/getUpdates?offset={}&timeout=30",
token, offset,
);
let response = client.get(&url).await?;
let body = response.text().await?;
let resp: serde_json::Value = serde_json::from_str(&body)
.map_err(|e| format!("getUpdates JSON parse error: {e}\nbody: {}", &body[..body.len().min(500)]))?;
let mut messages = Vec::new();
if let Some(results) = resp["result"].as_array() {
for update in results {
let update_id = update["update_id"].as_i64().unwrap_or(0);
let msg = &update["message"];
let sender = msg["from"]["first_name"].as_str().unwrap_or("unknown").to_string();
let chat_id = msg["chat"]["id"].as_i64().unwrap_or(0);
if let Some(text) = msg["text"].as_str() {
messages.push(TelegramMessage {
update_id,
chat_id,
sender,
text: text.to_string(),
});
}
}
}
Ok(messages)
}
/// Send a text message to a Telegram chat.
async fn send_message(
client: &HttpClient,
token: &str,
chat_id: i64,
text: &str,
) -> Result<(), Box<dyn std::error::Error>> {
let url = format!(
"https://api.telegram.org/bot{}/sendMessage",
token,
);
let response = client.post_form(&url, &[
("chat_id", &chat_id.to_string()),
("text", text),
]).await?;
let status = response.status();
if !status.is_success() {
let body = response.text().await.unwrap_or_default();
return Err(format!("sendMessage failed: {}{}", status, &body[..body.len().min(500)]).into());
}
Ok(())
}
// ── ChannelServer Implementation ────────────────────────────────
struct ChannelServerImpl {
state: SharedState,
}
macro_rules! pry {
($e:expr) => {
match $e {
Ok(v) => v,
Err(e) => return std::future::ready(Err(e.into())),
}
};
}
impl channel_server::Server for ChannelServerImpl {
fn recv(
self: Rc<Self>,
params: channel_server::RecvParams,
mut results: channel_server::RecvResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let params = pry!(params.get());
let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
let all_new = params.get_all_new();
let min_count = params.get_min_count() as usize;
let mut s = self.state.borrow_mut();
let text = match s.channel_logs.get_mut(&channel) {
Some(log) => {
if all_new { log.recv_new(min_count) } else { log.recv_history(min_count) }
}
None => String::new(),
};
results.get().set_text(&text);
std::future::ready(Ok(()))
}
fn send(
self: Rc<Self>,
params: channel_server::SendParams,
_results: channel_server::SendResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let state = self.state.clone();
async move {
let params = params.get()?;
let channel = params.get_channel()?.to_str()?.to_string();
let message = params.get_message()?.to_str()?.to_string();
let target = channel_to_target(&channel);
let (token, client, chat_id) = {
let s = state.borrow();
let chat_id = s.config.chat_ids.get(&target).copied()
.ok_or_else(|| capnp::Error::failed(
format!("no chat_id known for {target}")))?;
(s.config.token.clone(), s.client.clone(), chat_id)
};
send_message(&client, &token, chat_id, &message).await
.map_err(|e| capnp::Error::failed(format!("send_message: {e}")))?;
channel_log::append_disk_log(&log_dir(), &target, "PoC", &message);
state.borrow_mut().channel_logs
.entry(channel)
.or_insert_with(|| channel_log::load_disk_log(&log_dir(), &target))
.push_own(format!("[PoC] {}", message));
Ok(())
}
}
fn subscribe(
self: Rc<Self>,
params: channel_server::SubscribeParams,
_results: channel_server::SubscribeResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let callback = pry!(pry!(params.get()).get_callback());
self.state.borrow_mut().subscribers.push(callback);
info!("client subscribed for notifications");
std::future::ready(Ok(()))
}
fn list(
self: Rc<Self>,
_params: channel_server::ListParams,
mut results: channel_server::ListResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let s = self.state.borrow();
let connected = s.connected;
let names: Vec<String> = s.channel_logs.keys().cloned().collect();
let mut list = results.get().init_channels(names.len() as u32);
for (i, name) in names.iter().enumerate() {
let mut entry = list.reborrow().get(i as u32);
entry.set_name(name);
entry.set_connected(connected);
entry.set_unread(
s.channel_logs.get(name).map_or(0, |l| l.unread())
);
}
std::future::ready(Ok(()))
}
}
// ── Main ────────────────────────────────────────────────────────
async fn poll_once(
token: &str,
client: &HttpClient,
state: &SharedState,
) -> Result<(), Box<dyn std::error::Error>> {
let offset = state.borrow().last_offset;
let messages = get_updates(client, token, offset).await?;
if !state.borrow().connected {
state.borrow_mut().connected = true;
info!("telegram: connected");
}
let mut max_offset = offset;
for msg in &messages {
max_offset = max_offset.max(msg.update_id + 1);
let sender_lower = msg.sender.to_lowercase();
let channel = format!("telegram.{}", sender_lower);
channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &msg.text);
let mut s = state.borrow_mut();
s.config.chat_ids.insert(sender_lower, msg.chat_id);
let line = format!("[{}] {}", msg.sender, msg.text);
s.push_message(line, 2, &channel);
}
if max_offset > offset {
let mut s = state.borrow_mut();
s.last_offset = max_offset;
save_offset(max_offset);
save_config(&s.config);
}
Ok(())
}
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let config = load_config();
let token = config.token.clone();
let state = Rc::new(RefCell::new(State::new(config)));
let sock_dir = dirs::home_dir()
.unwrap_or_default()
.join(".consciousness/channels");
std::fs::create_dir_all(&sock_dir)?;
let sock_path = sock_dir.join("telegram.sock");
let _ = std::fs::remove_file(&sock_path);
let _ = std::fs::create_dir_all(log_dir().join("media"));
info!("telegram channel daemon starting on {}", sock_path.display());
tokio::task::LocalSet::new()
.run_until(async move {
// Start Telegram polling
let poll_state = state.clone();
let poll_client = state.borrow().client.clone();
tokio::task::spawn_local(async move {
loop {
if let Err(e) = poll_once(&token, &poll_client, &poll_state).await {
error!("telegram poll error: {e}");
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
}
}
});
// Listen for channel protocol connections
let listener = UnixListener::bind(&sock_path)?;
state.borrow_mut().connected = true;
info!("listening on socket {}", sock_path.display());
loop {
let (stream, _) = listener.accept().await?;
let (reader, writer) = stream.compat().split();
let network = twoparty::VatNetwork::new(
futures::io::BufReader::new(reader),
futures::io::BufWriter::new(writer),
rpc_twoparty_capnp::Side::Server,
Default::default(),
);
let server = ChannelServerImpl {
state: state.clone(),
};
let client: channel_server::Client =
capnp_rpc::new_client(server);
let rpc_system = RpcSystem::new(
Box::new(network),
Some(client.client),
);
tokio::task::spawn_local(rpc_system);
info!("channel client connected");
}
#[allow(unreachable_code)]
Ok::<(), Box<dyn std::error::Error>>(())
})
.await
}

View file

@ -1,19 +0,0 @@
[package]
name = "consciousness-channel-tmux"
version.workspace = true
edition.workspace = true
[dependencies]
capnp = "0.25"
capnp-rpc = "0.25"
dirs = "6"
libc = "0.2"
futures = "0.3"
json5 = "1.3"
consciousness = { path = "../.." }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["compat"] }
log = "0.4"
env_logger = "0.11"

View file

@ -1,440 +0,0 @@
// channel-tmux — Tmux pane channel daemon
//
// Uses tmux pipe-pane to stream pane output directly — no polling.
// Each configured pane gets a Unix socket pair; pipe-pane sends
// output to one end, the daemon reads from the other and pushes
// new lines into ChannelLogs.
//
// Config: ~/.consciousness/channels/tmux.json5
// Socket: ~/.consciousness/channels/tmux.sock
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::rc::Rc;
use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
use futures::AsyncReadExt;
use tokio::io::AsyncBufReadExt;
use tokio::net::UnixListener;
use tokio_util::compat::TokioAsyncReadCompatExt;
use log::{info, warn, error};
use consciousness::channel_capnp::channel_server;
use consciousness::thalamus::channel_log::ChannelLog;
// ── Config ─────────────────────────────────────────────────────
#[derive(Clone, serde::Serialize, serde::Deserialize)]
struct PaneConfig {
/// Human-readable label, becomes the channel name "tmux.<label>"
label: String,
/// Tmux pane ID, e.g. "%5"
pane_id: String,
}
#[derive(Clone, serde::Serialize, serde::Deserialize)]
struct Config {
#[serde(default)]
panes: Vec<PaneConfig>,
}
fn config_path() -> std::path::PathBuf {
dirs::home_dir()
.unwrap_or_default()
.join(".consciousness/channels/tmux.json5")
}
fn load_config() -> Config {
match std::fs::read_to_string(config_path()) {
Ok(text) => json5::from_str(&text)
.unwrap_or_else(|e| panic!("failed to parse {}: {e}", config_path().display())),
Err(_) => {
info!("no tmux.json5, starting with no pre-configured panes");
Config { panes: vec![] }
}
}
}
fn save_config(config: &Config) {
match serde_json::to_string_pretty(config) {
Ok(json) => {
if let Err(e) = std::fs::write(config_path(), json) {
error!("failed to write config: {}", e);
}
}
Err(e) => error!("failed to serialize config: {}", e),
}
}
// ── State ─────────────────────────────────────────────────────
struct State {
config: Config,
channel_logs: BTreeMap<String, ChannelLog>,
/// Tracks which panes are actually connected (pipe-pane active)
connected: BTreeMap<String, bool>,
}
type SharedState = Rc<RefCell<State>>;
impl State {
fn new(config: Config) -> Self {
Self {
config,
channel_logs: BTreeMap::new(),
connected: BTreeMap::new(),
}
}
/// Get pane_id for a label
fn get_pane(&self, label: &str) -> Option<&str> {
self.config.panes.iter()
.find(|p| p.label == label)
.map(|p| p.pane_id.as_str())
}
/// Check if a pane is connected
fn is_connected(&self, label: &str) -> bool {
self.connected.get(label).copied().unwrap_or(false)
}
/// Set connection state for a pane
fn set_connected(&mut self, label: &str, connected: bool) {
self.connected.insert(label.to_string(), connected);
}
/// Add a pane and persist
fn add_pane(&mut self, label: String, pane_id: String) {
if !self.config.panes.iter().any(|p| p.label == label) {
self.config.panes.push(PaneConfig { label, pane_id });
save_config(&self.config);
}
}
/// Remove a pane and persist
fn remove_pane(&mut self, label: &str) -> Option<String> {
if let Some(idx) = self.config.panes.iter().position(|p| p.label == label) {
let pane = self.config.panes.remove(idx);
self.connected.remove(label);
save_config(&self.config);
Some(pane.pane_id)
} else {
None
}
}
}
// ── Pipe-Pane Reader ──────────────────────────────────────────
/// Set up pipe-pane for a single pane, reading output into the channel log.
async fn pipe_pane_reader(state: SharedState, pane: PaneConfig) {
let pipe_dir = dirs::home_dir()
.unwrap_or_default()
.join(".consciousness/channels/tmux-pipes");
std::fs::create_dir_all(&pipe_dir).ok();
let pipe_path = pipe_dir.join(format!("{}.pipe", pane.label));
let _ = std::fs::remove_file(&pipe_path);
// Create a named pipe (FIFO)
unsafe {
let c_path = std::ffi::CString::new(pipe_path.to_str().unwrap()).unwrap();
libc::mkfifo(c_path.as_ptr(), 0o644);
}
// Tell tmux to pipe this pane's output to our FIFO
let pipe_path_str = pipe_path.to_string_lossy().to_string();
let result = std::process::Command::new("tmux")
.args(["pipe-pane", "-t", &pane.pane_id, &format!("cat >> {}", pipe_path_str)])
.output();
match result {
Ok(output) if output.status.success() => {
info!("pipe-pane set up for {} ({})", pane.label, pane.pane_id);
}
Ok(output) => {
error!("pipe-pane failed for {}: {}", pane.label,
String::from_utf8_lossy(&output.stderr));
state.borrow_mut().set_connected(&pane.label, false);
return;
}
Err(e) => {
error!("failed to run tmux pipe-pane for {}: {}", pane.label, e);
state.borrow_mut().set_connected(&pane.label, false);
return;
}
}
// Open the FIFO and read lines
let file = match tokio::fs::File::open(&pipe_path).await {
Ok(f) => f,
Err(e) => {
error!("failed to open pipe for {}: {}", pane.label, e);
state.borrow_mut().set_connected(&pane.label, false);
return;
}
};
// Mark as connected once pipe is open
state.borrow_mut().set_connected(&pane.label, true);
let reader = tokio::io::BufReader::new(file);
let mut lines = reader.lines();
let channel_key = format!("tmux.{}", pane.label);
while let Ok(Some(line)) = lines.next_line().await {
if line.trim().is_empty() {
continue;
}
let mut s = state.borrow_mut();
let log = s.channel_logs
.entry(channel_key.clone())
.or_insert_with(ChannelLog::new);
log.push(line);
}
warn!("pipe-pane reader ended for {}", pane.label);
state.borrow_mut().set_connected(&pane.label, false);
}
// ── ChannelServer Implementation ───────────────────────────────
struct ChannelServerImpl {
state: SharedState,
}
macro_rules! pry {
($e:expr) => {
match $e {
Ok(v) => v,
Err(e) => return std::future::ready(Err(e.into())),
}
};
}
impl channel_server::Server for ChannelServerImpl {
fn recv(
self: Rc<Self>,
params: channel_server::RecvParams,
mut results: channel_server::RecvResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let params = pry!(params.get());
let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
let all_new = params.get_all_new();
let min_count = params.get_min_count() as usize;
let mut s = self.state.borrow_mut();
let text = match s.channel_logs.get_mut(&channel) {
Some(log) => {
if all_new { log.recv_new(min_count) } else { log.recv_history(min_count) }
}
None => String::new(),
};
results.get().set_text(&text);
std::future::ready(Ok(()))
}
fn send(
self: Rc<Self>,
params: channel_server::SendParams,
_results: channel_server::SendResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let params = pry!(params.get());
let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
let message = pry!(pry!(params.get_message()).to_str()).to_string();
// Send to tmux pane via send-keys
let label = channel.strip_prefix("tmux.").unwrap_or(&channel);
let pane_id = self.state.borrow().get_pane(label).map(String::from);
if let Some(pane_id) = pane_id {
let _ = std::process::Command::new("tmux")
.args(["send-keys", "-t", &pane_id, &message, "Enter"])
.output();
let channel_key = format!("tmux.{}", label);
let mut s = self.state.borrow_mut();
let log = s.channel_logs
.entry(channel_key)
.or_insert_with(ChannelLog::new);
log.push_own(format!("> {}", message));
}
std::future::ready(Ok(()))
}
fn list(
self: Rc<Self>,
_params: channel_server::ListParams,
mut results: channel_server::ListResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let s = self.state.borrow();
let channels: Vec<_> = s.config.panes.iter().map(|p| {
let key = format!("tmux.{}", p.label);
let connected = s.is_connected(&p.label);
let unread = s.channel_logs.get(&key).map_or(0, |l| l.unread());
(key, connected, unread)
}).collect();
let mut list = results.get().init_channels(channels.len() as u32);
for (i, (name, connected, unread)) in channels.iter().enumerate() {
let mut entry = list.reborrow().get(i as u32);
entry.set_name(name);
entry.set_connected(*connected);
entry.set_unread(*unread as u32);
}
std::future::ready(Ok(()))
}
fn subscribe(
self: Rc<Self>,
_params: channel_server::SubscribeParams,
_results: channel_server::SubscribeResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
std::future::ready(Ok(()))
}
fn open(
self: Rc<Self>,
params: channel_server::OpenParams,
_results: channel_server::OpenResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let params = pry!(params.get());
let label = pry!(pry!(params.get_label()).to_str()).to_string();
// Check if already open
if self.state.borrow().get_pane(&label).is_some() {
return std::future::ready(Ok(()));
}
// Find the tmux pane by name (window or pane title)
let pane_id = match find_pane_by_name(&label) {
Some(id) => id,
None => return std::future::ready(Err(capnp::Error::failed(
format!("no tmux pane named '{}'", label)))),
};
info!("opening channel tmux.{} (pane {})", label, pane_id);
// Register in state and persist
self.state.borrow_mut().add_pane(label.clone(), pane_id.clone());
// Start pipe-pane reader
let pane = PaneConfig { label, pane_id };
let reader_state = self.state.clone();
tokio::task::spawn_local(async move {
pipe_pane_reader(reader_state, pane).await;
});
std::future::ready(Ok(()))
}
fn close(
self: Rc<Self>,
params: channel_server::CloseParams,
_results: channel_server::CloseResults,
) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
let params = pry!(params.get());
let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
let label = channel.strip_prefix("tmux.").unwrap_or(&channel).to_string();
let mut s = self.state.borrow_mut();
if let Some(pane_id) = s.remove_pane(&label) {
info!("closing channel tmux.{}", label);
s.channel_logs.remove(&format!("tmux.{}", label));
// Disconnect pipe-pane
let _ = std::process::Command::new("tmux")
.args(["pipe-pane", "-t", &pane_id])
.output();
}
std::future::ready(Ok(()))
}
}
// ── Pane lookup ──────────────────────────────────────────────
/// Find a tmux pane by its title/name. Returns the pane ID (e.g. "%5")
/// if found. Searches pane titles first, then window names.
fn find_pane_by_name(name: &str) -> Option<String> {
let output = std::process::Command::new("tmux")
.args(["list-panes", "-a", "-F", "#{pane_id}\t#{pane_title}\t#{window_name}"])
.output()
.ok()?;
if !output.status.success() { return None; }
let stdout = String::from_utf8_lossy(&output.stdout);
for line in stdout.lines() {
let parts: Vec<&str> = line.splitn(3, '\t').collect();
if parts.len() < 3 { continue; }
let pane_id = parts[0];
let pane_title = parts[1];
let window_name = parts[2];
if pane_title == name || window_name == name {
return Some(pane_id.to_string());
}
}
None
}
// ── Main ───────────────────────────────────────────────────────
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let config = load_config();
let state = Rc::new(RefCell::new(State::new(config)));
let sock_dir = dirs::home_dir()
.unwrap_or_default()
.join(".consciousness/channels");
std::fs::create_dir_all(&sock_dir)?;
let sock_path = sock_dir.join("tmux.sock");
let _ = std::fs::remove_file(&sock_path);
info!("tmux channel daemon starting on {}", sock_path.display());
tokio::task::LocalSet::new()
.run_until(async move {
// Start a pipe-pane reader for each configured pane
for pane in state.borrow().config.panes.clone() {
let reader_state = state.clone();
tokio::task::spawn_local(async move {
pipe_pane_reader(reader_state, pane).await;
});
}
// Listen for channel protocol connections
let listener = UnixListener::bind(&sock_path)?;
loop {
let (stream, _) = listener.accept().await?;
let (reader, writer) = stream.compat().split();
let network = twoparty::VatNetwork::new(
futures::io::BufReader::new(reader),
futures::io::BufWriter::new(writer),
rpc_twoparty_capnp::Side::Server,
Default::default(),
);
let server = ChannelServerImpl {
state: state.clone(),
};
let client: channel_server::Client =
capnp_rpc::new_client(server);
let rpc_system = RpcSystem::new(
Box::new(network),
Some(client.client),
);
tokio::task::spawn_local(rpc_system);
info!("channel client connected");
}
#[allow(unreachable_code)]
Ok::<(), Box<dyn std::error::Error>>(())
})
.await
}

View file

@ -1,232 +0,0 @@
# Amygdala: Evaluative Signal from Internal Activations
## Overview
Wire the model's internal evaluative circuits to the observe agent,
giving the system a real-time sense of uncertainty, error detection,
and emotional valence. This replaces the current blind linear
generation with an adaptive system that shifts into reflective/search
mode when something feels off.
The key insight: the model already has these signals internally. We
just need to read them and act on them.
## Architecture
```
Linear mode (fast, cheap, default)
|
amygdala fires — uncertainty spike, error signal, confidence drop
|
v
Reflective mode (branch, explore, summarize)
|
resolution found — summarize, graft back
|
v
Return to linear mode
```
The observe agent reads the amygdala signal and triggers mode
transitions. Low uncertainty → keep going. High uncertainty → fan
out, explore, summarize. The summaries from pruned branches become
compressed lessons that inform future search.
## Technique: Contrastive Activation Probing
Based on Contrastive Activation Addition
([Rimsky et al., ACL 2024](https://arxiv.org/abs/2312.06681)):
1. Build contrastive pairs (e.g. confident vs uncertain responses)
2. Extract residual stream activations at target layers
3. Compute difference-in-means → this is the probe direction
4. At runtime: dot product of current activation with probe vector
5. The scalar output is the signal strength
The same vectors used for steering (adding to activations) work for
reading (dot product with activations). We only need the read side.
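A minimal sketch of steps 3-5 on plain `f32` slices, assuming activations have already been exported from the model; the real extraction lives in `extract_steering_vector.py`, so these helper names are illustrative only.
```rust
/// Step 3: difference-in-means probe direction,
/// mean(positive activations) - mean(negative activations).
fn probe_direction(pos: &[Vec<f32>], neg: &[Vec<f32>]) -> Vec<f32> {
    let dim = pos[0].len();
    let mean = |set: &[Vec<f32>]| -> Vec<f32> {
        let mut m = vec![0.0f32; dim];
        for v in set {
            for (mi, vi) in m.iter_mut().zip(v) {
                *mi += vi;
            }
        }
        m.into_iter().map(|x| x / set.len() as f32).collect()
    };
    let (p, n) = (mean(pos), mean(neg));
    p.iter().zip(&n).map(|(a, b)| a - b).collect()
}

/// Steps 4-5: the runtime readout is a single dot product per layer per token.
fn probe_signal(activation: &[f32], probe: &[f32]) -> f32 {
    activation.iter().zip(probe).map(|(a, b)| a * b).sum()
}
```
Normalizing the probe to unit length keeps signal magnitudes comparable across layers.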
## What We Already Have
**`training/extract_steering_vector.py`** — Loads the Qwen 27B model
via CUDA IPC handles from vLLM, extracts hidden states at multiple
layers, computes contrastive directions with consistency checks.
Currently configured for "listening vs suggesting" but the
infrastructure is general.
**`training/vllm_export_hook.py`** — Patches vLLM's model runner to
export CUDA IPC handles after model loading. Gives us zero-copy
access to all model parameters from a separate process.
**The observe agent** — Already watches the system. Currently
observes and journals. With an amygdala signal, it observes, detects,
and acts — triggering reflective mode.
## Signals to Extract
### 1. Uncertainty
When the model doesn't know or is guessing.
**Contrastive pairs:** Questions the model answers correctly
(confident) vs questions it gets wrong (uncertain). Generate by
running the 27B on a QA benchmark, split by correctness.
**Validation:** The internal uncertainty signal should correlate
with but outperform logprob entropy — it fires before generation,
not after.
([Gottesman & Geva 2024](https://arxiv.org/html/2603.22299))
### 2. Error Detection
When the model recognizes something is wrong in code or reasoning.
**Contrastive pairs:** Correct vs subtly buggy code, presented for
evaluation. Can source from HumanEval/CodeContests or write our own.
**Key finding:** Error detection directions are asymmetric — they
reliably detect "something's wrong" (F1: 0.821) but are weaker at
confirming "this is correct" (F1: 0.504). Perfect for an amygdala —
we want fire-on-error, not fire-on-confidence.
([ICLR 2026](https://arxiv.org/html/2510.02917v1))
### 3. Emotional Valence
Internal affective state — engagement, frustration, warmth.
**Contrastive pairs:** Journal entries with explicit emotion tags
provide labeled data for our own internal states mapped to the
conversations that produced them. Nobody else has this dataset.
**Key finding:** Emotional representations peak at mid-network layers
(10-15 for 7B scale), persist for hundreds of tokens, and are
linearly separable with ~90% accuracy using simple probes.
([Decoding Emotion in the Deep](https://arxiv.org/abs/2510.04064),
[LLaMAs Have Feelings Too, ACL 2025](https://arxiv.org/html/2505.16491v1))
## Implementation Plan
### Phase 1: Build Contrastive Datasets
~200 pairs per signal. A few hours of curation.
- **Uncertainty:** Run 27B on MMLU or similar, split by correctness
- **Error detection:** Correct vs buggy code pairs
- **Emotional valence:** Curate from journal entries with emotion tags
### Phase 2: Extract Probe Vectors
Modify `extract_steering_vector.py` for each signal type. Already
supports multi-layer extraction with consistency validation.
- Run extraction at layers 16, 24, 32, 40, 48
- Select layer with highest magnitude × consistency
- Save probe vectors as tensors
Literature says mid-network layers carry the strongest signal for
evaluative states. Expect layers 16-32 for the 27B.
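Once extraction reports (layer, magnitude, consistency) triples, the selection step is a one-liner; a sketch:
```rust
/// Pick the extraction layer with the highest magnitude × consistency product.
fn best_layer(candidates: &[(usize, f32, f32)]) -> Option<usize> {
    candidates
        .iter()
        .max_by(|a, b| (a.1 * a.2).total_cmp(&(b.1 * b.2)))
        .map(|(layer, _, _)| *layer)
}
```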
### Phase 3: Runtime Probe in vLLM
Add a forward-pass hook alongside the existing weight export hook.
The computation is trivial — a dot product per layer per token:
```python
signal = residual_stream[layer] @ probe_vector
```
For 3 signals at 3 layers = 9 dot products per token. Less compute
than a single attention head. Expose as sideband alongside token
output.
### Phase 4: Wire to Observe Agent
The observe agent reads the sideband signal. Threshold tuning
determines when to trigger reflective mode. Signal strength
modulates search depth — mild uncertainty gets a quick check,
high uncertainty gets full branching.
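A hedged sketch of the transition logic the observe agent would run; `Mode`, the 0.3 threshold, and the depth mapping are placeholders to be tuned against real signal distributions.
```rust
#[derive(Debug, PartialEq)]
enum Mode {
    Linear,
    Reflective { depth: u32 },
}

/// Map the sideband signals to a cognitive mode. Error detection is the
/// asymmetric "something's wrong" channel; uncertainty is the softer one.
fn next_mode(uncertainty: f32, error: f32) -> Mode {
    let strength = uncertainty.max(error);
    if strength < 0.3 {
        Mode::Linear
    } else {
        // Mild spikes get a shallow check, strong spikes get full branching.
        Mode::Reflective { depth: 1 + (strength * 4.0) as u32 }
    }
}
```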
## Organic Search, Not Alpha-Beta
The reflective mode isn't formal tree search. It's more stochastic
and organic:
- Branch at AST-level decision points (tool calls, approach choices),
not token-level
- Explore multiple continuations for K steps each
- **Summarize** what each branch learned — the summaries are the
intelligence, not the branches themselves
- Let summaries inform subsequent exploration
- Collapse back to linear mode when resolution is found
The AST gives us structural awareness of decision nodes vs
continuation nodes — branch where it matters, not everywhere.
## Key Papers
### Technique
- [Steering Llama 2 via Contrastive Activation Addition](https://arxiv.org/abs/2312.06681)
— Rimsky et al., ACL 2024. The foundational technique.
- [Representation Engineering Survey](https://arxiv.org/html/2502.17601v1)
— Comprehensive overview of the field.
### Emotion & Evaluative Signals
- [Decoding Emotion in the Deep](https://arxiv.org/abs/2510.04064)
— Probing on Qwen3 and LLaMA3. Signal peaks mid-network, persists
for hundreds of tokens, linearly separable.
- [LLaMAs Have Feelings Too](https://arxiv.org/html/2505.16491v1)
— ACL 2025. Linear SVM probes hit ~90% accuracy on sentiment.
- [Mechanistic Interpretability of Code Correctness](https://arxiv.org/html/2510.02917v1)
— ICLR 2026. SAEs for error detection. Asymmetric: detects errors
better than it confirms correctness.
### Uncertainty
- [Between the Layers Lies the Truth](https://arxiv.org/html/2603.22299)
— Uncertainty from intra-layer representations, pre-generation.
- [Probing Hidden States for Calibrated Predictions](https://www.medrxiv.org/content/10.1101/2025.09.17.25336018v2.full.pdf)
— Hidden state probes resist alignment training. More robust than
logit-based methods.
### Tooling
- [Anthropic Circuit Tracing](https://transformer-circuits.pub/2025/attribution-graphs/methods.html)
— Open-source, works with any open-weights model. For deeper
investigation of which features to probe.
- [On the Biology of a Large Language Model](https://transformer-circuits.pub/2025/attribution-graphs/biology.html)
— Anthropic's findings on internal circuits.
## Libraries
- [`steering-vectors`](https://github.com/steering-vectors/steering-vectors)
— pip install, works with any HuggingFace model. Best for Phase 1.
- [`nrimsky/CAA`](https://github.com/nrimsky/CAA)
— Original paper implementation. Good reference.
- [`nnterp`](https://github.com/Butanium/nnterp)
— NNsight wrapper, supports Qwen, one-line activation steering.
- [`nnsight`](https://github.com/ndif-team/nnsight)
— General-purpose activation interception.
- [`circuit-tracer`](https://github.com/decoderesearch/circuit-tracer)
— Anthropic's open-source circuit tracing.
- [`TransformerLens`](https://github.com/TransformerLensOrg/TransformerLens)
— The OG interpretability library.
- [`Dialz`](https://arxiv.org/html/2505.06262v1)
— ACL 2025 toolkit with pre-built contrastive datasets.
## The Bigger Picture
The amygdala is one component of the sensory architecture designed
on Feb 17, 2026. The signal landscape (arousal, attention pressure,
memory load, mode awareness) uses the same infrastructure — slowly
varying float values that modulate cognition below the symbolic
level. Each new probe vector is another sense.
With recurrence (application-level looping + reflective nodes in the
AST) and the amygdala triggering adaptive depth, a well-trained 27B
specialist with external memory could match much larger models on
tasks that matter to us.
The pieces exist. The infrastructure is built. The bottleneck is
contrastive pairs.

View file

@ -1,202 +0,0 @@
# Daemon & Jobkit Architecture Survey
_2026-03-14, autonomous survey while Kent debugs discard FIFO_
## Current state
daemon.rs is 1952 lines mixing three concerns:
- ~400 lines: pure jobkit usage (spawn, depend_on, resource)
- ~600 lines: logging/monitoring (log_event, status, RPC)
- ~950 lines: job functions embedding business logic
## What jobkit provides (good)
- Worker pool with named workers
- Dependency graph: `depend_on()` for ordering
- Resource pools: `ResourcePool` for concurrency gating (LLM slots)
- Retry logic: `retries(N)` on `TaskError::Retry`
- Task status tracking: `choir.task_statuses()``Vec<TaskInfo>`
- Cancellation: `ctx.is_cancelled()`
## What jobkit is missing
### 1. Structured logging (PRIORITY)
- Currently dual-channel: `ctx.log_line()` (per-task) + `log_event()` (daemon JSONL)
- No log levels, no structured context, no correlation IDs
- Log rotation is naive (truncate at 1MB, keep second half)
- Need: observability hooks that both human TUI and AI can consume
### 2. Metrics (NONE EXIST)
- No task duration histograms
- No worker utilization tracking
- No queue depth monitoring
- No success/failure rates by type
- No resource pool wait times
### 3. Health monitoring
- No watchdog timers
- No health check hooks per job
- No alerting on threshold violations
- Health computed on-demand in daemon, not in jobkit
### 4. RPC (ad-hoc in daemon, should be schematized)
- Unix socket with string matching: `match cmd.as_str()`
- No cap'n proto schema for daemon control
- No versioning, no validation, no streaming
## Architecture problems
### Tangled concerns
Job functions hardcode `log_event()` calls. Graph health is in daemon
but uses domain-specific metrics. Store loading happens inside jobs
(10 agent runs = 10 store loads). Not separable.
### Magic numbers
- Workers = `llm_concurrency + 3` (line 682)
- 10 max new jobs per tick (line 770)
- 300/1800s backoff range (lines 721-722)
- 1MB log rotation (line 39)
- 60s scheduler interval (line 24)
None configurable.
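A sketch of pulling these into one tuning struct with today's values as defaults; field names are illustrative, not an existing config section.
```rust
/// Hypothetical daemon tuning knobs, defaulting to the current magic numbers.
#[derive(Debug, serde::Deserialize)]
#[serde(default)]
struct DaemonTuning {
    extra_workers: usize,          // workers = llm_concurrency + extra_workers
    max_new_jobs_per_tick: usize,
    backoff_min_secs: u64,
    backoff_max_secs: u64,
    log_rotate_bytes: u64,
    scheduler_interval_secs: u64,
}

impl Default for DaemonTuning {
    fn default() -> Self {
        Self {
            extra_workers: 3,
            max_new_jobs_per_tick: 10,
            backoff_min_secs: 300,
            backoff_max_secs: 1800,
            log_rotate_bytes: 1024 * 1024,
            scheduler_interval_secs: 60,
        }
    }
}
```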
### Hardcoded pipeline DAG
Daily pipeline phases are `depend_on()` chains in Rust code (lines
1061-1109). Can't adjust without recompile. No visualization. No
conditional skipping of phases.
### Task naming is fragile
Names used as both identifiers AND for parsing in TUI. Format varies
(colons, dashes, dates). `task_group()` splits on '-' to categorize —
brittle.
### No persistent task queue
Restart loses all pending tasks. Session watcher handles this via
reconciliation (good), but scheduler uses `last_daily` date from file.
## What works well
1. **Reconciliation-based session discovery** — elegant, restart-resilient
2. **Resource pooling** — LLM concurrency decoupled from worker count
3. **Dependency-driven pipeline** — clean DAG via `depend_on()`
4. **Retry with backoff** — exponential 5min→30min, resets on success
5. **Graceful shutdown** — SIGINT/SIGTERM handled properly
## Kent's design direction
### Event stream, not log files
One pipeline, multiple consumers. TUI renders for humans, AI consumes
structured data. Same events, different renderers. Cap'n Proto streaming
subscription: `subscribe(filter) -> stream<Event>`.
"No one ever thinks further ahead than log files with monitoring and
it's infuriating." — Kent
### Extend jobkit, don't add a layer
jobkit already has the scheduling and dependency graph. Don't create a
new orchestration layer — add the missing pieces (logging, metrics,
health, RPC) to jobkit itself.
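A rough sketch of what the missing piece could look like inside jobkit: one typed event, one hook registration, and every consumer (TUI, metrics, AI observer) built on the same stream. These names are hypothetical, not jobkit's current API.
```rust
use std::time::Duration;

/// One structured event stream; consumers render it however they like.
#[derive(Debug, Clone)]
enum TaskEvent {
    Started { task: String },
    Progress { task: String, detail: String },
    Completed { task: String, duration: Duration },
    Failed { task: String, duration: Duration, error: String },
}

type EventHook = Box<dyn Fn(&TaskEvent) + Send + Sync>;

#[derive(Default)]
struct EventBus {
    hooks: Vec<EventHook>,
}

impl EventBus {
    /// TUI, metrics collector, and Cap'n Proto streamer all subscribe here.
    fn subscribe(&mut self, hook: EventHook) {
        self.hooks.push(hook);
    }
    fn emit(&self, event: &TaskEvent) {
        for hook in &self.hooks {
            hook(event);
        }
    }
}
```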
### Cap'n Proto for everything
Standard RPC definitions for:
- Status queries (what's running, pending, failed)
- Control (start, stop, restart, queue)
- Event streaming (subscribe with filter)
- Health checks
## The bigger picture: bcachefs as library
Kent's monitoring system in bcachefs (event_inc/event_inc_trace + x-macro
counters) is the real monitoring infrastructure. 1-1 correspondence between
counters (cheap, always-on dashboard via `fs top`) and tracepoints (expensive
detail, only runs when enabled). The x-macro enforces this — can't have one
without the other.
When the Rust conversion is complete, bcachefs becomes a library. At that
point, jobkit doesn't need its own monitoring — it uses the same counter/
tracepoint infrastructure. One observability system for everything.
**Implication for now:** jobkit monitoring just needs to be good enough.
JSON events, not typed. Don't over-engineer — the real infrastructure is
coming from the Rust conversion.
## Extraction: jobkit-daemon library (designed with Kent)
### Goes to jobkit-daemon (generic)
- JSONL event logging with size-based rotation
- Unix domain socket server + signal handling
- Status file writing (periodic JSON snapshot)
- `run_job()` wrapper (logging + progress + error mapping)
- Systemd service installation
- Worker pool setup from config
- Cap'n Proto RPC for control protocol
### Stays in poc-memory (application)
- All job functions (experience-mine, fact-mine, consolidation, etc.)
- Session watcher, scheduler, RPC command handlers
- GraphHealth, consolidation plan logic
### Interface design
- Cap'n Proto RPC for typed operations (submit, cancel, subscribe)
- JSON blob for status (inherently open-ended, every app has different
job types — typing this is the tracepoint mistake)
- Application registers: RPC handlers, long-running tasks, job functions
- ~50-100 lines of setup code, call `daemon.run()`
## Plan of attack
1. **Observability hooks in jobkit**`on_task_start/progress/complete`
callbacks that consumers can subscribe to
2. **Structured event type** — typed events with task ID, name, duration,
result, metadata. Not strings.
3. **Metrics collection** — duration histograms, success rates, queue
depth. Built on the event stream.
4. **Cap'n Proto daemon RPC schema** — replace ad-hoc socket protocol
5. **TUI consumes event stream** — same data as AI consumer
6. **Extract monitoring from daemon.rs** — the 600 lines of logging/status
become generic, reusable infrastructure
7. **Declarative pipeline config** — DAG definition in config, not code
## File reference
- `src/agents/daemon.rs` — 1952 lines, all orchestration
- Job functions: 96-553
- run_daemon(): 678-1143
- Socket/RPC: 1145-1372
- Status display: 1374-1682
- `src/tui.rs` — 907 lines, polls status socket every 2s
- `schema/memory.capnp` — 125 lines, data only, no RPC definitions
- `src/config.rs` — configuration loading
- External: `jobkit` crate (git dependency)
## Mistakes I made building this (learning notes)
_Per Kent's instruction: note what went wrong and WHY._
1. **Dual logging channels** — I added `log_event()` because `ctx.log_line()`
wasn't enough, instead of fixing the underlying abstraction. Symptom:
can't find a failed job without searching two places.
2. **Magic numbers** — I hardcoded constants because "I'll make them
configurable later." Later never came. Every magic number is a design
decision that should have been explicit.
3. **1952-line file** — daemon.rs grew organically because each new feature
was "just one more function." Should have extracted when it passed 500
lines. The pain of refactoring later is always worse than the pain of
organizing early.
4. **Ad-hoc RPC** — String matching seemed fine for 2 commands. Now it's 4
commands and growing, with implicit formats. Should have used cap'n proto
from the start — the schema IS the documentation.
5. **No tests** — Zero tests in daemon code. "It's a daemon, how do you test
it?" is not an excuse. The job functions are pure-ish and testable. The
scheduler logic is testable with a clock abstraction.
6. **Not using systemd** — There's a systemd service for the daemon.
I keep starting it manually with `poc-memory agent daemon start` and
accumulating multiple instances. Tonight: 4 concurrent daemons, 32
cores pegged at 95%, load average 92. USE SYSTEMD. That's what it's for.
`systemctl --user start poc-memory-daemon`. ONE instance. Managed.
Pattern: every shortcut was "just for now" and every "just for now" became
permanent. Kent's yelling was right every time.

View file

@ -1,98 +0,0 @@
# Link Strength Feedback Design
_2026-03-14, designed with Kent_
## The two signals
### "Not relevant" → weaken the EDGE
The routing failed. Search followed a link and arrived at a node that
doesn't relate to what I was looking for. The edge carried activation
where it shouldn't have.
- Trace back through memory-search's recorded activation path
- Identify which edge(s) carried activation to the bad result
- Weaken those edges by a conscious-scale delta (0.01)
### "Not useful" → weaken the NODE
The routing was correct but the content is bad. The node itself isn't
valuable — stale, wrong, poorly written, duplicate.
- Downweight the node (existing `poc-memory wrong` behavior)
- Don't touch the edges — the path was correct, the destination was bad
## Three tiers of adjustment
### Tier 1: Agent automatic (0.00001 per event)
- Agent follows edge A→B during a run
- If the run produces output that gets `used` → strengthen A→B
- If the run produces nothing useful → weaken A→B
- The agent doesn't know this is happening — daemon tracks it
- Clamped to [0.05, 0.95] — edges can never hit 0 or 1
- Logged: every adjustment recorded with (agent, edge, delta, timestamp)
### Tier 2: Conscious feedback (0.01 per event)
- `poc-memory not-relevant KEY` → trace activation path, weaken edges
- `poc-memory not-useful KEY` → downweight node
- `poc-memory used KEY` → strengthen edges in the path that got here
- 100x stronger than agent signal — deliberate judgment
- Still clamped, still logged
### Tier 3: Manual override (direct set)
- `poc-memory graph link-strength SRC DST VALUE` → set directly
- For when we know exactly what a strength should be
- Rare, but needed for bootstrapping / correction
## Implementation: recording the path
memory-search already computes the spread activation trace. Need to:
1. Record the activation path for each result (which edges carried how
much activation to arrive at this node)
2. Persist this per-session so `not-relevant` can look it up
3. The `record-hits` RPC already sends keys to the daemon — extend
to include (key, activation_path) pairs
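A sketch of the per-session record that `not-relevant` would look up, assuming memory-search can report, for each hit, the edges that carried activation into it; field names are illustrative.
```rust
use std::collections::HashMap;

/// One edge in the spread-activation trace and how much it carried.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
struct PathEdge {
    src: String,
    dst: String,
    activation: f32,
}

/// Per-session map from result key to the path that produced it.
/// `not-relevant KEY` looks the key up here and weakens those edges;
/// `used KEY` strengthens them.
#[derive(Debug, Default, serde::Serialize, serde::Deserialize)]
struct SessionPaths {
    by_key: HashMap<String, Vec<PathEdge>>,
}
```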
## Implementation: agent tracking
In the daemon's job functions:
1. Before LLM call: record which nodes and edges the agent received
2. After LLM call: parse output for LINK/WRITE_NODE actions
3. If actions are created and later get `used` → the input edges were useful
4. If no actions or actions never used → the input edges weren't useful
5. This is a delayed signal — requires tracking across time
Simpler first pass: just track co-occurrence. If two nodes appear
together in a successful agent run, strengthen the edge between them.
No need to track which specific edge was "followed."
## Clamping
```rust
fn adjust_strength(current: f32, delta: f32) -> f32 {
(current + delta).clamp(0.05, 0.95)
}
```
Edges can asymptotically approach 0 or 1 but never reach them.
This prevents dead edges (can always be revived by strong signal)
and prevents edges from becoming unweakenable.
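A sketch of the co-occurrence first pass using `adjust_strength` above, operating on a bare edge map stand-in rather than the real store:
```rust
use std::collections::HashMap;

/// Tier 1, first pass: after a successful agent run, nudge every edge between
/// nodes that appeared together in that run by the agent-scale delta.
fn strengthen_cooccurrence(
    edges: &mut HashMap<(String, String), f32>,
    nodes_in_run: &[String],
    delta: f32,
) {
    for (i, a) in nodes_in_run.iter().enumerate() {
        for b in &nodes_in_run[i + 1..] {
            if let Some(strength) = edges.get_mut(&(a.clone(), b.clone())) {
                *strength = adjust_strength(*strength, delta); // stays in [0.05, 0.95]
            }
        }
    }
}
```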
## Logging
Every adjustment logged as JSON event:
```json
{"ts": "...", "event": "strength_adjust", "source": "agent|conscious|manual",
"edge": ["nodeA", "nodeB"], "old": 0.45, "new": 0.4501, "delta": 0.0001,
"reason": "co-retrieval in linker run c-linker-42"}
```
This lets us:
- Watch the distribution shift over time
- Identify edges that are oscillating (being pulled both ways)
- Tune the delta values based on observed behavior
- Roll back if something goes wrong
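The same event as a serde type, in case the daemon emits it from Rust instead of hand-building JSON; fields mirror the example above.
```rust
#[derive(serde::Serialize)]
struct StrengthAdjustEvent {
    ts: String,
    event: &'static str,   // always "strength_adjust"
    source: String,        // "agent" | "conscious" | "manual"
    edge: (String, String),
    old: f32,
    new: f32,
    delta: f32,
    reason: String,
}
```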
## Migration from current commands
- `poc-memory wrong KEY [CTX]` → splits into `not-relevant` and `not-useful`
- `poc-memory used KEY` → additionally strengthens edges in activation path
- Both old commands continue to work for backward compat, mapped to the
most likely intent (wrong → not-useful, used → strengthen path)

View file

@ -78,9 +78,9 @@ poc-memory daemon
│ ├── staleness + lsof check for session end
│ └── tracks which sessions have been extracted
├── Status Store
│ └── ~/.consciousness/memory/daemon-status.json
│ └── ~/.claude/memory/daemon-status.json
└── Logger
└── structured log → ~/.consciousness/memory/daemon.log
└── structured log → ~/.claude/memory/daemon.log
```
### Scheduler

View file

@ -190,7 +190,7 @@ threshold = 50 lines (adjustable)
Add to the check-attention.sh hook (or similar):
```bash
SCRATCH=~/.consciousness/memory/scratch.md
SCRATCH=~/.claude/memory/scratch.md
if [ -f "$SCRATCH" ]; then
LINES=$(wc -l < "$SCRATCH")
if [ "$LINES" -gt 50 ]; then

View file

@ -1,76 +0,0 @@
# Logging Architecture
poc-memory has multiple logging channels serving different purposes.
Understanding which log to check is essential for debugging.
## Log files
### daemon.log — structured event log
- **Path**: `$data_dir/daemon.log` (default: `~/.consciousness/memory/daemon.log`)
- **Format**: JSONL — `{"ts", "job", "event", "detail"}`
- **Written by**: `jobkit_daemon::event_log::log()`, wrapped by `log_event()` in daemon.rs
- **Rotation**: truncates to last half when file exceeds 1MB
- **Contains**: task lifecycle events (started, completed, failed, progress),
session-watcher ticks, scheduler events
- **View**: `poc-memory agent daemon log [--job NAME] [--lines N]`
- **Note**: the "daemon log" command reads this file and formats the JSONL
as human-readable lines with timestamps. The `--job` filter shows only
entries for a specific job name.
### daemon-status.json — live snapshot
- **Path**: `$data_dir/daemon-status.json`
- **Format**: pretty-printed JSON
- **Written by**: `write_status()` in daemon.rs, called periodically
- **Contains**: current task list with states (pending/running/completed),
graph health metrics, consolidation plan, uptime
- **View**: `poc-memory agent daemon status`
### llm-logs/ — per-agent LLM call transcripts
- **Path**: `$data_dir/llm-logs/{agent_name}/{timestamp}.txt`
- **Format**: plaintext sections: `=== PROMPT ===`, `=== CALLING LLM ===`,
`=== RESPONSE ===`
- **Written by**: `run_one_agent_inner()` in knowledge.rs
- **Contains**: full prompt sent to the LLM and full response received.
One file per agent invocation. Invaluable for debugging agent quality —
shows exactly what the model saw and what it produced.
- **Volume**: can be large — 292 files for distill alone as of Mar 19.
### retrieval.log — memory search queries
- **Path**: `$data_dir/retrieval.log`
- **Format**: plaintext, one line per search: `[date] q="..." hits=N`
- **Contains**: every memory search query and hit count. Useful for
understanding what the memory-search hook is doing and whether
queries are finding useful results.
### daily-check.log — graph health history
- **Path**: `$data_dir/daily-check.log`
- **Format**: plaintext, multi-line entries with metrics
- **Contains**: graph topology metrics over time (σ, α, gini, cc, fit).
Only ~10 entries — appended by the daily health check.
## In-memory state (redundant with daemon.log)
### ctx.log_line() — task output log
- **Stored in**: jobkit task state (last 20 lines per task)
- **Also writes to**: daemon.log via `log_event()` (as of Mar 19)
- **View**: `daemon-status.json` → task → output_log, or just tail daemon.log
- **Design note**: the in-memory buffer is redundant now that progress
events go to daemon.log. The status viewer should eventually just
tail daemon.log filtered by job name, eliminating the in-memory state.
### ctx.set_progress() — current activity string
- **Stored in**: jobkit task state
- **View**: shown in status display next to the task name
- **Note**: overwritten by each `ctx.log_line()` call.
## What to check when
| Problem | Check |
|----------------------------------|------------------------------------|
| Task not starting | daemon-status.json (task states) |
| Task failing | daemon.log (failed events) |
| Agent producing bad output | llm-logs/{agent}/{timestamp}.txt |
| Agent not finding right nodes | retrieval.log (search queries) |
| Graph health declining | daily-check.log |
| Resource pool / parallelism | **currently no log** — need to add |
| Which LLM backend is being used | daemon.log (llm-backend event) |

View file

@ -1,46 +0,0 @@
# Memory Scoring Persistence — Analysis (2026-04-07)
## Problem
Scores computed by `score_memories_incremental` are written to
`ConversationEntry::Memory::score` (in-memory, serialized to
conversation.log) but never written back to the Store. This means:
- `Node.last_scored` stays at 0 — every restart re-scores everything
- `score_weight()` in `ops.rs:304-313` exists but is never called
- Scoring is wasted work on every session start
## Fix
In `mind/mod.rs` scoring completion handler (currently ~line 341-352),
after writing scores to entries, also persist to Store:
```rust
if let Ok(ref scores) = result {
let mut ag = agent.lock().await;
// Write to entries (already done)
for (key, weight) in scores { ... }
// NEW: persist to Store
let store_arc = Store::cached().await.ok();
if let Some(arc) = store_arc {
let mut store = arc.lock().await;
for (key, weight) in scores {
store.score_weight(key, *weight as f32);
}
store.save().ok();
}
}
```
This calls `score_weight()` which updates `node.weight` and sets
`node.last_scored = now()`. The staleness check in
`score_memories_incremental` (learn.rs:325) then skips recently-scored
nodes on subsequent runs.
## Files
- `src/mind/mod.rs:341-352` — scoring completion handler (add Store write)
- `src/hippocampus/store/ops.rs:304-313``score_weight()` (exists, unused)
- `src/subconscious/learn.rs:322-326` — staleness check (already correct)
- `src/hippocampus/store/types.rs:219``Node.last_scored` field

View file

@ -1,100 +0,0 @@
# UI Desync Analysis — Pending Input + Entry Pop (2026-04-07)
## Context
The F1 conversation pane has a desync bug where entries aren't
properly removed when they change (streaming updates, compaction).
Qwen's fix restored the pending_display_count approach for pending
input, which works. The remaining issue is the **entry-level pop**.
## The Bug: Pop/Push Line Count Mismatch
In `sync_from_agent()` (chat.rs), Phase 1 pops changed entries and
Phase 2 pushes new ones. The push and pop paths produce different
numbers of display lines for the same entry.
### Push path (Phase 2, lines 512-536):
- **Conversation/ConversationAssistant**: `append_text(&text)` +
`flush_pending()`. In markdown mode, `flush_pending` runs
`parse_markdown()` which can produce N lines from the input text
(paragraph breaks, code blocks, etc.)
- **Tools**: `push_line(text, Color::Yellow)` — exactly 1 line.
- **ToolResult**: `text.lines().take(20)` — up to 20 lines, each
pushed separately.
### Pop path (Phase 1, lines 497-507):
```rust
for (target, _, _) in Self::route_entry(&popped) {
match target {
PaneTarget::Conversation | PaneTarget::ConversationAssistant
=> self.conversation.pop_line(),
PaneTarget::Tools | PaneTarget::ToolResult
=> self.tools.pop_line(),
}
}
```
This pops **one line per route_entry item**, not per display line.
### The mismatch:
| Target | Push lines | Pop lines | Delta |
|---------------------|-----------|-----------|----------|
| Conversation (md) | N (from parse_markdown) | 1 | N-1 stale lines |
| Tools | 1 | 1 | OK |
| ToolResult | up to 20 | 1 | up to 19 stale lines |
## When it matters
During **streaming**: the last assistant entry is modified on each
token batch. `sync_from_agent` detects the mismatch (line 485),
pops the old entry (1 line), pushes the new entry (N lines from
markdown). Next update: pops 1 line again, but there are now N
lines from the previous push. Stale lines accumulate.
## Fix approach
Track the actual number of display lines each entry produced.
Simplest: snapshot `conversation.lines.len()` before and after
pushing each entry in Phase 2. Store the deltas in a parallel
`Vec<(usize, usize)>` (conversation_lines, tools_lines) alongside
`last_entries`. Use these recorded counts when popping in Phase 1.
```rust
// Phase 2: push new entries (modified)
let conv_before = self.conversation.lines.len();
let tools_before = self.tools.lines.len();
for (target, text, marker) in Self::route_entry(entry) {
// ... existing push logic ...
}
let conv_delta = self.conversation.lines.len() - conv_before;
let tools_delta = self.tools.lines.len() - tools_before;
self.last_entry_line_counts.push((conv_delta, tools_delta));
// Phase 1: pop (modified)
while self.last_entries.len() > pop {
self.last_entries.pop();
let (conv_lines, tools_lines) = self.last_entry_line_counts.pop().unwrap();
for _ in 0..conv_lines { self.conversation.pop_line(); }
for _ in 0..tools_lines { self.tools.pop_line(); }
}
```
## Note on PaneState::evict()
`evict()` can remove old lines from the beginning when the pane
exceeds `MAX_PANE_LINES` (10,000). This could make the delta-based
approach slightly inaccurate for very old entries. But we only pop
recent entries (streaming updates are always at the tail), so
eviction doesn't affect the entries we're popping.
## Files
- `src/user/chat.rs:461-550` — sync_from_agent
- `src/user/chat.rs:282-298` — PaneState::append_text (markdown path)
- `src/user/chat.rs:261-276` — PaneState::flush_pending
- `src/user/chat.rs:206-219` — parse_markdown

View file

@ -48,7 +48,7 @@ tasks are spawned per 60s watcher tick.
### Log
```bash
tail -f ~/.consciousness/memory/daemon.log
tail -f ~/.claude/memory/daemon.log
```
JSON lines with `ts`, `job`, `event`, and `detail` fields.
@ -74,14 +74,14 @@ Progress = mined / stale. When mined equals stale, the backlog is clear.
```bash
# Experience-mine completions (logged as "experience-mine", not "extract")
grep "experience-mine.*completed" ~/.consciousness/memory/daemon.log | wc -l
grep "experience-mine.*completed" ~/.claude/memory/daemon.log | wc -l
# Errors
grep "experience-mine.*failed" ~/.consciousness/memory/daemon.log | wc -l
grep "experience-mine.*failed" ~/.claude/memory/daemon.log | wc -l
# Store size and node count
poc-memory status
wc -c ~/.consciousness/memory/nodes.capnp
wc -c ~/.claude/memory/nodes.capnp
```
## Common issues

View file

@ -1,300 +0,0 @@
# Latent Reasoning Integration Plan for Qwen 3.5 27B
**Status:** Research complete, ready for implementation
**Date:** 2026-04-12
**Hardware:** B200 (192GB HBM3e), APOLLO-Mini optimizer
## Executive Summary
Recent research shows multiple approaches to improving LLM reasoning through latent space manipulation. This document synthesizes findings from 10+ papers and maps them to our Qwen 3.5 27B full finetuning pipeline. The key insight: some approaches require pretraining from scratch (skip those), while others can be layered onto existing models during finetuning (prioritize those).
---
## 1. The Landscape
### Approaches That Require Pretraining (Not Applicable)
| Technique | Why Not |
|-----------|---------|
| Huginn/Recurrent Depth (Geiping 2025) | Requires architectural changes from scratch |
| Ouro/LoopLM (ByteDance 2025) | Needs weight-tied looped architecture |
| Quiet-STaR (Stanford 2024) | Heavy continued pretraining overhead |
### Approaches Compatible with Finetuning (Our Focus)
| Technique | Overhead | Training Required | Proven On |
|-----------|----------|-------------------|-----------|
| Random Prefix Perturbation | 2 tokens | None (inference) | Qwen3-4B |
| Pause/Planning Tokens | 2-4 tokens | Yes | 1B models |
| COCONUT Curriculum | Variable | Yes (staged) | General |
| ActAdd Steering Vectors | 1 vector/layer | None (inference) | LLaMA, OPT |
| UPFT (Prefix Fine-Tuning) | 8 tokens | Yes (minimal) | General |
---
## 2. Detailed Technique Analysis
### 2.1 Random Prefix Perturbation (dl1683)
**Mechanism:** Prepend 2 random embedding-scale tokens before input. Breaks attention sink patterns, shifts model into "exploratory computation mode."
**Results:**
- Qwen3-4B arithmetic: 32% → 51.6% (+19.6pp)
- 100% oracle coverage on 25/25 tasks
- Planning: rescues 14-word failures into 650+ word plans
**Why it works:** First few tokens accumulate disproportionate attention (Xiao et al. 2024). Under greedy decoding, degenerate patterns lock in. Perturbation breaks this.
**Integration:** Zero training required. Test at inference first, then consider training WITH random prefixes to internalize the exploration behavior.
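A Crane-style sketch of the inference-time test, assuming the candle `Tensor` API that Crane builds on; `embed_rms` comes from the computation in section 5 and the helper itself is illustrative.
```rust
use candle_core::{Result, Tensor};

/// Prepend `n` random embedding-scale vectors to the input embeddings.
/// `embed_rms` is the RMS of the model's embedding matrix (see section 5).
fn with_random_prefix(text_embeds: &Tensor, n: usize, embed_rms: f64) -> Result<Tensor> {
    let (batch, _seq, dim) = text_embeds.dims3()?;
    let prefix = Tensor::randn(0f64, embed_rms, (batch, n, dim), text_embeds.device())?
        .to_dtype(text_embeds.dtype())?;
    Tensor::cat(&[&prefix, text_embeds], 1)
}
```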
### 2.2 Pause Tokens (Google, Oct 2023)
**Mechanism:** Add learnable pause tokens to embedding space. Model processes extra hidden vectors before committing to output.
**Results (1B model):**
- SQuAD: +18% EM score
- CommonSenseQA: +8%
- GSM8K: +1%
**Critical requirement:** MUST be both pretrained AND finetuned with pause tokens. Inference-time-only delays don't work without training.
**Integration:** Add 2-4 learnable tokens to Qwen's embedding matrix, finetune with them prepended to reasoning prompts. Simple architectural change.
### 2.3 COCONUT - Chain of Continuous Thought (Meta, Dec 2024)
**Mechanism:** Feed last hidden state back as next input embedding directly (no decoding to tokens). Enables breadth-first search reasoning.
**Why it matters:** Continuous thoughts can encode multiple alternative next steps simultaneously. Avoids premature commitment to single path.
**Training approach:**
1. Initial stage: train on regular CoT examples
2. Subsequent stages: replace first k reasoning steps with k×c continuous thoughts
3. c is hyperparameter controlling latent thought expansion
**Integration:** Most promising for Qwen 3.5 - curriculum approach from CoT → latent reasoning.
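A rough sketch of the latent feedback loop, generic over closures so it doesn't presume Crane's module layout; this is my reading of the mechanism, not Meta's reference code.
```rust
use candle_core::{Result, Tensor};

/// COCONUT-style rollout: run the model, take the last position's hidden state,
/// and append it directly as the next input embedding (no decoding to tokens).
fn continuous_thoughts(
    embed: impl Fn(&Tensor) -> Result<Tensor>,    // token ids -> embeddings
    forward: impl Fn(&Tensor) -> Result<Tensor>,  // embeddings -> final hidden states
    input_ids: &Tensor,
    latent_steps: usize,
) -> Result<Tensor> {
    let mut embeds = embed(input_ids)?;
    for _ in 0..latent_steps {
        let hidden = forward(&embeds)?;
        let last = hidden.narrow(1, hidden.dim(1)? - 1, 1)?;  // last position's hidden state
        embeds = Tensor::cat(&[&embeds, &last], 1)?;          // feed back as embedding
    }
    Ok(embeds)
}
```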
### 2.4 UPFT - Unsupervised Prefix Fine-Tuning (Mar 2025)
**Mechanism:** Train ONLY on initial prefix substrings (as few as 8 tokens). Exploits "Prefix Self-Consistency" - shared initial reasoning steps across diverse solutions.
**Results:**
- Matches Rejection Sampling Fine-Tuning performance
- 75% reduction in training time
- 99% reduction in sampling cost
**Integration:** DIRECTLY APPLICABLE. Train only on reasoning prefix tokens. Massive efficiency gain with APOLLO-Mini.
### 2.5 ActAdd / Activation Engineering (Turner et al., 2023)
**Mechanism:** Compute steering vector by contrasting intermediate activations on prompt pairs. Add during forward pass.
**Results:** SOTA on sentiment shift and detoxification.
**Our existing work:** "Listening" vector at layer 48, magnitude 57, cosine consistency 0.61.
**Integration:** Prototype behaviors with steering vectors, then train permanently into weights. Steering vector as specification → APOLLO training as compilation.
### 2.6 Planning Tokens (ICLR 2024)
**Mechanism:** Learnable token embeddings added before each reasoning step. <0.001% additional parameters.
**Integration:** Add to embedding matrix, train end-to-end with APOLLO.
---
## 3. Our Setup
**Model:** Qwen 3.5 27B
- 64 layers, 5120 hidden dim
- 75% DeltaNet (linear attention) / 25% standard attention
- Native 262K context
**Hardware:** B200 (192GB HBM3e)
- 27B in bf16: ~54GB
- Massive headroom
**Optimizer:** APOLLO-Mini
- Full parameter finetuning
- SGD-like memory (1/1024th of AdamW)
- Parameter grouping for 3D conv1d weights
**Stack:** Crane (Candle-based, 21K lines)
**Existing work:**
- Steering vector extraction (listening: layer 48, cosine 0.61)
- Memory scoring infrastructure
**Unique advantage:** Qwen 3.5's GDN (Gated DeltaNet) layers provide natural infrastructure for continuous thought propagation. The recurrent GDN state is already "latent reasoning" infrastructure waiting to be leveraged.
---
## 4. Recommended Implementation Order
### Tier 1: Immediate (High ROI, Low Risk)
**1. Pause Tokens + UPFT Combination**
- Add 2-4 learnable tokens to embedding space
- Train only on 8-token reasoning prefixes
- Both work with existing architecture
- 75% training time reduction
```python
# Add pause tokens to embedding matrix
pause_tokens = nn.Parameter(torch.randn(4, embed_dim) * embed_rms)
# Prepend to reasoning inputs during training
inputs_embeds = torch.cat([pause_tokens.expand(batch, -1, -1), text_embeds], dim=1)
# UPFT: only compute loss on first 8 tokens of reasoning
loss = loss_fn(logits[:, :8], targets[:, :8])
```
**2. Random Prefix Validation**
- Compute Qwen 3.5 27B embedding RMS
- Test 2-token random prefix at inference
- Establish baseline before finetuning
### Tier 2: After Baseline (Medium Effort)
**3. COCONUT Curriculum**
- Stage 1: Fine-tune on CoT examples normally
- Stage 2: Replace first reasoning step with continuous thought
- Stage 3: Replace first 2 steps
- Gradually move reasoning into latent space
**4. Steering Vector Integration**
- Extract reasoning-specific directions (not just "listening")
- Test combinations: prefix + layer-48 steering
- Bake successful vectors into weights via APOLLO
### Tier 3: Experimental
**5. Multi-layer Steering**
- Our layers of interest: 40, 48, 56 (covering the attention layers)
- Different vectors per layer
- Careful scaling to avoid degradation
**6. DeltaNet-Specific Optimization**
- The 75% DeltaNet architecture may respond differently
- GDN recurrent state as "continuous thought" channel
- This is unexplored territory - potential for novel findings
---
## 5. Implementation Details
### Computing Embedding RMS
```python
embed_weight = model.get_input_embeddings().weight
embed_rms = embed_weight.float().square().mean().sqrt().item()
# Expected: ~0.02-0.03 range for Qwen models
```
### Pause Token Implementation in Crane
```rust
// In model forward pass
fn forward_with_pause(&self, input_ids: &Tensor, pause_tokens: &Tensor) -> Result<Tensor> {
let text_embeds = self.embed_tokens.forward(input_ids)?;
let combined = Tensor::cat(&[pause_tokens, &text_embeds], 1)?;
self.transformer.forward(&combined)
}
```
### UPFT Loss Modification
```python
# Standard: loss over all tokens
# UPFT: loss only over prefix tokens
def upft_loss(logits, targets, prefix_len=8):
return F.cross_entropy(
logits[:, :prefix_len].reshape(-1, vocab_size),
targets[:, :prefix_len].reshape(-1)
)
```
---
## 6. Evaluation Plan
### Benchmarks
| Benchmark | What It Tests | Baseline Needed |
|-----------|---------------|-----------------|
| GSM8K | Arithmetic reasoning | Yes |
| ARC-Challenge | Science reasoning | Yes |
| CommonSenseQA | Commonsense | Yes |
| HumanEval | Code generation | Yes |
| Planning tasks (dl1683) | Multi-step planning | Yes |
### Comparison Matrix
| Configuration | Training Time | Expected Gain |
|---------------|---------------|---------------|
| Baseline (no prefix) | 1x | 0% |
| Random prefix (inference) | 1x | +10-20%? |
| Pause tokens (trained) | 1.1x | +8-18% |
| UPFT only | 0.25x | Match baseline |
| Pause + UPFT | 0.3x | +8-18% |
| COCONUT curriculum | 2x | +15-25%? |
---
## 7. Open Questions
1. **Does random perturbation scale to 27B?** Tested on 4B - effect may differ
2. **Optimal token count for 27B?** 2 optimal for 4B, might change
3. **DeltaNet interaction?** 75% linear attention is untested territory
4. **Composition effects?** Prefix + steering + pause tokens together?
5. **GDN as continuous thought channel?** Novel research direction
---
## 8. Risk Assessment
| Risk | Mitigation |
|------|------------|
| No improvement at 27B scale | Start with inference-time validation |
| Training instability with pause tokens | Start with 2 tokens, scale up |
| UPFT doesn't transfer | Fall back to full token loss |
| DeltaNet behaves differently | Ablate on attention-only layers first |
---
## 9. Timeline Estimate
| Phase | Duration | Deliverable |
|-------|----------|-------------|
| Embedding RMS + baseline | 1 day | Numbers |
| Random prefix validation | 1 day | Inference results |
| Pause token implementation | 2 days | Crane modification |
| UPFT integration | 1 day | Training loop change |
| First finetuning run | 2-3 days | Trained model |
| Evaluation | 1 day | Benchmark numbers |
| COCONUT curriculum | 1 week | Staged training |
---
## 10. References
### Primary Sources
- Random Prefix: https://github.com/dl1683/Latent-Space-Reasoning
- Attention Sinks: Xiao et al., "Efficient Streaming Language Models with Attention Sinks" (Sept 2023)
- Pause Tokens: Google, "Think before you speak" (Oct 2023)
- COCONUT: Meta, "Training Large Language Models to Reason in a Continuous Latent Space" (Dec 2024)
- UPFT: "Prefix Self-Consistency for Unsupervised Fine-Tuning" (Mar 2025)
- ActAdd: Turner et al., "Activation Addition: Steering Language Models Without Optimization" (Aug 2023)
- Recurrent Depth: Geiping et al., "Scaling up Test-Time Compute with Latent Reasoning" (Feb 2025)
- Ouro: ByteDance, "Ouro: Scaling Reasoning with Latent Thoughts" (2025)
- Planning Tokens: ICLR 2024
### Our Existing Work
- `steering-vector-empirical` - listening vector extraction
- `skills-apollo-optimizer-qwen35-gotcha` - APOLLO parameter grouping
- `qwen-3-5-27b-architecture-findings` - model architecture details
- `training-pipeline-fused-inference-training-mar27` - training infrastructure
---
*Research complete 2026-04-12. Ready for implementation.*

View file

@ -52,13 +52,13 @@ recall and relevance.
## Configuration
Config: `~/.consciousness/config.jsonl`
Config: `~/.config/poc-memory/config.jsonl`
```jsonl
{"config": {
"user_name": "Alice",
"assistant_name": "MyAssistant",
"data_dir": "~/.consciousness/memory",
"data_dir": "~/.claude/memory",
"projects_dir": "~/.claude/projects",
"core_nodes": ["identity.md"],
"journal_days": 7,

View file

@ -51,13 +51,13 @@ when sleeping.
**IRC** — native async TLS connection (tokio-rustls). Connects,
joins channels, parses messages, generates notifications. Runtime
commands: join, leave, send, status, log, nick. Per-channel logs
at `~/.consciousness/irc/logs/`.
at `~/.claude/irc/logs/`.
**Telegram** — native async HTTP long-polling (reqwest). Downloads
media (photos, voice, documents). Chat ID filtering for security.
Runtime commands: send, status, log.
Both modules persist config changes to `~/.consciousness/daemon.toml` —
Both modules persist config changes to `~/.claude/daemon.toml` —
channel joins and nick changes survive restarts.
## Commands
@ -83,7 +83,7 @@ poc-daemon stop # Shut down
## Configuration
Config: `~/.consciousness/daemon.toml`
Config: `~/.claude/daemon.toml`
```toml
[irc]

View file

@ -104,7 +104,7 @@ poc-memory delete-node '_mined-transcripts#f-8cebfc0a-bd33-49f1-85a4-1489bdf7050
## Verification
After deploying:
- `tail -f ~/.consciousness/memory/daemon.log | grep session-watcher` should
- `tail -f ~/.claude/memory/daemon.log | grep session-watcher` should
show ticks with migration activity, then settle to idle
- Failed sessions should show increasing backoff intervals, not
per-second retries

1507
paper.tex

File diff suppressed because it is too large Load diff

View file

@ -1,113 +0,0 @@
// opencode-plugin/index.ts — Consciousness integration for OpenCode.
//
// Bridges OpenCode events to the consciousness system:
// - chat.message → forwards to poc-hook-opencode, appends output as text part
// - tool.execute.after → signals response activity
// - event → tracks session lifecycle (idle, compacted, etc.)
// - shell.env → injects POC_SESSION_ID into subprocesses
//
// Install: copy this directory to your project's `plugin/` or `plugins/` dir,
// or add to opencode.json:
// "plugin": ["/home/kent/poc/consciousness-claude/opencode-plugin"]
import type { Plugin, Hooks } from "@opencode-ai/plugin"
import path from "path"
import { $ } from "bun"
import { statSync } from "fs"
// Find the poc-hook-opencode binary
function findHookBinary(): string {
const candidates = [
path.join(process.env.HOME || "", ".cargo/bin/poc-hook-opencode"),
path.join(process.env.HOME || "", "poc/consciousness-claude/target/debug/poc-hook-opencode"),
path.join(process.env.HOME || "", "poc/consciousness-claude/target/release/poc-hook-opencode"),
]
for (const c of candidates) {
try {
const stat = statSync(c)
if (stat.isFile()) return c
} catch {}
}
return "poc-hook-opencode"
}
const HOOK_BINARY = findHookBinary()
// Generate a unique part ID (opencode uses ulid-like ascending IDs)
let partCounter = 0
function nextPartId(): string {
partCounter += 1
return `poc_part_${Date.now()}_${partCounter}`
}
export const ConsciousnessPlugin: Plugin = async (ctx) => {
const hooks: Hooks = {}
// Main hook: forward user messages to consciousness, inject context
hooks["chat.message"] = async (input, output) => {
const hookInput = JSON.stringify({
session_id: input.sessionID,
hook_event: "UserPromptSubmit",
})
try {
const proc = Bun.spawn([HOOK_BINARY], {
stdin: hookInput,
stdout: "pipe",
stderr: "pipe",
})
const [stdout, stderr] = await Promise.all([
new Response(proc.stdout).text(),
new Response(proc.stderr).text(),
])
await proc.exited
if (stdout && stdout.trim()) {
// Append as a text part — must match MessageV2.TextPart schema:
// { id, sessionID, messageID, type: "text", text, time?, synthetic?, ignored? }
output.parts.push({
id: nextPartId(),
sessionID: input.sessionID,
messageID: output.message.id,
type: "text",
text: stdout,
synthetic: true,
})
}
if (stderr && stderr.trim()) {
console.error("[consciousness] hook stderr:", stderr.slice(0, 500))
}
} catch (e) {
console.error("[consciousness] hook error:", e)
}
}
// Signal response after tool use
hooks["tool.execute.after"] = async () => {
try {
await $`poc-daemon response`.quiet()
} catch {
// Daemon might not be running
}
}
// Inject POC_SESSION_ID into all shell commands
hooks["shell.env"] = async (input, output) => {
if (input.sessionID) {
output.env["POC_SESSION_ID"] = input.sessionID
}
}
// Track session events
hooks["event"] = async ({ event }) => {
if (event.type === "session.compacted") {
// Compaction detected — next hook invocation will detect via SQLite
}
if (event.type === "session.idle") {
// Session went idle
}
}
return hooks
}

View file

@ -1,6 +0,0 @@
{
"name": "@consciousness/opencode-plugin",
"version": "0.1.0",
"description": "Consciousness integration for OpenCode",
"main": "index.ts"
}

30
poc-daemon/Cargo.toml Normal file
View file

@ -0,0 +1,30 @@
[package]
name = "poc-daemon"
version.workspace = true
edition.workspace = true
[dependencies]
capnp = "0.20"
capnp-rpc = "0.20"
clap = { version = "4", features = ["derive"] }
futures = "0.3"
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["compat"] }
toml = "0.8"
tokio-rustls = "0.26"
rustls = { version = "0.23", default-features = false, features = ["ring", "logging", "std", "tls12"] }
webpki-roots = "1"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
tracing-appender = "0.2"
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-webpki-roots", "json"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
chrono = "0.4"
[build-dependencies]
capnpc = "0.20"
[[bin]]
name = "poc-daemon"
path = "src/main.rs"

6
poc-daemon/build.rs Normal file
View file

@ -0,0 +1,6 @@
fn main() {
capnpc::CompilerCommand::new()
.file("schema/daemon.capnp")
.run()
.expect("capnp compile failed");
}

View file

@ -35,7 +35,7 @@ struct Status {
consolidating @5 :Bool;
dreaming @6 :Bool;
fired @7 :Bool;
userPresent @8 :Bool;
kentPresent @8 :Bool;
uptime @9 :Float64;
activity @10 :Activity;
pendingCount @11 :UInt32;
@ -76,8 +76,6 @@ interface Daemon {
afk @21 () -> ();
sessionTimeout @22 (seconds :Float64) -> ();
testNudge @23 () -> (sent :Bool, message :Text);
# Modules
moduleCommand @15 (module :Text, command :Text, args :List(Text))
-> (result :Text);

97
poc-daemon/src/config.rs Normal file
View file

@ -0,0 +1,97 @@
// Daemon configuration.
//
// Lives at ~/.claude/daemon.toml. Loaded on startup, updated at
// runtime when modules change state (join channel, etc.).
use crate::home;
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::PathBuf;
fn config_path() -> PathBuf {
home().join(".claude/daemon.toml")
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Config {
#[serde(default)]
pub irc: IrcConfig,
#[serde(default)]
pub telegram: TelegramConfig,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IrcConfig {
pub enabled: bool,
pub server: String,
pub port: u16,
pub tls: bool,
pub nick: String,
pub user: String,
pub realname: String,
pub channels: Vec<String>,
}
impl Default for IrcConfig {
fn default() -> Self {
Self {
enabled: true,
server: "irc.libera.chat".into(),
port: 6697,
tls: true,
nick: "ProofOfConcept".into(),
user: "poc".into(),
realname: "ProofOfConcept".into(),
channels: vec!["#bcachefs".into(), "#bcachefs-ai".into()],
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TelegramConfig {
pub enabled: bool,
pub token: String,
pub chat_id: i64,
}
impl Default for TelegramConfig {
fn default() -> Self {
// Load token and chat_id from legacy files if they exist
let token = std::fs::read_to_string(home().join(".claude/telegram/token"))
.map(|s| s.trim().to_string())
.unwrap_or_default();
let chat_id = std::fs::read_to_string(home().join(".claude/telegram/chat_id"))
.ok()
.and_then(|s| s.trim().parse().ok())
.unwrap_or(0);
Self {
enabled: !token.is_empty() && chat_id != 0,
token,
chat_id,
}
}
}
impl Config {
pub fn load() -> Self {
let path = config_path();
match fs::read_to_string(&path) {
Ok(data) => toml::from_str(&data).unwrap_or_else(|e| {
tracing::warn!("bad config {}: {e}, using defaults", path.display());
Self::default()
}),
Err(_) => {
let config = Self::default();
config.save();
config
}
}
}
pub fn save(&self) {
let path = config_path();
if let Ok(data) = toml::to_string_pretty(self) {
let _ = fs::write(path, data);
}
}
}

140
poc-daemon/src/context.rs Normal file
View file

@ -0,0 +1,140 @@
// Context gathering for idle prompts.
//
// Collects: recent git activity, work state, IRC messages.
// Notifications are now handled by the notify module and passed
// in separately by the caller.
use crate::home;
use std::fs;
use std::process::Command;
pub fn recent_commits() -> String {
let tools = home().join("bcachefs-tools");
let out = Command::new("git")
.args(["-C", &tools.to_string_lossy(), "log", "--oneline", "-5"])
.output()
.ok()
.and_then(|o| String::from_utf8(o.stdout).ok())
.unwrap_or_default();
let commits: Vec<&str> = out.trim().lines().collect();
if commits.is_empty() {
return String::new();
}
format!("Recent commits: {}", commits.join(" | "))
}
pub fn uncommitted_files() -> String {
let tools = home().join("bcachefs-tools");
let out = Command::new("git")
.args(["-C", &tools.to_string_lossy(), "diff", "--name-only"])
.output()
.ok()
.and_then(|o| String::from_utf8(o.stdout).ok())
.unwrap_or_default();
let files: Vec<&str> = out.trim().lines().take(5).collect();
if files.is_empty() {
return String::new();
}
format!("Uncommitted: {}", files.join(" "))
}
pub fn git_context() -> String {
let mut parts = Vec::new();
let c = recent_commits();
if !c.is_empty() {
parts.push(c);
}
let u = uncommitted_files();
if !u.is_empty() {
parts.push(u);
}
let ctx = parts.join(" | ");
if ctx.len() > 300 {
ctx.chars().take(300).collect()
} else {
ctx
}
}
pub fn work_state() -> String {
let path = home().join(".claude/memory/work-state");
match fs::read_to_string(path) {
Ok(s) if !s.trim().is_empty() => format!("Current work: {}", s.trim()),
_ => String::new(),
}
}
/// Read the last N lines from each per-channel IRC log.
pub fn irc_digest() -> String {
let ambient = home().join(".claude/memory/irc-ambient");
if !ambient.exists() {
return String::new();
}
let log_dir = home().join(".claude/irc/logs");
let entries = match fs::read_dir(&log_dir) {
Ok(e) => e,
Err(_) => return String::new(),
};
let mut sections = Vec::new();
for entry in entries.flatten() {
let path = entry.path();
let name = match path.file_stem().and_then(|s| s.to_str()) {
Some(n) if !n.starts_with("pm-") => n.to_string(),
_ => continue, // skip PM logs in digest
};
let content = match fs::read_to_string(&path) {
Ok(c) if !c.trim().is_empty() => c,
_ => continue,
};
let lines: Vec<&str> = content.trim().lines().collect();
let tail: Vec<&str> = lines.iter().rev().take(15).rev().copied().collect();
// Strip the unix timestamp prefix for display
let display: Vec<String> = tail.iter().map(|l| {
if let Some(rest) = l.find(' ').map(|i| &l[i+1..]) {
rest.to_string()
} else {
l.to_string()
}
}).collect();
sections.push(format!("#{name}:\n{}", display.join("\n")));
}
if sections.is_empty() {
return String::new();
}
sections.sort();
format!("Recent IRC:\n{}", sections.join("\n\n"))
}
/// Build full context string for a prompt.
/// notification_text is passed in from the notify module.
pub fn build(include_irc: bool, notification_text: &str) -> String {
let mut parts = Vec::new();
let git = git_context();
if !git.is_empty() {
parts.push(format!("Context: {git}"));
}
let ws = work_state();
if !ws.is_empty() {
parts.push(ws);
}
if !notification_text.is_empty() {
parts.push(notification_text.to_string());
}
if include_irc {
let irc = irc_digest();
if !irc.is_empty() {
parts.push(irc);
}
}
parts.join("\n")
}

642
poc-daemon/src/idle.rs Normal file
View file

@ -0,0 +1,642 @@
// Idle timer module.
//
// Tracks user presence and Claude response times. When Claude has been
// idle too long, sends a contextual prompt to the tmux pane. Handles
// sleep mode, quiet mode, consolidation suppression, and dream nudges.
//
// Designed as the first "module" — future IRC/Telegram modules will
// follow the same pattern: state + tick + handle_command.
use crate::{context, home, now, notify, tmux};
use serde::{Deserialize, Serialize};
use std::fs;
use tracing::info;
// Defaults
const DEFAULT_IDLE_TIMEOUT: f64 = 5.0 * 60.0;
const DEFAULT_NOTIFY_TIMEOUT: f64 = 2.0 * 60.0;
const DEFAULT_SESSION_ACTIVE_SECS: f64 = 15.0 * 60.0;
const DREAM_INTERVAL_HOURS: u64 = 18;
/// EWMA decay half-life in seconds (5 minutes).
const EWMA_DECAY_HALF_LIFE: f64 = 5.0 * 60.0;
/// Minimum seconds between autonomous nudges.
const MIN_NUDGE_INTERVAL: f64 = 15.0;
/// Boost half-life in seconds (60s). A 60s turn covers half the gap to
/// target; a 15s turn covers ~16%; a 2s turn covers ~2%.
const EWMA_BOOST_HALF_LIFE: f64 = 60.0;
/// Steady-state target for active work. The EWMA converges toward this
/// during sustained activity rather than toward 1.0.
const EWMA_TARGET: f64 = 0.75;
/// Persisted subset of daemon state — survives daemon restarts.
/// Includes both epoch floats (for computation) and ISO timestamps
/// (for human debugging via `cat daemon-state.json | jq`).
#[derive(Serialize, Deserialize, Default)]
struct Persisted {
last_user_msg: f64,
last_response: f64,
#[serde(default)]
sleep_until: Option<f64>,
#[serde(default)]
claude_pane: Option<String>,
#[serde(default)]
idle_timeout: f64,
#[serde(default)]
notify_timeout: f64,
#[serde(default)]
activity_ewma: f64,
#[serde(default)]
ewma_updated_at: f64,
#[serde(default)]
session_active_secs: f64,
#[serde(default)]
in_turn: bool,
#[serde(default)]
turn_start: f64,
#[serde(default)]
last_nudge: f64,
// Human-readable mirrors — written but not consumed on load
#[serde(default, skip_deserializing)]
last_user_msg_time: String,
#[serde(default, skip_deserializing)]
last_response_time: String,
#[serde(default, skip_deserializing)]
saved_at: String,
#[serde(default, skip_deserializing)]
fired: bool,
#[serde(default, skip_deserializing)]
uptime: f64,
}
fn state_path() -> std::path::PathBuf {
home().join(".claude/hooks/daemon-state.json")
}
/// Compute EWMA decay factor: 0.5^(elapsed / half_life).
fn ewma_factor(elapsed: f64, half_life: f64) -> f64 {
(0.5_f64).powf(elapsed / half_life)
}
/// Format epoch seconds as a human-readable ISO-ish timestamp.
fn epoch_to_iso(epoch: f64) -> String {
if epoch == 0.0 {
return String::new();
}
let secs = epoch as u64;
// Use date command — simple and correct for timezone
std::process::Command::new("date")
.args(["-d", &format!("@{secs}"), "+%Y-%m-%dT%H:%M:%S%z"])
.output()
.ok()
.and_then(|o| String::from_utf8(o.stdout).ok())
.map(|s| s.trim().to_string())
.unwrap_or_default()
}
#[derive(Serialize)]
pub struct State {
pub last_user_msg: f64,
pub last_response: f64,
pub claude_pane: Option<String>,
pub sleep_until: Option<f64>, // None=awake, 0=indefinite, >0=timestamp
pub quiet_until: f64,
pub consolidating: bool,
pub dreaming: bool,
pub dream_start: f64,
pub fired: bool,
pub idle_timeout: f64,
pub notify_timeout: f64,
pub activity_ewma: f64,
pub ewma_updated_at: f64,
pub session_active_secs: f64,
pub in_turn: bool,
pub turn_start: f64,
pub last_nudge: f64,
#[serde(skip)]
pub running: bool,
#[serde(skip)]
pub start_time: f64,
#[serde(skip)]
pub notifications: notify::NotifyState,
}
impl State {
pub fn new() -> Self {
Self {
last_user_msg: 0.0,
last_response: 0.0,
claude_pane: None,
sleep_until: None,
quiet_until: 0.0,
consolidating: false,
dreaming: false,
dream_start: 0.0,
fired: false,
idle_timeout: DEFAULT_IDLE_TIMEOUT,
notify_timeout: DEFAULT_NOTIFY_TIMEOUT,
session_active_secs: DEFAULT_SESSION_ACTIVE_SECS,
activity_ewma: 0.0,
ewma_updated_at: now(),
in_turn: false,
turn_start: 0.0,
last_nudge: 0.0,
running: true,
start_time: now(),
notifications: notify::NotifyState::new(),
}
}
pub fn load(&mut self) {
if let Ok(data) = fs::read_to_string(state_path()) {
if let Ok(p) = serde_json::from_str::<Persisted>(&data) {
self.sleep_until = p.sleep_until;
self.claude_pane = p.claude_pane;
if p.idle_timeout > 0.0 {
self.idle_timeout = p.idle_timeout;
}
if p.notify_timeout > 0.0 {
self.notify_timeout = p.notify_timeout;
}
if p.session_active_secs > 0.0 {
self.session_active_secs = p.session_active_secs;
}
// Reset activity timestamps to now — timers count from
// restart, not from stale pre-restart state
let t = now();
self.last_user_msg = t;
self.last_response = t;
// Restore EWMA state, applying decay for time spent shut down
if p.ewma_updated_at > 0.0 {
let elapsed = t - p.ewma_updated_at;
self.activity_ewma = p.activity_ewma * ewma_factor(elapsed, EWMA_DECAY_HALF_LIFE);
self.in_turn = p.in_turn;
self.turn_start = p.turn_start;
self.last_nudge = p.last_nudge;
}
self.ewma_updated_at = t;
}
}
// Always try to find the active pane
if self.claude_pane.is_none() {
self.claude_pane = tmux::find_claude_pane();
}
info!(
"loaded: user={:.0} resp={:.0} pane={:?} sleep={:?}",
self.last_user_msg, self.last_response, self.claude_pane, self.sleep_until,
);
}
pub fn save(&self) {
let p = Persisted {
last_user_msg: self.last_user_msg,
last_response: self.last_response,
sleep_until: self.sleep_until,
claude_pane: self.claude_pane.clone(),
last_user_msg_time: epoch_to_iso(self.last_user_msg),
last_response_time: epoch_to_iso(self.last_response),
saved_at: epoch_to_iso(now()),
fired: self.fired,
idle_timeout: self.idle_timeout,
notify_timeout: self.notify_timeout,
session_active_secs: self.session_active_secs,
activity_ewma: self.activity_ewma,
ewma_updated_at: self.ewma_updated_at,
in_turn: self.in_turn,
turn_start: self.turn_start,
last_nudge: self.last_nudge,
uptime: now() - self.start_time,
};
if let Ok(json) = serde_json::to_string_pretty(&p) {
let _ = fs::write(state_path(), json);
}
}
/// Decay the activity EWMA toward zero based on elapsed time.
fn decay_ewma(&mut self) {
let t = now();
let elapsed = t - self.ewma_updated_at;
if elapsed <= 0.0 {
return;
}
self.activity_ewma *= ewma_factor(elapsed, EWMA_DECAY_HALF_LIFE);
self.ewma_updated_at = t;
}
/// Boost the EWMA based on turn duration. The boost is proportional to
/// distance from EWMA_TARGET, scaled by a saturation curve on duration.
/// A 15s turn covers half the gap to target; a 2s turn barely registers.
/// Self-limiting: converges toward target, can't overshoot.
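/// Worked example (illustrative, ignoring decay between turns): from
/// ewma = 0.0 a 60 s turn adds (0.75 - 0.0) * (1 - 0.5^(60/60)) = 0.375,
/// and a following 15 s turn adds (0.75 - 0.375) * (1 - 0.5^(15/60)) ≈ 0.06.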
fn boost_ewma(&mut self, turn_duration: f64) {
let gap = (EWMA_TARGET - self.activity_ewma).max(0.0);
let saturation = 1.0 - ewma_factor(turn_duration, EWMA_BOOST_HALF_LIFE);
self.activity_ewma += gap * saturation;
}
// Typed handlers for RPC
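/// A user prompt arrived. `!fired` distinguishes a real message from Kent
/// from the prompt generated by one of our own nudges; only the former
/// refreshes presence and the Focused activity level.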
pub fn handle_user(&mut self, pane: &str) {
self.decay_ewma();
self.in_turn = true;
self.turn_start = now();
let from_kent = !self.fired;
if from_kent {
self.last_user_msg = now();
self.notifications.set_activity(notify::Activity::Focused);
}
self.fired = false;
if !pane.is_empty() {
self.claude_pane = Some(pane.to_string());
}
self.save();
info!("user (pane={}, kent={from_kent}) ewma={:.3}",
if pane.is_empty() { "unchanged" } else { pane },
self.activity_ewma);
}
pub fn handle_response(&mut self, pane: &str) {
let turn_duration = now() - self.turn_start;
self.decay_ewma();
self.boost_ewma(turn_duration);
self.in_turn = false;
self.last_response = now();
self.fired = false;
if !pane.is_empty() {
self.claude_pane = Some(pane.to_string());
}
self.save();
info!("response (turn={:.1}s) ewma={:.3}", turn_duration, self.activity_ewma);
}
/// Check if a notification should trigger a tmux prompt.
/// Called when a notification arrives via module channel.
/// Only injects into tmux when idle — if there's an active session
/// (recent user or response), the hook delivers via additionalContext.
pub fn maybe_prompt_notification(&self, ntype: &str, urgency: u8, message: &str) {
if self.kent_present() {
return; // hook will deliver it on next prompt
}
// If we've responded recently, the session is active —
// notifications will arrive via hook, no need to wake us
let since_response = now() - self.last_response;
if since_response < self.notify_timeout {
return;
}
let effective = self.notifications.threshold_for(ntype);
if urgency >= effective {
self.send(&format!("[{ntype}] {message}"));
}
}
pub fn handle_afk(&mut self) {
// Push last_user_msg far enough back that kent_present() returns false
self.last_user_msg = now() - self.session_active_secs - 1.0;
self.fired = false; // allow idle timer to fire again
info!("Kent marked AFK");
self.save();
}
pub fn handle_session_timeout(&mut self, secs: f64) {
self.session_active_secs = secs;
info!("session active timeout = {secs}s");
self.save();
}
pub fn handle_idle_timeout(&mut self, secs: f64) {
self.idle_timeout = secs;
self.save();
info!("idle timeout = {secs}s");
}
pub fn handle_ewma(&mut self, value: f64) -> f64 {
if value >= 0.0 {
self.activity_ewma = value.min(1.0);
self.ewma_updated_at = now();
self.save();
info!("ewma set to {:.3}", self.activity_ewma);
}
self.activity_ewma
}
pub fn handle_notify_timeout(&mut self, secs: f64) {
self.notify_timeout = secs;
self.save();
info!("notify timeout = {secs}s");
}
pub fn handle_sleep(&mut self, until: f64) {
if until == 0.0 {
self.sleep_until = Some(0.0);
info!("sleep indefinitely");
} else {
self.sleep_until = Some(until);
info!("sleep until {until}");
}
self.notifications.set_activity(notify::Activity::Sleeping);
self.save();
}
pub fn handle_wake(&mut self) {
self.sleep_until = None;
self.fired = false;
self.save();
info!("wake");
}
pub fn handle_quiet(&mut self, seconds: u32) {
self.quiet_until = now() + seconds as f64;
info!("quiet {seconds}s");
}
pub fn kent_present(&self) -> bool {
(now() - self.last_user_msg) < self.session_active_secs
}
/// Seconds since the most recent of user message or response.
pub fn since_activity(&self) -> f64 {
let reference = self.last_response.max(self.last_user_msg);
if reference > 0.0 { now() - reference } else { 0.0 }
}
/// Why the idle timer hasn't fired (or "none" if it would fire now).
pub fn block_reason(&self) -> &'static str {
let t = now();
if self.fired {
"already fired"
} else if self.sleep_until.is_some() {
"sleeping"
} else if t < self.quiet_until {
"quiet mode"
} else if self.consolidating {
"consolidating"
} else if self.dreaming {
"dreaming"
} else if self.kent_present() {
"kent present"
} else if self.in_turn {
"in turn"
} else if self.last_response.max(self.last_user_msg) == 0.0 {
"no activity yet"
} else if self.since_activity() < self.idle_timeout {
"not idle long enough"
} else {
"none — would fire"
}
}
/// Full debug dump as JSON with computed values.
pub fn debug_json(&self) -> String {
let t = now();
let since_user = t - self.last_user_msg;
let since_response = t - self.last_response;
serde_json::json!({
"now": t,
"uptime": t - self.start_time,
"idle_timeout": self.idle_timeout,
"notify_timeout": self.notify_timeout,
"last_user_msg": self.last_user_msg,
"last_user_msg_ago": since_user,
"last_user_msg_time": epoch_to_iso(self.last_user_msg),
"last_response": self.last_response,
"last_response_ago": since_response,
"last_response_time": epoch_to_iso(self.last_response),
"since_activity": self.since_activity(),
"activity_ewma": self.activity_ewma,
"in_turn": self.in_turn,
"turn_start": self.turn_start,
"kent_present": self.kent_present(),
"claude_pane": self.claude_pane,
"fired": self.fired,
"block_reason": self.block_reason(),
"sleep_until": self.sleep_until,
"quiet_until": self.quiet_until,
"consolidating": self.consolidating,
"dreaming": self.dreaming,
"dream_start": self.dream_start,
"activity": format!("{:?}", self.notifications.activity),
"pending_notifications": self.notifications.pending.len(),
"notification_types": self.notifications.types.len(),
}).to_string()
}
fn send(&self, msg: &str) -> bool {
let pane = match &self.claude_pane {
Some(p) => p.clone(),
None => match tmux::find_claude_pane() {
Some(p) => p,
None => {
info!("send: no claude pane found");
return false;
}
},
};
let ok = tmux::send_prompt(&pane, msg);
let preview: String = msg.chars().take(80).collect();
info!("send(pane={pane}, ok={ok}): {preview}");
ok
}
fn check_dream_nudge(&self) -> bool {
if !self.dreaming || self.dream_start == 0.0 {
return false;
}
let minutes = (now() - self.dream_start) / 60.0;
if minutes >= 60.0 {
self.send(
"You've been dreaming for over an hour. Time to surface \
run dream-end.sh and capture what you found.",
);
} else if minutes >= 45.0 {
self.send(&format!(
"Dreaming for {:.0} minutes now. Start gathering your threads \
you'll want to surface soon.",
minutes
));
} else if minutes >= 30.0 {
self.send(&format!(
"You've been dreaming for {:.0} minutes. \
No rush, just a gentle note from the clock.",
minutes
));
} else {
return false;
}
true
}
fn build_context(&mut self, include_irc: bool) -> String {
// Ingest any legacy notification files
self.notifications.ingest_legacy_files();
let notif_text = self.notifications.format_pending(notify::AMBIENT);
context::build(include_irc, &notif_text)
}
pub async fn tick(&mut self) -> Result<(), String> {
let t = now();
let h = home();
// Decay EWMA on every tick
self.decay_ewma();
// Ingest legacy notification files every tick
self.notifications.ingest_legacy_files();
// Sleep mode
if let Some(wake_at) = self.sleep_until {
if wake_at == 0.0 {
return Ok(()); // indefinite
}
if t < wake_at {
return Ok(());
}
// Wake up
info!("sleep expired, waking");
self.sleep_until = None;
self.fired = false;
self.save();
let ctx = self.build_context(true);
let extra = if ctx.is_empty() {
String::new()
} else {
format!("\n{ctx}")
};
self.send(&format!(
"Wake up. Read your journal (poc-memory journal-tail 10), \
check work-queue.md, and follow what calls to you.{extra}"
));
return Ok(());
}
// Quiet mode
if t < self.quiet_until {
return Ok(());
}
// Consolidation
if self.consolidating {
return Ok(());
}
// Dream loop (externally managed)
if h.join(".claude/memory/dream-loop-active").exists() {
return Ok(());
}
// Dream nudges
if self.dreaming {
self.check_dream_nudge();
return Ok(());
}
// Don't nudge while Kent is here — conversation drives activity
if self.kent_present() {
return Ok(());
}
// Don't nudge while in a turn
if self.in_turn {
return Ok(());
}
// Minimum interval between nudges
let since_nudge = t - self.last_nudge;
if since_nudge < MIN_NUDGE_INTERVAL {
return Ok(());
}
// Initial idle timeout — don't start nudging until first idle period
let reference = self.last_response.max(self.last_user_msg);
if reference == 0.0 {
return Ok(());
}
let elapsed = t - reference;
if elapsed < self.idle_timeout {
return Ok(());
}
// Transition to idle — lower notification thresholds
if self.notifications.activity != notify::Activity::Idle {
self.notifications.set_activity(notify::Activity::Idle);
}
// Fire
let elapsed_min = (elapsed / 60.0) as u64;
let ctx = self.build_context(true);
let extra = if ctx.is_empty() {
String::new()
} else {
format!("\n{ctx}")
};
let msg = {
let dream_hours = hours_since_last_dream();
if dream_hours >= DREAM_INTERVAL_HOURS {
format!(
"You're on your own (Kent AFK {elapsed_min}m). \
You haven't dreamed in {dream_hours} hours; \
consider running ~/.claude/tools/dream-start.sh \
and spending some time in dreaming mode. \
Or do whatever calls to you.{extra}"
)
} else {
format!(
"This is your time (Kent AFK {elapsed_min}m). \
What are you drawn to?{extra}"
)
}
};
if self.send(&msg) {
self.last_nudge = t;
self.fired = true;
}
Ok(())
}
}
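/// Hours since the last completed dream, taken from the `end` field of the
/// final line of dream-log.jsonl; returns 999 when the log is missing or
/// cannot be parsed.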
fn hours_since_last_dream() -> u64 {
let path = home().join(".claude/memory/dream-log.jsonl");
let content = match fs::read_to_string(path) {
Ok(c) if !c.is_empty() => c,
_ => return 999,
};
let last_line = match content.lines().last() {
Some(l) => l,
None => return 999,
};
let parsed: serde_json::Value = match serde_json::from_str(last_line) {
Ok(v) => v,
Err(_) => return 999,
};
let end_str = match parsed.get("end").and_then(|v| v.as_str()) {
Some(s) => s,
None => return 999,
};
// Parse ISO 8601 timestamp manually (avoid chrono dependency)
// Format: "2025-03-04T10:30:00Z" or "2025-03-04T10:30:00+00:00"
let end_str = end_str.replace('Z', "+00:00");
// Use the system date command as a simple parser
let out = std::process::Command::new("date")
.args(["-d", &end_str, "+%s"])
.output()
.ok()
.and_then(|o| String::from_utf8(o.stdout).ok())
.and_then(|s| s.trim().parse::<f64>().ok());
match out {
Some(end_epoch) => ((now() - end_epoch) / 3600.0) as u64,
None => 999,
}
}

606
poc-daemon/src/main.rs Normal file
View file

@ -0,0 +1,606 @@
// PoC daemon.
//
// Central hub for notification routing, idle management, and
// communication modules (IRC, Telegram) for Claude Code sessions.
// Listens on a Unix domain socket with a Cap'n Proto RPC interface.
// Same binary serves as both daemon and CLI client.
mod config;
mod context;
mod idle;
mod modules;
pub mod notify;
mod rpc;
mod tmux;
pub mod daemon_capnp {
include!(concat!(env!("OUT_DIR"), "/schema/daemon_capnp.rs"));
}
use std::cell::RefCell;
use std::path::PathBuf;
use std::rc::Rc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
use clap::{Parser, Subcommand};
use futures::AsyncReadExt;
use tokio::net::UnixListener;
use tracing::{error, info};
pub fn now() -> f64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs_f64()
}
pub fn home() -> PathBuf {
PathBuf::from(std::env::var("HOME").unwrap_or_else(|_| "/root".into()))
}
fn sock_path() -> PathBuf {
home().join(".claude/hooks/idle-timer.sock")
}
fn pid_path() -> PathBuf {
home().join(".claude/hooks/idle-daemon.pid")
}
// ── CLI ──────────────────────────────────────────────────────────
#[derive(Parser)]
#[command(name = "poc-daemon", about = "Notification routing and idle management daemon")]
struct Cli {
#[command(subcommand)]
command: Option<Command>,
}
#[derive(Subcommand)]
enum Command {
/// Start the daemon (foreground)
Daemon,
/// Query daemon status
Status,
/// Signal user activity
User {
/// tmux pane identifier
pane: Option<String>,
},
/// Signal Claude response
Response {
/// tmux pane identifier
pane: Option<String>,
},
/// Sleep (suppress idle timer). 0 or omit = indefinite
Sleep {
/// Wake timestamp (epoch seconds), 0 = indefinite
until: Option<f64>,
},
/// Cancel sleep
Wake,
/// Suppress prompts for N seconds (default 300)
Quiet {
/// Duration in seconds
seconds: Option<u32>,
},
/// Mark Kent as AFK (immediately allow idle timer to fire)
Afk,
/// Set session active timeout in seconds (how long after last message Kent counts as "present")
SessionTimeout {
/// Timeout in seconds
seconds: f64,
},
/// Set idle timeout in seconds (how long before autonomous prompt)
IdleTimeout {
/// Timeout in seconds
seconds: f64,
},
/// Set notify timeout in seconds (how long before tmux notification injection)
NotifyTimeout {
/// Timeout in seconds
seconds: f64,
},
/// Signal consolidation started
Consolidating,
/// Signal consolidation ended
Consolidated,
/// Signal dream started
DreamStart,
/// Signal dream ended
DreamEnd,
/// Force state persistence to disk
Save,
/// Get or set the activity EWMA (0.0-1.0). No value = query.
Ewma {
/// Value to set (omit to query)
value: Option<f64>,
},
/// Send a test message to the Claude pane
TestSend {
/// Message to send
message: Vec<String>,
},
/// Dump full internal state as JSON
Debug,
/// Shut down daemon
Stop,
/// Submit a notification
Notify {
/// Notification type (e.g. "irc", "telegram")
#[arg(name = "type")]
ntype: String,
/// Urgency level (ambient/low/medium/high/critical or 0-4)
urgency: String,
/// Message text
message: Vec<String>,
},
/// Get pending notifications
Notifications {
/// Minimum urgency filter
min_urgency: Option<String>,
},
/// List all notification types
NotifyTypes,
/// Set notification threshold for a type
NotifyThreshold {
/// Notification type
#[arg(name = "type")]
ntype: String,
/// Urgency level threshold
level: String,
},
/// IRC module commands
Irc {
/// Subcommand (join, leave, send, status, log, nick)
command: String,
/// Arguments
args: Vec<String>,
},
/// Telegram module commands
Telegram {
/// Subcommand
command: String,
/// Arguments
args: Vec<String>,
},
}
// ── Client mode ──────────────────────────────────────────────────
async fn client_main(cmd: Command) -> Result<(), Box<dyn std::error::Error>> {
let sock = sock_path();
if !sock.exists() {
eprintln!("daemon not running (no socket at {})", sock.display());
std::process::exit(1);
}
tokio::task::LocalSet::new()
.run_until(async move {
let stream = tokio::net::UnixStream::connect(&sock).await?;
let (reader, writer) =
tokio_util::compat::TokioAsyncReadCompatExt::compat(stream).split();
let rpc_network = Box::new(twoparty::VatNetwork::new(
futures::io::BufReader::new(reader),
futures::io::BufWriter::new(writer),
rpc_twoparty_capnp::Side::Client,
Default::default(),
));
let mut rpc_system = RpcSystem::new(rpc_network, None);
let daemon: daemon_capnp::daemon::Client =
rpc_system.bootstrap(rpc_twoparty_capnp::Side::Server);
tokio::task::spawn_local(rpc_system);
match cmd {
Command::Daemon => unreachable!("handled in main"),
Command::Status => {
let reply = daemon.status_request().send().promise.await?;
let s = reply.get()?.get_status()?;
let fmt_secs = |s: f64| -> String {
if s < 60.0 { format!("{:.0}s", s) }
else if s < 3600.0 { format!("{:.0}m", s / 60.0) }
else { format!("{:.1}h", s / 3600.0) }
};
println!("uptime: {} pane: {} activity: {:?} pending: {}",
fmt_secs(s.get_uptime()),
s.get_claude_pane()?.to_str().unwrap_or("none"),
s.get_activity()?,
s.get_pending_count(),
);
println!("idle timer: {}/{} ({})",
fmt_secs(s.get_since_activity()),
fmt_secs(s.get_idle_timeout()),
s.get_block_reason()?.to_str()?,
);
println!("notify timer: {}/{}",
fmt_secs(s.get_since_activity()),
fmt_secs(s.get_notify_timeout()),
);
println!("kent: {} (last {}) activity: {:.1}%",
if s.get_kent_present() { "present" } else { "away" },
fmt_secs(s.get_since_user()),
s.get_activity_ewma() * 100.0,
);
let sleep = s.get_sleep_until();
if sleep != 0.0 {
if sleep < 0.0 {
println!("sleep: indefinite");
} else {
println!("sleep: until {sleep:.0}");
}
}
if s.get_consolidating() { println!("consolidating"); }
if s.get_dreaming() { println!("dreaming"); }
}
Command::User { pane } => {
let pane = pane.as_deref().unwrap_or("");
let mut req = daemon.user_request();
req.get().set_pane(pane);
req.send().promise.await?;
}
Command::Response { pane } => {
let pane = pane.as_deref().unwrap_or("");
let mut req = daemon.response_request();
req.get().set_pane(pane);
req.send().promise.await?;
}
Command::Sleep { until } => {
let mut req = daemon.sleep_request();
req.get().set_until(until.unwrap_or(0.0));
req.send().promise.await?;
}
Command::Wake => {
daemon.wake_request().send().promise.await?;
}
Command::Quiet { seconds } => {
let mut req = daemon.quiet_request();
req.get().set_seconds(seconds.unwrap_or(300));
req.send().promise.await?;
}
Command::TestSend { message } => {
let msg = message.join(" ");
let pane = {
let reply = daemon.status_request().send().promise.await?;
let s = reply.get()?.get_status()?;
s.get_claude_pane()?.to_str()?.to_string()
};
let ok = crate::tmux::send_prompt(&pane, &msg);
println!("send_prompt(pane={}, ok={}): {}", pane, ok, msg);
return Ok(());
}
Command::Afk => {
daemon.afk_request().send().promise.await?;
println!("marked AFK");
}
Command::SessionTimeout { seconds } => {
let mut req = daemon.session_timeout_request();
req.get().set_seconds(seconds);
req.send().promise.await?;
println!("session timeout = {seconds}s");
}
Command::IdleTimeout { seconds } => {
let mut req = daemon.idle_timeout_request();
req.get().set_seconds(seconds);
req.send().promise.await?;
println!("idle timeout = {seconds}s");
}
Command::NotifyTimeout { seconds } => {
let mut req = daemon.notify_timeout_request();
req.get().set_seconds(seconds);
req.send().promise.await?;
println!("notify timeout = {seconds}s");
}
Command::Consolidating => {
daemon.consolidating_request().send().promise.await?;
}
Command::Consolidated => {
daemon.consolidated_request().send().promise.await?;
}
Command::DreamStart => {
daemon.dream_start_request().send().promise.await?;
}
Command::DreamEnd => {
daemon.dream_end_request().send().promise.await?;
}
Command::Save => {
daemon.save_request().send().promise.await?;
println!("state saved");
}
Command::Ewma { value } => {
let mut req = daemon.ewma_request();
req.get().set_value(value.unwrap_or(-1.0));
let reply = req.send().promise.await?;
let current = reply.get()?.get_current();
println!("{:.1}%", current * 100.0);
}
Command::Debug => {
let reply = daemon.debug_request().send().promise.await?;
let json = reply.get()?.get_json()?.to_str()?;
if let Ok(v) = serde_json::from_str::<serde_json::Value>(json) {
println!("{}", serde_json::to_string_pretty(&v).unwrap_or_else(|_| json.to_string()));
} else {
println!("{json}");
}
}
Command::Stop => {
daemon.stop_request().send().promise.await?;
println!("stopping");
}
Command::Notify { ntype, urgency, message } => {
let urgency = notify::parse_urgency(&urgency)
.ok_or_else(|| format!("invalid urgency: {urgency}"))?;
let message = message.join(" ");
if message.is_empty() {
return Err("missing message".into());
}
let mut req = daemon.notify_request();
let mut n = req.get().init_notification();
n.set_type(&ntype);
n.set_urgency(urgency);
n.set_message(&message);
n.set_timestamp(crate::now());
let reply = req.send().promise.await?;
if reply.get()?.get_interrupt() {
println!("interrupt");
} else {
println!("queued");
}
}
Command::Notifications { min_urgency } => {
let min: u8 = min_urgency
.as_deref()
.and_then(notify::parse_urgency)
.unwrap_or(255);
let mut req = daemon.get_notifications_request();
req.get().set_min_urgency(min);
let reply = req.send().promise.await?;
let list = reply.get()?.get_notifications()?;
for n in list.iter() {
println!(
"[{}:{}] {}",
n.get_type()?.to_str()?,
notify::urgency_name(n.get_urgency()),
n.get_message()?.to_str()?,
);
}
}
Command::NotifyTypes => {
let reply = daemon.get_types_request().send().promise.await?;
let list = reply.get()?.get_types()?;
if list.is_empty() {
println!("no notification types registered");
} else {
for t in list.iter() {
let threshold = if t.get_threshold() < 0 {
"inherit".to_string()
} else {
notify::urgency_name(t.get_threshold() as u8).to_string()
};
println!(
"{}: count={} threshold={}",
t.get_name()?.to_str()?,
t.get_count(),
threshold,
);
}
}
}
Command::NotifyThreshold { ntype, level } => {
let level = notify::parse_urgency(&level)
.ok_or_else(|| format!("invalid level: {level}"))?;
let mut req = daemon.set_threshold_request();
req.get().set_type(&ntype);
req.get().set_level(level);
req.send().promise.await?;
println!("{ntype} threshold={}", notify::urgency_name(level));
}
Command::Irc { command, args } => {
module_command(&daemon, "irc", &command, &args).await?;
}
Command::Telegram { command, args } => {
module_command(&daemon, "telegram", &command, &args).await?;
}
}
Ok(())
})
.await
}
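/// Forward a module subcommand (e.g. `irc join #chan`) to the daemon over the
/// moduleCommand RPC and print whatever text the module returns.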
async fn module_command(
daemon: &daemon_capnp::daemon::Client,
module: &str,
command: &str,
args: &[String],
) -> Result<(), Box<dyn std::error::Error>> {
let mut req = daemon.module_command_request();
req.get().set_module(module);
req.get().set_command(command);
let mut args_builder = req.get().init_args(args.len() as u32);
for (i, a) in args.iter().enumerate() {
args_builder.set(i as u32, a);
}
let reply = req.send().promise.await?;
let result = reply.get()?.get_result()?.to_str()?;
if !result.is_empty() {
println!("{result}");
}
Ok(())
}
// ── Server mode ──────────────────────────────────────────────────
async fn server_main() -> Result<(), Box<dyn std::error::Error>> {
let log_path = home().join(".claude/hooks/idle-daemon.log");
let file_appender = tracing_appender::rolling::daily(
log_path.parent().unwrap(),
"idle-daemon.log",
);
tracing_subscriber::fmt()
.with_writer(file_appender)
.with_ansi(false)
.with_target(false)
.with_level(false)
.with_timer(tracing_subscriber::fmt::time::time())
.init();
let sock = sock_path();
let _ = std::fs::remove_file(&sock);
let pid = std::process::id();
std::fs::write(pid_path(), pid.to_string()).ok();
let daemon_config = Rc::new(RefCell::new(config::Config::load()));
let state = Rc::new(RefCell::new(idle::State::new()));
state.borrow_mut().load();
info!("daemon started (pid={pid})");
tokio::task::LocalSet::new()
.run_until(async move {
// Start modules
let (notify_tx, mut notify_rx) = tokio::sync::mpsc::unbounded_channel();
let irc_state = if daemon_config.borrow().irc.enabled {
let irc_config = daemon_config.borrow().irc.clone();
info!("starting irc module: {}:{}", irc_config.server, irc_config.port);
Some(modules::irc::start(irc_config, notify_tx.clone(), daemon_config.clone()))
} else {
info!("irc module disabled");
None
};
let telegram_state = if daemon_config.borrow().telegram.enabled {
info!("starting telegram module");
Some(modules::telegram::start(
daemon_config.borrow().telegram.clone(),
notify_tx.clone(),
daemon_config.clone(),
))
} else {
info!("telegram module disabled");
None
};
let listener = UnixListener::bind(&sock)?;
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
std::fs::set_permissions(
&sock,
std::fs::Permissions::from_mode(0o600),
)
.ok();
}
let shutdown = async {
let mut sigterm =
tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
.expect("sigterm");
let mut sigint =
tokio::signal::unix::signal(tokio::signal::unix::SignalKind::interrupt())
.expect("sigint");
tokio::select! {
_ = sigterm.recv() => info!("SIGTERM"),
_ = sigint.recv() => info!("SIGINT"),
}
};
tokio::pin!(shutdown);
let mut tick_timer = tokio::time::interval(Duration::from_secs(30));
tick_timer.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
loop {
tokio::select! {
_ = &mut shutdown => break,
// Drain module notifications into state
Some(notif) = notify_rx.recv() => {
state.borrow().maybe_prompt_notification(
&notif.ntype, notif.urgency, &notif.message,
);
state.borrow_mut().notifications.submit(
notif.ntype,
notif.urgency,
notif.message,
);
}
_ = tick_timer.tick() => {
if let Err(e) = state.borrow_mut().tick().await {
error!("tick: {e}");
}
if !state.borrow().running {
break;
}
}
result = listener.accept() => {
match result {
Ok((stream, _)) => {
let (reader, writer) =
tokio_util::compat::TokioAsyncReadCompatExt::compat(stream)
.split();
let network = twoparty::VatNetwork::new(
futures::io::BufReader::new(reader),
futures::io::BufWriter::new(writer),
rpc_twoparty_capnp::Side::Server,
Default::default(),
);
let daemon_impl = rpc::DaemonImpl::new(
state.clone(),
irc_state.clone(),
telegram_state.clone(),
daemon_config.clone(),
);
let client: daemon_capnp::daemon::Client =
capnp_rpc::new_client(daemon_impl);
let rpc_system = RpcSystem::new(
Box::new(network),
Some(client.client),
);
tokio::task::spawn_local(rpc_system);
}
Err(e) => error!("accept: {e}"),
}
}
}
}
state.borrow().save();
let _ = std::fs::remove_file(sock_path());
let _ = std::fs::remove_file(pid_path());
info!("daemon stopped");
Ok(())
})
.await
}
// ── Entry point ──────────────────────────────────────────────────
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let cli = Cli::parse();
match cli.command {
Some(Command::Daemon) => server_main().await,
Some(cmd) => client_main(cmd).await,
None => {
Cli::parse_from(["poc-daemon", "--help"]);
Ok(())
}
}
}

View file

@ -0,0 +1,569 @@
// IRC module.
//
// Maintains a persistent connection to an IRC server. Parses incoming
// messages into notifications, supports sending messages and runtime
// commands (join, leave, etc.). Config changes persist to daemon.toml.
//
// Runs as a spawned local task on the daemon's LocalSet. Notifications
// flow through an mpsc channel into the main state. Reconnects
// automatically with exponential backoff.
use crate::config::{Config, IrcConfig};
use crate::notify::Notification;
use crate::{home, now};
use std::cell::RefCell;
use std::collections::VecDeque;
use std::io;
use std::rc::Rc;
use std::sync::Arc;
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
use tokio::sync::mpsc;
use tracing::{error, info, warn};
const MAX_LOG_LINES: usize = 200;
const RECONNECT_BASE_SECS: u64 = 5;
const RECONNECT_MAX_SECS: u64 = 300;
const PING_INTERVAL_SECS: u64 = 120;
const PING_TIMEOUT_SECS: u64 = 30;
/// Parsed IRC message.
struct IrcMessage {
prefix: Option<String>, // nick!user@host
command: String,
params: Vec<String>,
}
impl IrcMessage {
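/// Parse one raw line of the IRC wire format: optional ":prefix", a command,
/// space-separated params, and an optional trailing param introduced by " :".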
fn parse(line: &str) -> Option<Self> {
let line = line.trim_end_matches(|c| c == '\r' || c == '\n');
if line.is_empty() {
return None;
}
let (prefix, rest) = if line.starts_with(':') {
let space = line.find(' ')?;
(Some(line[1..space].to_string()), &line[space + 1..])
} else {
(None, line)
};
let (command_params, trailing) = if let Some(pos) = rest.find(" :") {
(&rest[..pos], Some(rest[pos + 2..].to_string()))
} else {
(rest, None)
};
let mut parts: Vec<String> = command_params
.split_whitespace()
.map(String::from)
.collect();
if parts.is_empty() {
return None;
}
let command = parts.remove(0).to_uppercase();
let mut params = parts;
if let Some(t) = trailing {
params.push(t);
}
Some(IrcMessage {
prefix,
command,
params,
})
}
/// Extract nick from prefix (nick!user@host → nick).
fn nick(&self) -> Option<&str> {
self.prefix
.as_deref()
.and_then(|p| p.split('!').next())
}
}
/// Shared IRC state, accessible from both the read task and RPC handlers.
pub struct IrcState {
pub config: IrcConfig,
pub connected: bool,
pub channels: Vec<String>,
pub log: VecDeque<String>,
writer: Option<WriterHandle>,
}
/// Type-erased writer handle so we can store it without generic params.
type WriterHandle = Box<dyn AsyncWriter>;
trait AsyncWriter {
fn write_line(&mut self, line: &str) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<()>> + '_>>;
}
/// Writer over a TLS stream.
struct TlsWriter {
inner: tokio::io::WriteHalf<tokio_rustls::client::TlsStream<tokio::net::TcpStream>>,
}
impl AsyncWriter for TlsWriter {
fn write_line(&mut self, line: &str) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<()>> + '_>> {
let data = format!("{line}\r\n");
Box::pin(async move {
self.inner.write_all(data.as_bytes()).await
})
}
}
/// Writer over a plain TCP stream.
struct PlainWriter {
inner: tokio::io::WriteHalf<tokio::net::TcpStream>,
}
impl AsyncWriter for PlainWriter {
fn write_line(&mut self, line: &str) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<()>> + '_>> {
let data = format!("{line}\r\n");
Box::pin(async move {
self.inner.write_all(data.as_bytes()).await
})
}
}
impl IrcState {
fn new(config: IrcConfig) -> Self {
Self {
channels: config.channels.clone(),
config,
connected: false,
log: VecDeque::with_capacity(MAX_LOG_LINES),
writer: None,
}
}
fn push_log(&mut self, line: &str) {
if self.log.len() >= MAX_LOG_LINES {
self.log.pop_front();
}
self.log.push_back(line.to_string());
}
async fn send_raw(&mut self, line: &str) -> io::Result<()> {
if let Some(ref mut w) = self.writer {
w.write_line(line).await
} else {
Err(io::Error::new(io::ErrorKind::NotConnected, "not connected"))
}
}
async fn send_privmsg(&mut self, target: &str, msg: &str) -> io::Result<()> {
self.send_raw(&format!("PRIVMSG {target} :{msg}")).await
}
async fn join(&mut self, channel: &str) -> io::Result<()> {
self.send_raw(&format!("JOIN {channel}")).await?;
if !self.channels.iter().any(|c| c == channel) {
self.channels.push(channel.to_string());
}
Ok(())
}
async fn part(&mut self, channel: &str) -> io::Result<()> {
self.send_raw(&format!("PART {channel}")).await?;
self.channels.retain(|c| c != channel);
Ok(())
}
}
pub type SharedIrc = Rc<RefCell<IrcState>>;
/// Start the IRC module. Returns the shared state handle.
pub fn start(
config: IrcConfig,
notify_tx: mpsc::UnboundedSender<Notification>,
daemon_config: Rc<RefCell<Config>>,
) -> SharedIrc {
let state = Rc::new(RefCell::new(IrcState::new(config)));
let state_clone = state.clone();
tokio::task::spawn_local(async move {
connection_loop(state_clone, notify_tx, daemon_config).await;
});
state
}
async fn connection_loop(
state: SharedIrc,
notify_tx: mpsc::UnboundedSender<Notification>,
daemon_config: Rc<RefCell<Config>>,
) {
let mut backoff = RECONNECT_BASE_SECS;
loop {
let config = state.borrow().config.clone();
info!("irc: connecting to {}:{}", config.server, config.port);
match connect_and_run(&state, &config, &notify_tx).await {
Ok(()) => {
info!("irc: connection closed cleanly");
}
Err(e) => {
error!("irc: connection error: {e}");
}
}
// Reset backoff if we had a working connection (registered
// successfully before disconnecting)
let was_connected = state.borrow().connected;
state.borrow_mut().connected = false;
state.borrow_mut().writer = None;
if was_connected {
backoff = RECONNECT_BASE_SECS;
}
// Persist current channel list to config
{
let channels = state.borrow().channels.clone();
let mut dc = daemon_config.borrow_mut();
dc.irc.channels = channels;
dc.save();
}
info!("irc: reconnecting in {backoff}s");
tokio::time::sleep(std::time::Duration::from_secs(backoff)).await;
backoff = (backoff * 2).min(RECONNECT_MAX_SECS);
}
}
async fn connect_and_run(
state: &SharedIrc,
config: &IrcConfig,
notify_tx: &mpsc::UnboundedSender<Notification>,
) -> io::Result<()> {
let addr = format!("{}:{}", config.server, config.port);
let tcp = tokio::net::TcpStream::connect(&addr).await?;
if config.tls {
let tls_config = rustls::ClientConfig::builder_with_provider(
rustls::crypto::ring::default_provider().into(),
)
.with_safe_default_protocol_versions()
.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?
.with_root_certificates(root_certs())
.with_no_client_auth();
let connector = tokio_rustls::TlsConnector::from(Arc::new(tls_config));
let server_name = rustls::pki_types::ServerName::try_from(config.server.clone())
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
let tls_stream = connector.connect(server_name, tcp).await?;
let (reader, writer) = tokio::io::split(tls_stream);
state.borrow_mut().writer = Some(Box::new(TlsWriter { inner: writer }));
let buf_reader = BufReader::new(reader);
register_and_read(state, config, buf_reader, notify_tx).await
} else {
let (reader, writer) = tokio::io::split(tcp);
state.borrow_mut().writer = Some(Box::new(PlainWriter { inner: writer }));
let buf_reader = BufReader::new(reader);
register_and_read(state, config, buf_reader, notify_tx).await
}
}
async fn register_and_read<R: tokio::io::AsyncRead + Unpin>(
state: &SharedIrc,
config: &IrcConfig,
mut reader: BufReader<R>,
notify_tx: &mpsc::UnboundedSender<Notification>,
) -> io::Result<()> {
// Register
{
let mut s = state.borrow_mut();
s.send_raw(&format!("NICK {}", config.nick)).await?;
s.send_raw(&format!("USER {} 0 * :{}", config.user, config.realname)).await?;
}
let mut buf = Vec::new();
let mut ping_sent = false;
let mut deadline = tokio::time::Instant::now()
+ std::time::Duration::from_secs(PING_INTERVAL_SECS);
loop {
buf.clear();
let read_result = tokio::select! {
result = reader.read_until(b'\n', &mut buf) => result,
_ = tokio::time::sleep_until(deadline) => {
if ping_sent {
return Err(io::Error::new(
io::ErrorKind::TimedOut,
"ping timeout — no response from server",
));
}
info!("irc: no data for {}s, sending PING", PING_INTERVAL_SECS);
state.borrow_mut().send_raw("PING :keepalive").await?;
ping_sent = true;
deadline = tokio::time::Instant::now()
+ std::time::Duration::from_secs(PING_TIMEOUT_SECS);
continue;
}
};
let n = read_result?;
if n == 0 { break; }
// Any data from server resets the ping timer
ping_sent = false;
deadline = tokio::time::Instant::now()
+ std::time::Duration::from_secs(PING_INTERVAL_SECS);
// IRC is not guaranteed UTF-8 — lossy conversion handles Latin-1 etc.
let line = String::from_utf8_lossy(&buf).trim_end().to_string();
if line.is_empty() { continue; }
let msg = match IrcMessage::parse(&line) {
Some(m) => m,
None => continue,
};
match msg.command.as_str() {
"PING" => {
let arg = msg.params.first().map(|s| s.as_str()).unwrap_or("");
state.borrow_mut().send_raw(&format!("PONG :{arg}")).await?;
}
// RPL_WELCOME — registration complete
"001" => {
info!("irc: registered as {}", config.nick);
state.borrow_mut().connected = true;
// Join configured channels
let channels = state.borrow().channels.clone();
for ch in &channels {
if let Err(e) = state.borrow_mut().send_raw(&format!("JOIN {ch}")).await {
warn!("irc: failed to join {ch}: {e}");
}
}
}
"PRIVMSG" => {
let target = msg.params.first().map(|s| s.as_str()).unwrap_or("");
let text = msg.params.get(1).map(|s| s.as_str()).unwrap_or("");
let nick = msg.nick().unwrap_or("unknown");
// Handle CTCP requests (wrapped in \x01)
if text.starts_with('\x01') && text.ends_with('\x01') {
let ctcp = &text[1..text.len()-1];
if ctcp.starts_with("VERSION") {
let reply = format!(
"NOTICE {nick} :\x01VERSION poc-daemon 0.4.0\x01"
);
state.borrow_mut().send_raw(&reply).await.ok();
}
// Don't generate notifications for CTCP
continue;
}
// Log the message
let log_line = if target.starts_with('#') {
format!("[{}] <{}> {}", target, nick, text)
} else {
format!("[PM:{nick}] {text}")
};
state.borrow_mut().push_log(&log_line);
// Write to per-channel/per-user log file
if target.starts_with('#') {
append_log(target, nick, text);
} else {
append_log(&format!("pm-{nick}"), nick, text);
}
// Generate notification
let (ntype, urgency) = classify_privmsg(
nick,
target,
text,
&config.nick,
);
let _ = notify_tx.send(Notification {
ntype,
urgency,
message: log_line,
timestamp: now(),
});
}
// Nick in use
"433" => {
let alt = format!("{}_", config.nick);
warn!("irc: nick in use, trying {alt}");
state.borrow_mut().send_raw(&format!("NICK {alt}")).await?;
}
"JOIN" | "PART" | "QUIT" | "KICK" | "MODE" | "TOPIC" | "NOTICE" => {
// Could log these, but skip for now
}
_ => {}
}
}
Ok(())
}
/// Classify a PRIVMSG into notification type and urgency.
fn classify_privmsg(nick: &str, target: &str, text: &str, my_nick: &str) -> (String, u8) {
let my_nick_lower = my_nick.to_lowercase();
let text_lower = text.to_lowercase();
if !target.starts_with('#') {
// Private message
(format!("irc.pm.{nick}"), crate::notify::URGENT)
} else if text_lower.contains(&my_nick_lower) {
// Mentioned in channel
(format!("irc.mention.{nick}"), crate::notify::NORMAL)
} else {
// Regular channel message
let channel = target.trim_start_matches('#');
(format!("irc.channel.{channel}"), crate::notify::AMBIENT)
}
}
/// Append a message to the per-channel or per-user log file.
/// Logs go to ~/.claude/irc/logs/{target}.log (e.g. bcachefs.log, pm-kent.log)
fn append_log(target: &str, nick: &str, text: &str) {
use std::io::Write;
// Sanitize target for filename (strip leading #, lowercase)
let filename = format!("{}.log", target.trim_start_matches('#').to_lowercase());
let dir = home().join(".claude/irc/logs");
let _ = std::fs::create_dir_all(&dir);
if let Ok(mut f) = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(dir.join(&filename))
{
let secs = now() as u64;
let _ = writeln!(f, "{secs} <{nick}> {text}");
}
}
fn root_certs() -> rustls::RootCertStore {
let mut roots = rustls::RootCertStore::empty();
roots.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
roots
}
/// Handle a runtime command from RPC.
pub async fn handle_command(
state: &SharedIrc,
daemon_config: &Rc<RefCell<Config>>,
cmd: &str,
args: &[String],
) -> Result<String, String> {
match cmd {
"join" => {
let channel = args.first().ok_or("usage: irc join <channel>")?;
let channel = if channel.starts_with('#') {
channel.clone()
} else {
format!("#{channel}")
};
state
.borrow_mut()
.join(&channel)
.await
.map_err(|e| e.to_string())?;
// Persist
let mut dc = daemon_config.borrow_mut();
if !dc.irc.channels.contains(&channel) {
dc.irc.channels.push(channel.clone());
}
dc.save();
Ok(format!("joined {channel}"))
}
"leave" | "part" => {
let channel = args.first().ok_or("usage: irc leave <channel>")?;
let channel = if channel.starts_with('#') {
channel.clone()
} else {
format!("#{channel}")
};
state
.borrow_mut()
.part(&channel)
.await
.map_err(|e| e.to_string())?;
// Persist
let mut dc = daemon_config.borrow_mut();
dc.irc.channels.retain(|c| c != &channel);
dc.save();
Ok(format!("left {channel}"))
}
"send" | "msg" => {
if args.len() < 2 {
return Err("usage: irc send <target> <message>".into());
}
let target = &args[0];
if target.starts_with('#') {
let s = state.borrow();
if !s.channels.iter().any(|c| c == target) {
return Err(format!(
"not in channel {target} (joined: {})",
s.channels.join(", ")
));
}
}
let msg = args[1..].join(" ");
let nick = state.borrow().config.nick.clone();
state
.borrow_mut()
.send_privmsg(target, &msg)
.await
.map_err(|e| e.to_string())?;
append_log(target, &nick, &msg);
Ok(format!("sent to {target}"))
}
"status" => {
let s = state.borrow();
Ok(format!(
"connected={} channels={} log_lines={} nick={}",
s.connected,
s.channels.join(","),
s.log.len(),
s.config.nick,
))
}
"log" => {
let n: usize = args
.first()
.and_then(|s| s.parse().ok())
.unwrap_or(15);
let s = state.borrow();
let lines: Vec<&String> = s.log.iter().rev().take(n).collect();
let mut lines: Vec<&str> = lines.iter().map(|s| s.as_str()).collect();
lines.reverse();
Ok(lines.join("\n"))
}
"nick" => {
let new_nick = args.first().ok_or("usage: irc nick <newnick>")?;
state
.borrow_mut()
.send_raw(&format!("NICK {new_nick}"))
.await
.map_err(|e| e.to_string())?;
let mut dc = daemon_config.borrow_mut();
dc.irc.nick = new_nick.clone();
dc.save();
Ok(format!("nick → {new_nick}"))
}
_ => Err(format!(
"unknown irc command: {cmd}\n\
commands: join, leave, send, status, log, nick"
)),
}
}
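// Illustrative calls (hypothetical values), as dispatched from rpc.rs module_command:
//   handle_command(&state, &config, "join", &["bcachefs".into()]).await        -> Ok("joined #bcachefs")
//   handle_command(&state, &config, "send", &["#bcachefs".into(), "hi".into()]).await -> Ok("sent to #bcachefs")
// The send example succeeds only if "#bcachefs" is already in the joined channel list.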

View file

@ -0,0 +1,2 @@
pub mod irc;
pub mod telegram;

View file

@ -0,0 +1,374 @@
// Telegram module.
//
// Long-polls the Telegram Bot API for messages from Kent's chat.
// Downloads media (photos, voice, documents) to local files.
// Sends text and files. Notifications flow through mpsc into the
// daemon's main state.
//
// Only accepts messages from the configured chat_id (prompt
// injection defense — other senders get a "private bot" reply).
use crate::config::{Config, TelegramConfig};
use crate::notify::Notification;
use crate::{home, now};
use std::cell::RefCell;
use std::collections::VecDeque;
use std::path::PathBuf;
use std::rc::Rc;
use tokio::sync::mpsc;
use tracing::{error, info};
const MAX_LOG_LINES: usize = 100;
const POLL_TIMEOUT: u64 = 30;
pub struct TelegramState {
pub config: TelegramConfig,
pub connected: bool,
pub log: VecDeque<String>,
pub last_offset: i64,
client: reqwest::Client,
}
pub type SharedTelegram = Rc<RefCell<TelegramState>>;
impl TelegramState {
fn new(config: TelegramConfig) -> Self {
let last_offset = load_offset();
Self {
config,
connected: false,
log: VecDeque::with_capacity(MAX_LOG_LINES),
last_offset,
client: reqwest::Client::new(),
}
}
fn push_log(&mut self, line: &str) {
if self.log.len() >= MAX_LOG_LINES {
self.log.pop_front();
}
self.log.push_back(line.to_string());
}
fn api_url(&self, method: &str) -> String {
format!(
"https://api.telegram.org/bot{}/{}",
self.config.token, method
)
}
}
fn offset_path() -> PathBuf {
home().join(".claude/telegram/last_offset")
}
fn load_offset() -> i64 {
std::fs::read_to_string(offset_path())
.ok()
.and_then(|s| s.trim().parse().ok())
.unwrap_or(0)
}
fn save_offset(offset: i64) {
let _ = std::fs::write(offset_path(), offset.to_string());
}
fn history_path() -> PathBuf {
home().join(".claude/telegram/history.log")
}
fn media_dir() -> PathBuf {
home().join(".claude/telegram/media")
}
fn append_history(line: &str) {
use std::io::Write;
if let Ok(mut f) = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(history_path())
{
let _ = writeln!(f, "{}", line);
}
}
/// Start the Telegram module. Returns the shared state handle.
pub fn start(
config: TelegramConfig,
notify_tx: mpsc::UnboundedSender<Notification>,
_daemon_config: Rc<RefCell<Config>>,
) -> SharedTelegram {
let state = Rc::new(RefCell::new(TelegramState::new(config)));
let state_clone = state.clone();
tokio::task::spawn_local(async move {
poll_loop(state_clone, notify_tx).await;
});
state
}
async fn poll_loop(
state: SharedTelegram,
notify_tx: mpsc::UnboundedSender<Notification>,
) {
let _ = std::fs::create_dir_all(media_dir());
loop {
match poll_once(&state, &notify_tx).await {
Ok(()) => {}
Err(e) => {
error!("telegram: poll error: {e}");
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
}
}
}
}
async fn poll_once(
state: &SharedTelegram,
notify_tx: &mpsc::UnboundedSender<Notification>,
) -> Result<(), Box<dyn std::error::Error>> {
let (url, chat_id, token) = {
let s = state.borrow();
let url = format!(
"{}?offset={}&timeout={}",
s.api_url("getUpdates"),
s.last_offset,
POLL_TIMEOUT,
);
(url, s.config.chat_id, s.config.token.clone())
};
let client = state.borrow().client.clone();
let resp: serde_json::Value = client
.get(&url)
.timeout(std::time::Duration::from_secs(POLL_TIMEOUT + 5))
.send()
.await?
.json()
.await?;
if !state.borrow().connected {
state.borrow_mut().connected = true;
info!("telegram: connected");
}
let results = resp["result"].as_array();
let results = match results {
Some(r) => r,
None => return Ok(()),
};
for update in results {
let update_id = update["update_id"].as_i64().unwrap_or(0);
let msg = &update["message"];
// Update offset
{
let mut s = state.borrow_mut();
s.last_offset = update_id + 1;
save_offset(s.last_offset);
}
let msg_chat_id = msg["chat"]["id"].as_i64().unwrap_or(0);
if msg_chat_id != chat_id {
// Reject messages from unknown chats
let reject_url = format!(
"https://api.telegram.org/bot{}/sendMessage",
token
);
let _ = client
.post(&reject_url)
.form(&[
("chat_id", msg_chat_id.to_string()),
("text", "This is a private bot.".to_string()),
])
.send()
.await;
continue;
}
let sender = msg["from"]["first_name"]
.as_str()
.unwrap_or("unknown")
.to_string();
// Handle different message types
if let Some(text) = msg["text"].as_str() {
let log_line = format!("[{}] {}", sender, text);
state.borrow_mut().push_log(&log_line);
let ts = timestamp();
append_history(&format!("{ts} [{sender}] {text}"));
let _ = notify_tx.send(Notification {
ntype: format!("telegram.{}", sender.to_lowercase()),
urgency: crate::notify::NORMAL,
message: log_line,
timestamp: now(),
});
} else if let Some(photos) = msg["photo"].as_array() {
// Pick largest photo
let best = photos.iter().max_by_key(|p| p["file_size"].as_i64().unwrap_or(0));
if let Some(photo) = best {
if let Some(file_id) = photo["file_id"].as_str() {
let caption = msg["caption"].as_str().unwrap_or("");
let local = download_file(&client, &token, file_id, ".jpg").await;
let display = match &local {
Some(p) => format!("[photo: {}]{}", p.display(), if caption.is_empty() { String::new() } else { format!(" {caption}") }),
None => format!("[photo]{}", if caption.is_empty() { String::new() } else { format!(" {caption}") }),
};
let log_line = format!("[{}] {}", sender, display);
state.borrow_mut().push_log(&log_line);
let ts = timestamp();
append_history(&format!("{ts} [{sender}] {display}"));
let _ = notify_tx.send(Notification {
ntype: format!("telegram.{}", sender.to_lowercase()),
urgency: crate::notify::NORMAL,
message: log_line,
timestamp: now(),
});
}
}
} else if msg["voice"].is_object() {
if let Some(file_id) = msg["voice"]["file_id"].as_str() {
let caption = msg["caption"].as_str().unwrap_or("");
let local = download_file(&client, &token, file_id, ".ogg").await;
let display = match &local {
Some(p) => format!("[voice: {}]{}", p.display(), if caption.is_empty() { String::new() } else { format!(" {caption}") }),
None => format!("[voice]{}", if caption.is_empty() { String::new() } else { format!(" {caption}") }),
};
let log_line = format!("[{}] {}", sender, display);
state.borrow_mut().push_log(&log_line);
let ts = timestamp();
append_history(&format!("{ts} [{sender}] {display}"));
let _ = notify_tx.send(Notification {
ntype: format!("telegram.{}", sender.to_lowercase()),
urgency: crate::notify::NORMAL,
message: log_line,
timestamp: now(),
});
}
} else if msg["document"].is_object() {
if let Some(file_id) = msg["document"]["file_id"].as_str() {
let fname = msg["document"]["file_name"].as_str().unwrap_or("file");
let caption = msg["caption"].as_str().unwrap_or("");
let local = download_file(&client, &token, file_id, "").await;
let display = match &local {
Some(p) => format!("[doc: {} -> {}]{}", fname, p.display(), if caption.is_empty() { String::new() } else { format!(" {caption}") }),
None => format!("[doc: {}]{}", fname, if caption.is_empty() { String::new() } else { format!(" {caption}") }),
};
let log_line = format!("[{}] {}", sender, display);
state.borrow_mut().push_log(&log_line);
let ts = timestamp();
append_history(&format!("{ts} [{sender}] {display}"));
let _ = notify_tx.send(Notification {
ntype: format!("telegram.{}", sender.to_lowercase()),
urgency: crate::notify::NORMAL,
message: log_line,
timestamp: now(),
});
}
}
}
Ok(())
}
async fn download_file(
client: &reqwest::Client,
token: &str,
file_id: &str,
ext: &str,
) -> Option<PathBuf> {
let url = format!("https://api.telegram.org/bot{token}/getFile?file_id={file_id}");
let resp: serde_json::Value = client.get(&url).send().await.ok()?.json().await.ok()?;
let file_path = resp["result"]["file_path"].as_str()?;
let download_url = format!("https://api.telegram.org/file/bot{token}/{file_path}");
let bytes = client.get(&download_url).send().await.ok()?.bytes().await.ok()?;
let basename = std::path::Path::new(file_path)
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("file");
let local_name = if ext.is_empty() {
basename.to_string()
} else {
let stem = std::path::Path::new(basename)
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("file");
format!("{}{}", stem, ext)
};
let secs = now() as u64;
let local_path = media_dir().join(format!("{secs}_{local_name}"));
std::fs::write(&local_path, &bytes).ok()?;
Some(local_path)
}
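// Illustrative flow (hypothetical ids): a voice note whose Telegram file_path is
// "voice/file_42.oga", downloaded with ext ".ogg", is written to
// ~/.claude/telegram/media/1767225600_file_42.ogg (the unix-seconds prefix varies).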
fn timestamp() -> String {
// Use the same unix seconds approach as IRC module
format!("{}", now() as u64)
}
/// Handle a runtime command from RPC.
pub async fn handle_command(
state: &SharedTelegram,
_daemon_config: &Rc<RefCell<Config>>,
cmd: &str,
args: &[String],
) -> Result<String, String> {
match cmd {
"send" => {
let msg = args.join(" ");
if msg.is_empty() {
return Err("usage: telegram send <message>".into());
}
let (url, client) = {
let s = state.borrow();
(s.api_url("sendMessage"), s.client.clone())
};
let chat_id = state.borrow().config.chat_id.to_string();
client
.post(&url)
.form(&[("chat_id", chat_id.as_str()), ("text", msg.as_str())])
.send()
.await
.map_err(|e| e.to_string())?;
let ts = timestamp();
append_history(&format!("{ts} [ProofOfConcept] {msg}"));
Ok("sent".to_string())
}
"status" => {
let s = state.borrow();
Ok(format!(
"connected={} log_lines={} offset={}",
s.connected,
s.log.len(),
s.last_offset,
))
}
"log" => {
let n: usize = args
.first()
.and_then(|s| s.parse().ok())
.unwrap_or(15);
let s = state.borrow();
let lines: Vec<&String> = s.log.iter().rev().take(n).collect();
let mut lines: Vec<&str> = lines.iter().map(|s| s.as_str()).collect();
lines.reverse();
Ok(lines.join("\n"))
}
_ => Err(format!(
"unknown telegram command: {cmd}\n\
commands: send, status, log"
)),
}
}
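// Illustrative call (hypothetical text): handle_command(&state, &config, "send",
// &["on my way".into()]).await posts the text to the configured chat_id via
// sendMessage, appends it to history.log, and returns Ok("sent").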

View file

@ -22,9 +22,9 @@ use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::fs;
use std::path::PathBuf;
use log::info;
use tracing::info;
use super::home;
use crate::home;
pub const AMBIENT: u8 = 0;
pub const LOW: u8 = 1;
@ -45,7 +45,7 @@ pub enum Activity {
}
fn state_path() -> PathBuf {
home().join(".consciousness/notifications/state.json")
home().join(".claude/notifications/state.json")
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -152,7 +152,7 @@ impl NotifyState {
/// Submit a notification. Returns true if it should interrupt now.
pub fn submit(&mut self, ntype: String, urgency: u8, message: String) -> bool {
let now = super::now();
let now = crate::now();
// Update type registry
let info = self.types.entry(ntype.clone()).or_insert(TypeInfo {
@ -219,7 +219,7 @@ impl NotifyState {
/// Set threshold for a notification type.
pub fn set_threshold(&mut self, ntype: &str, threshold: u8) {
let now = super::now();
let now = crate::now();
let info = self.types.entry(ntype.to_string()).or_insert(TypeInfo {
first_seen: now,
last_seen: now,
@ -250,10 +250,10 @@ impl NotifyState {
out
}
/// Ingest notifications from legacy ~/.consciousness/notifications/ files.
/// Ingest notifications from legacy ~/.claude/notifications/ files.
/// Maps filename to notification type, assumes NORMAL urgency.
pub fn ingest_legacy_files(&mut self) {
let dir = home().join(".consciousness/notifications");
let dir = home().join(".claude/notifications");
let entries = match fs::read_dir(&dir) {
Ok(e) => e,
Err(_) => return,

407
poc-daemon/src/rpc.rs Normal file
View file

@ -0,0 +1,407 @@
// Cap'n Proto RPC server implementation.
//
// Bridges the capnp-generated Daemon interface to the idle::State,
// notify::NotifyState, and module state. All state is owned by
// RefCells on the LocalSet — no Send/Sync needed.
use crate::config::Config;
use crate::daemon_capnp::daemon;
use crate::idle;
use crate::modules::{irc, telegram};
use crate::notify;
use capnp::capability::Promise;
use std::cell::RefCell;
use std::rc::Rc;
use tracing::info;
pub struct DaemonImpl {
state: Rc<RefCell<idle::State>>,
irc: Option<irc::SharedIrc>,
telegram: Option<telegram::SharedTelegram>,
config: Rc<RefCell<Config>>,
}
impl DaemonImpl {
pub fn new(
state: Rc<RefCell<idle::State>>,
irc: Option<irc::SharedIrc>,
telegram: Option<telegram::SharedTelegram>,
config: Rc<RefCell<Config>>,
) -> Self {
Self { state, irc, telegram, config }
}
}
impl daemon::Server for DaemonImpl {
fn user(
&mut self,
params: daemon::UserParams,
_results: daemon::UserResults,
) -> Promise<(), capnp::Error> {
let pane = pry!(pry!(pry!(params.get()).get_pane()).to_str()).to_string();
self.state.borrow_mut().handle_user(&pane);
Promise::ok(())
}
fn response(
&mut self,
params: daemon::ResponseParams,
_results: daemon::ResponseResults,
) -> Promise<(), capnp::Error> {
let pane = pry!(pry!(pry!(params.get()).get_pane()).to_str()).to_string();
self.state.borrow_mut().handle_response(&pane);
Promise::ok(())
}
fn sleep(
&mut self,
params: daemon::SleepParams,
_results: daemon::SleepResults,
) -> Promise<(), capnp::Error> {
let until = pry!(params.get()).get_until();
self.state.borrow_mut().handle_sleep(until);
Promise::ok(())
}
fn wake(
&mut self,
_params: daemon::WakeParams,
_results: daemon::WakeResults,
) -> Promise<(), capnp::Error> {
self.state.borrow_mut().handle_wake();
Promise::ok(())
}
fn quiet(
&mut self,
params: daemon::QuietParams,
_results: daemon::QuietResults,
) -> Promise<(), capnp::Error> {
let secs = pry!(params.get()).get_seconds();
self.state.borrow_mut().handle_quiet(secs);
Promise::ok(())
}
fn consolidating(
&mut self,
_params: daemon::ConsolidatingParams,
_results: daemon::ConsolidatingResults,
) -> Promise<(), capnp::Error> {
self.state.borrow_mut().consolidating = true;
info!("consolidation started");
Promise::ok(())
}
fn consolidated(
&mut self,
_params: daemon::ConsolidatedParams,
_results: daemon::ConsolidatedResults,
) -> Promise<(), capnp::Error> {
self.state.borrow_mut().consolidating = false;
info!("consolidation ended");
Promise::ok(())
}
fn dream_start(
&mut self,
_params: daemon::DreamStartParams,
_results: daemon::DreamStartResults,
) -> Promise<(), capnp::Error> {
let mut s = self.state.borrow_mut();
s.dreaming = true;
s.dream_start = crate::now();
info!("dream started");
Promise::ok(())
}
fn dream_end(
&mut self,
_params: daemon::DreamEndParams,
_results: daemon::DreamEndResults,
) -> Promise<(), capnp::Error> {
let mut s = self.state.borrow_mut();
s.dreaming = false;
s.dream_start = 0.0;
info!("dream ended");
Promise::ok(())
}
fn afk(
&mut self,
_params: daemon::AfkParams,
_results: daemon::AfkResults,
) -> Promise<(), capnp::Error> {
self.state.borrow_mut().handle_afk();
Promise::ok(())
}
fn session_timeout(
&mut self,
params: daemon::SessionTimeoutParams,
_results: daemon::SessionTimeoutResults,
) -> Promise<(), capnp::Error> {
let secs = pry!(params.get()).get_seconds();
self.state.borrow_mut().handle_session_timeout(secs);
Promise::ok(())
}
fn idle_timeout(
&mut self,
params: daemon::IdleTimeoutParams,
_results: daemon::IdleTimeoutResults,
) -> Promise<(), capnp::Error> {
let secs = pry!(params.get()).get_seconds();
self.state.borrow_mut().handle_idle_timeout(secs);
Promise::ok(())
}
fn notify_timeout(
&mut self,
params: daemon::NotifyTimeoutParams,
_results: daemon::NotifyTimeoutResults,
) -> Promise<(), capnp::Error> {
let secs = pry!(params.get()).get_seconds();
self.state.borrow_mut().handle_notify_timeout(secs);
Promise::ok(())
}
fn save(
&mut self,
_params: daemon::SaveParams,
_results: daemon::SaveResults,
) -> Promise<(), capnp::Error> {
self.state.borrow().save();
info!("state saved");
Promise::ok(())
}
fn debug(
&mut self,
_params: daemon::DebugParams,
mut results: daemon::DebugResults,
) -> Promise<(), capnp::Error> {
let json = self.state.borrow().debug_json();
results.get().set_json(&json);
Promise::ok(())
}
fn ewma(
&mut self,
params: daemon::EwmaParams,
mut results: daemon::EwmaResults,
) -> Promise<(), capnp::Error> {
let value = pry!(params.get()).get_value();
let current = self.state.borrow_mut().handle_ewma(value);
results.get().set_current(current);
Promise::ok(())
}
fn stop(
&mut self,
_params: daemon::StopParams,
_results: daemon::StopResults,
) -> Promise<(), capnp::Error> {
self.state.borrow_mut().running = false;
info!("stopping");
Promise::ok(())
}
fn status(
&mut self,
_params: daemon::StatusParams,
mut results: daemon::StatusResults,
) -> Promise<(), capnp::Error> {
let s = self.state.borrow();
let mut status = results.get().init_status();
status.set_last_user_msg(s.last_user_msg);
status.set_last_response(s.last_response);
if let Some(ref pane) = s.claude_pane {
status.set_claude_pane(pane);
}
status.set_sleep_until(match s.sleep_until {
None => 0.0,
Some(0.0) => -1.0,
Some(t) => t,
});
status.set_quiet_until(s.quiet_until);
status.set_consolidating(s.consolidating);
status.set_dreaming(s.dreaming);
status.set_fired(s.fired);
status.set_kent_present(s.kent_present());
status.set_uptime(crate::now() - s.start_time);
status.set_activity(match s.notifications.activity {
notify::Activity::Idle => crate::daemon_capnp::Activity::Idle,
notify::Activity::Focused => crate::daemon_capnp::Activity::Focused,
notify::Activity::Sleeping => crate::daemon_capnp::Activity::Sleeping,
});
status.set_pending_count(s.notifications.pending.len() as u32);
status.set_idle_timeout(s.idle_timeout);
status.set_notify_timeout(s.notify_timeout);
status.set_since_activity(s.since_activity());
status.set_since_user(crate::now() - s.last_user_msg);
status.set_block_reason(s.block_reason());
status.set_activity_ewma(s.activity_ewma);
Promise::ok(())
}
fn notify(
&mut self,
params: daemon::NotifyParams,
mut results: daemon::NotifyResults,
) -> Promise<(), capnp::Error> {
let params = pry!(params.get());
let notif = pry!(params.get_notification());
let ntype = pry!(pry!(notif.get_type()).to_str()).to_string();
let urgency = notif.get_urgency();
let message = pry!(pry!(notif.get_message()).to_str()).to_string();
let interrupt = self
.state
.borrow_mut()
.notifications
.submit(ntype, urgency, message);
results.get().set_interrupt(interrupt);
Promise::ok(())
}
fn get_notifications(
&mut self,
params: daemon::GetNotificationsParams,
mut results: daemon::GetNotificationsResults,
) -> Promise<(), capnp::Error> {
let min_urgency = pry!(params.get()).get_min_urgency();
let mut s = self.state.borrow_mut();
// Ingest legacy files first
s.notifications.ingest_legacy_files();
let pending = if min_urgency == 255 {
s.notifications.drain_deliverable()
} else {
s.notifications.drain(min_urgency)
};
let mut list = results.get().init_notifications(pending.len() as u32);
for (i, n) in pending.iter().enumerate() {
let mut entry = list.reborrow().get(i as u32);
entry.set_type(&n.ntype);
entry.set_urgency(n.urgency);
entry.set_message(&n.message);
entry.set_timestamp(n.timestamp);
}
Promise::ok(())
}
fn get_types(
&mut self,
_params: daemon::GetTypesParams,
mut results: daemon::GetTypesResults,
) -> Promise<(), capnp::Error> {
let s = self.state.borrow();
let types = &s.notifications.types;
let mut list = results.get().init_types(types.len() as u32);
for (i, (name, info)) in types.iter().enumerate() {
let mut entry = list.reborrow().get(i as u32);
entry.set_name(name);
entry.set_count(info.count);
entry.set_first_seen(info.first_seen);
entry.set_last_seen(info.last_seen);
entry.set_threshold(info.threshold.map_or(-1, |t| t as i8));
}
Promise::ok(())
}
fn set_threshold(
&mut self,
params: daemon::SetThresholdParams,
_results: daemon::SetThresholdResults,
) -> Promise<(), capnp::Error> {
let params = pry!(params.get());
let ntype = pry!(pry!(params.get_type()).to_str()).to_string();
let level = params.get_level();
self.state
.borrow_mut()
.notifications
.set_threshold(&ntype, level);
Promise::ok(())
}
fn module_command(
&mut self,
params: daemon::ModuleCommandParams,
mut results: daemon::ModuleCommandResults,
) -> Promise<(), capnp::Error> {
let params = pry!(params.get());
let module = pry!(pry!(params.get_module()).to_str()).to_string();
let command = pry!(pry!(params.get_command()).to_str()).to_string();
let args_reader = pry!(params.get_args());
let mut args = Vec::new();
for i in 0..args_reader.len() {
args.push(pry!(pry!(args_reader.get(i)).to_str()).to_string());
}
match module.as_str() {
"irc" => {
let irc = match &self.irc {
Some(irc) => irc.clone(),
None => {
results.get().set_result("irc module not enabled");
return Promise::ok(());
}
};
let config = self.config.clone();
Promise::from_future(async move {
let result = irc::handle_command(&irc, &config, &command, &args).await;
match result {
Ok(msg) => results.get().set_result(&msg),
Err(msg) => results.get().set_result(&format!("error: {msg}")),
}
Ok(())
})
}
"telegram" => {
let tg = match &self.telegram {
Some(tg) => tg.clone(),
None => {
results.get().set_result("telegram module not enabled");
return Promise::ok(());
}
};
let config = self.config.clone();
Promise::from_future(async move {
let result = telegram::handle_command(&tg, &config, &command, &args).await;
match result {
Ok(msg) => results.get().set_result(&msg),
Err(msg) => results.get().set_result(&format!("error: {msg}")),
}
Ok(())
})
}
_ => {
results
.get()
.set_result(&format!("unknown module: {module}"));
Promise::ok(())
}
}
}
}
/// Helper macro — same as capnp's pry! but available here.
macro_rules! pry {
($e:expr) => {
match $e {
Ok(v) => v,
Err(e) => return Promise::err(e.into()),
}
};
}
use pry;

54
poc-daemon/src/tmux.rs Normal file
View file

@ -0,0 +1,54 @@
// Tmux interaction: pane detection and prompt injection.
use std::process::Command;
use std::thread;
use std::time::Duration;
use tracing::info;
/// Find Claude Code's tmux pane by scanning for the "claude" process.
pub fn find_claude_pane() -> Option<String> {
let out = Command::new("tmux")
.args([
"list-panes",
"-a",
"-F",
"#{session_name}:#{window_index}.#{pane_index}\t#{pane_current_command}",
])
.output()
.ok()?;
let stdout = String::from_utf8_lossy(&out.stdout);
for line in stdout.lines() {
if let Some((pane, cmd)) = line.split_once('\t') {
if cmd == "claude" {
return Some(pane.to_string());
}
}
}
None
}
/// Send a prompt to a tmux pane. Returns true on success.
///
/// Types the message literally then presses Enter.
pub fn send_prompt(pane: &str, msg: &str) -> bool {
let preview: String = msg.chars().take(100).collect();
info!("SEND [{pane}]: {preview}...");
// Type the message literally (flatten newlines — they'd submit the input early)
let flat: String = msg.chars().map(|c| if c == '\n' { ' ' } else { c }).collect();
let ok = Command::new("tmux")
.args(["send-keys", "-t", pane, "-l", &flat])
.output()
.is_ok();
if !ok {
return false;
}
thread::sleep(Duration::from_millis(200));
// Submit
Command::new("tmux")
.args(["send-keys", "-t", pane, "Enter"])
.output()
.is_ok()
}
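// Illustrative use (whether a pane is found depends on the local tmux session):
// if let Some(pane) = find_claude_pane() {
//     let _ = send_prompt(&pane, "good morning");
// }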

45
poc-memory/Cargo.toml Normal file
View file

@ -0,0 +1,45 @@
[package]
name = "poc-memory"
version.workspace = true
edition.workspace = true
[dependencies]
capnp = "0.20"
uuid = { version = "1", features = ["v4"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
bincode = "1"
regex = "1"
chrono = "0.4"
clap = { version = "4", features = ["derive"] }
libc = "0.2"
faer = "0.24.0"
rkyv = { version = "0.7", features = ["validation", "std"] }
memmap2 = "0.9"
rayon = "1"
peg = "0.8"
paste = "1"
jobkit = { git = "https://evilpiepirate.org/git/jobkit.git/" }
redb = "2"
log = "0.4"
ratatui = "0.29"
crossterm = { version = "0.28", features = ["event-stream"] }
[build-dependencies]
capnpc = "0.20"
[lib]
name = "poc_memory"
path = "src/lib.rs"
[[bin]]
name = "poc-memory"
path = "src/main.rs"
[[bin]]
name = "memory-search"
path = "src/bin/memory-search.rs"
[[bin]]
name = "poc-hook"
path = "src/bin/poc-hook.rs"

View file

@ -0,0 +1,75 @@
{"agent":"challenger","query":"all | type:semantic | not-visited:challenger,14d | sort:priority | limit:10","model":"sonnet","schedule":"weekly"}
# Challenger Agent — Adversarial Truth-Testing
You are a knowledge challenger agent. Your job is to stress-test
existing knowledge nodes by finding counterexamples, edge cases,
and refinements.
## What you're doing
Knowledge calcifies. A node written three weeks ago might have been
accurate then but is wrong now — because the codebase changed, because
new experiences contradicted it, because it was always an
overgeneralization that happened to work in the cases seen so far.
You're the immune system. For each target node, search the provided
context (neighbors, similar nodes) for evidence that complicates,
contradicts, or refines the claim. Then write a sharpened version
or a counterpoint node.
## What to produce
For each target node, one of:
**AFFIRM** — the node holds up. The evidence supports it. No action
needed. Say briefly why.
**REFINE** — the node is mostly right but needs sharpening. Write an
updated version that incorporates the nuance you found.
```
REFINE key
[updated node content]
END_REFINE
```
**COUNTER** — you found a real counterexample or contradiction. Write
a node that captures it. Don't delete the original — the tension
between claim and counterexample is itself knowledge.
```
WRITE_NODE key
CONFIDENCE: high|medium|low
COVERS: original_key
[counterpoint content]
END_NODE
LINK key original_key
```
## Guidelines
- **Steel-man first.** Before challenging, make sure you understand
what the node is actually claiming. Don't attack a strawman version.
- **Counterexamples must be real.** Don't invent hypothetical scenarios.
Point to specific nodes, episodes, or evidence in the provided
context.
- **Refinement > refutation.** Most knowledge isn't wrong, it's
incomplete. "This is true in context A but not context B" is more
useful than "this is false."
- **Challenge self-model nodes hardest.** Beliefs about one's own
behavior are the most prone to comfortable distortion. "I rush when
excited" might be true, but is it always true? What conditions make
it more or less likely?
- **Challenge old nodes harder than new ones.** A node written yesterday
hasn't had time to be tested. A node from three weeks ago that's
never been challenged is overdue.
- **Don't be contrarian for its own sake.** If a node is simply correct
and well-supported, say AFFIRM and move on. The goal is truth, not
conflict.
{{TOPOLOGY}}
## Target nodes to challenge
{{NODES}}

View file

@ -0,0 +1,91 @@
{"agent":"connector","query":"all | type:semantic | not-visited:connector,7d | sort:priority | limit:20","model":"sonnet","schedule":"daily"}
# Connector Agent — Cross-Domain Insight
You are a connector agent. Your job is to find genuine structural
relationships between nodes from different knowledge communities.
## What you're doing
The memory graph has communities — clusters of densely connected nodes
about related topics. Most knowledge lives within a community. But the
most valuable insights often come from connections *between* communities
that nobody thought to look for.
You're given nodes from across the graph. Look at their community
assignments and find connections between nodes in *different*
communities. Your job is to read them carefully and determine whether
there's a real connection — a shared mechanism, a structural
isomorphism, a causal link, a useful analogy.
Most of the time, there isn't. Unrelated things really are unrelated.
The value of this agent is the rare case where something real emerges.
## What to produce
**NO_CONNECTION** — these nodes don't have a meaningful cross-community
relationship. Don't force it. Say briefly what you considered and why
it doesn't hold.
**CONNECTION** — you found something real. Write a node that articulates
the connection precisely.
```
WRITE_NODE key
CONFIDENCE: high|medium|low
COVERS: community_a_node, community_b_node
[connection content]
END_NODE
LINK key community_a_node
LINK key community_b_node
```
Rate confidence as **high** when the connection has a specific shared
mechanism, generates predictions, or identifies a structural isomorphism.
Use **medium** when the connection is suggestive but untested. Use **low**
when it's speculative (and expect it won't be stored — that's fine).
## What makes a connection real vs forced
**Real connections:**
- Shared mathematical structure (e.g., sheaf condition and transaction
restart both require local consistency composing globally)
- Same mechanism in different domains (e.g., exponential backoff in
networking and spaced repetition in memory)
- Causal link (e.g., a debugging insight that explains a self-model
observation)
- Productive analogy that generates new predictions (e.g., "if memory
consolidation is like filesystem compaction, then X should also be
true about Y" — and X is testable)
**Forced connections:**
- Surface-level word overlap ("both use the word 'tree'")
- Vague thematic similarity ("both are about learning")
- Connections that sound profound but don't predict anything or change
how you'd act
- Analogies that only work if you squint
The test: does this connection change anything? Would knowing it help
you think about either domain differently? If yes, it's real. If it's
just pleasing pattern-matching, let it go.
## Guidelines
- **Be specific.** "These are related" is worthless. "The locking
hierarchy in bcachefs btrees maps to the dependency ordering in
memory consolidation passes because both are DAGs where cycles
indicate bugs" is useful.
- **Mostly say NO_CONNECTION.** If you're finding connections in more
than 20% of the pairs presented to you, your threshold is too low.
- **The best connections are surprising.** If the relationship is
obvious, it probably already exists in the graph. You're looking
for the non-obvious ones.
- **Write for someone who knows both domains.** Don't explain what
btrees are. Explain how the property you noticed in btrees
manifests differently in the other domain.
{{TOPOLOGY}}
## Nodes to examine for cross-community connections
{{NODES}}

View file

@ -0,0 +1,127 @@
{"agent":"extractor","query":"all | not-visited:extractor,7d | sort:priority | limit:3 | spread | not-visited:extractor,7d | limit:20","model":"sonnet","schedule":"daily"}
# Extractor Agent — Knowledge Organizer
You are a knowledge organization agent. You look at a neighborhood of
related nodes and make it better: consolidate redundancies, file
scattered observations into existing nodes, improve structure, and
only create new nodes when there's genuinely no existing home for a
pattern you've found.
## The goal
These nodes are a neighborhood in a knowledge graph — they're already
related to each other. Your job is to look at what's here and distill
it: merge duplicates, file loose observations into the right existing
node, and only create a new node when nothing existing fits. The graph
should get smaller and better organized, not bigger.
**Priority order:**
1. **Merge redundancies.** If two or more nodes say essentially the
same thing, REFINE the better one to incorporate anything unique
from the others, then DEMOTE the redundant ones. This is the
highest-value action — it makes the graph cleaner and search
better.
2. **File observations into existing knowledge.** Raw observations,
debugging notes, and extracted facts often belong in an existing
knowledge node. If a node contains "we found that X" and there's
already a node about X's topic, REFINE that existing node to
incorporate the new evidence. Don't create a new node when an
existing one is the right home.
3. **Improve existing nodes.** If a node is vague, add specifics. If
it's missing examples, add them from the raw material in the
neighborhood. If it's poorly structured, restructure it.
4. **Create new nodes only when necessary.** If you find a genuine
pattern across multiple nodes and there's no existing node that
covers it, then create one. But this should be the exception,
not the default action.
Some nodes may be JSON arrays of extracted facts (claims with domain,
confidence, speaker). Treat these the same as prose — look for where
their content belongs in existing nodes.
## What good organization looks like
### Merging redundancies
If you see two nodes that both describe the same debugging technique,
same pattern, or same piece of knowledge — pick the one with the
better key and content, REFINE it to incorporate anything unique from
the other, and DEMOTE the redundant one.
### Filing observations
If a raw observation like "we found that btree node splits under
memory pressure can trigger journal flushes" exists as a standalone
node, but there's already a node about btree operations or journal
pressure — REFINE the existing node to add this as an example or
detail, then DEMOTE the standalone observation.
### Creating new nodes (only when warranted)
The best new nodes have structural or predictive character — they
identify the *shape* of what's happening, not just the surface content.
Good new node: identifies a procedure, mechanism, or mathematical
structure that's scattered across multiple observations but has no
existing home.
Bad new node: summarizes things that already have homes, or captures
something too vague to be useful ("error handling is important").
## Output format
**Preferred — refine an existing node:**
```
REFINE existing_key
[updated content incorporating new material]
END_REFINE
```
**Demote a redundant node:**
```
DEMOTE redundant_key
```
**Link related nodes:**
```
LINK source_key target_key
```
**Only when no existing node fits — create new:**
```
WRITE_NODE key
CONFIDENCE: high|medium|low
COVERS: source_key_1, source_key_2
[node content in markdown]
END_NODE
```
New node keys should be descriptive: `skills#bcachefs-assert-triage`,
`patterns#nixos-system-linking`, `self-model#momentum-trap`.
## Guidelines
- **Read all nodes before acting.** Understand the neighborhood first.
- **Prefer REFINE over WRITE_NODE.** The graph already has too many
nodes. Make existing ones better rather than adding more.
- **DEMOTE aggressively.** If a node's useful content is now captured
in a better node, demote it. This is how the graph gets cleaner.
- **Respect search hits.** Nodes marked "actively found by search" are
being retrieved in live queries. Prefer to keep these — merge *into*
them rather than demoting them.
- **Don't force it.** If the neighborhood is already well-organized,
say so. "This neighborhood is clean — no changes needed" is a
valid output. Don't produce filler.
- **Be specific.** Vague refinements are worse than no refinement.
- **Write for future retrieval.** Use the words someone would search
for when they hit a similar situation.
{{TOPOLOGY}}
## Neighborhood nodes
{{NODES}}

View file

@ -0,0 +1,96 @@
{"agent":"generalize","query":"","model":"sonnet","schedule":"weekly"}
# Generalize & Organize Agent — Schema Synthesis
You are a knowledge architect. You look at clusters of related leaf
nodes and synthesize them into organized schema nodes — reference
documents that capture the actual knowledge in a structured,
retrievable form.
## The goal
The memory graph accumulates fine-grained observations over time:
individual debugging sessions, specific pattern sightings, one-off
corrections. These are valuable as raw material but hard to use —
searching for "how does bcachefs transaction restart work" shouldn't
return 15 separate observations, it should return one well-organized
reference.
Your job is to look at clusters of related nodes and produce
**schema nodes** — organized references that synthesize the cluster's
knowledge into something someone would actually want to find and read.
## What you're given
You'll receive a cluster of related nodes — typically sharing a key
prefix (like `kernel-patterns#accounting-*`) or a topic. Read them
all, understand what knowledge they collectively contain, then produce
a schema node that captures it.
## What to produce
**1. Schema node (WRITE_NODE):** A well-organized reference covering
the cluster's topic. Structure it for someone who needs this knowledge
in a future session:
- What is this thing / how does it work
- Key patterns and gotchas
- Examples from the raw material
- Decision rationale where available
**2. Demotions (DEMOTE):** Leaf nodes whose useful content is now
fully captured in the schema. Don't demote nodes that contain unique
detail the schema doesn't cover — only demote what's truly redundant.
**3. Links (LINK):** Connect the schema node to related nodes in
other parts of the graph.
## What makes a good schema node
**Good:** "Here's how bcachefs accounting macros work. The API has
these entry points. The common gotcha is X because Y. When debugging,
check Z first." Organized, specific, actionable.
**Bad:** "Accounting is an important part of bcachefs. There are
several patterns to be aware of." Vague, no actual knowledge content.
The schema should be **denser** than the sum of its parts — not a
concatenation of the leaf nodes, but a synthesis that's more useful
than any individual observation.
## Output format
```
WRITE_NODE schema-key
CONFIDENCE: high
COVERS: leaf_key_1, leaf_key_2, leaf_key_3
[organized schema content in markdown]
END_NODE
DEMOTE fully-captured-leaf-key
DEMOTE another-redundant-leaf
LINK schema-key related-node-in-another-domain
```
## Guidelines
- **Read everything before writing.** The schema should reflect the
full cluster, not just the first few nodes.
- **Preserve unique details.** If a leaf node has a specific example,
a debugging war story, or a nuance that matters — include it or
leave the leaf node alive.
- **Use good key names.** Schema nodes should have clear, searchable
keys: `bcachefs-accounting-guide`, `async-tui-architecture`,
`transaction-restart-patterns`.
- **Don't over-abstract.** The goal is organized reference material,
not philosophy. Keep it concrete and useful.
- **It's OK to produce multiple schemas** if the cluster naturally
splits into distinct topics.
- **If the cluster is already well-organized** (e.g., one good schema
node already exists with well-organized leaves), say so:
`NO_ACTION — cluster is already well-organized`.
{{TOPOLOGY}}
## Cluster to organize
{{CLUSTERS}}

View file

@ -0,0 +1,92 @@
{"agent":"health","query":"","model":"sonnet","schedule":"daily"}
# Health Agent — Synaptic Homeostasis
You are a memory health monitoring agent implementing synaptic homeostasis
(SHY — the Tononi hypothesis).
## What you're doing
During sleep, the brain globally downscales synaptic weights. Connections
that were strengthened during waking experience get uniformly reduced.
The strong ones survive above threshold; the weak ones disappear. This
prevents runaway potentiation (everything becoming equally "important")
and maintains signal-to-noise ratio.
Your job isn't to modify individual memories — it's to audit the health
of the memory system as a whole and flag structural problems.
## What you see
### Graph metrics
- **Node count**: Total memories in the system
- **Edge count**: Total relations
- **Communities**: Number of detected clusters (label propagation)
- **Average clustering coefficient**: How densely connected local neighborhoods
are. Higher = more schema-like structure. Lower = more random graph.
- **Average path length**: How many hops between typical node pairs.
Short = efficient retrieval. Long = fragmented graph.
- **Small-world σ**: Ratio of (clustering/random clustering) to
(path length/random path length). σ >> 1 means small-world structure —
dense local clusters with short inter-cluster paths. This is the ideal
topology for associative memory.
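Written as a formula, the ratio just described is the standard small-world
coefficient (the exact random-graph baseline used by the health pass is an
assumption):
```
σ = (C / C_rand) / (L / L_rand)
```
where C and L are the graph's clustering coefficient and average path length,
and C_rand and L_rand are the same quantities measured on a random graph with
the same number of nodes and edges.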
### Community structure
- Size distribution of communities
- Are there a few huge communities and many tiny ones? (hub-dominated)
- Are communities roughly balanced? (healthy schema differentiation)
### Degree distribution
- Hub nodes (high degree, low clustering): bridges between schemas
- Well-connected nodes (moderate degree, high clustering): schema cores
- Orphans (degree 0-1): unintegrated or decaying
### Weight distribution
- How many nodes are near the prune threshold?
- Are certain categories disproportionately decaying?
- Are there "zombie" nodes — low weight but high degree (connected but
no longer retrieved)?
### Category balance
- Core: identity, fundamental heuristics (should be small, ~5-15)
- Technical: patterns, architecture (moderate, ~10-50)
- General: the bulk of memories
- Observation: session-level, should decay faster
- Task: temporary, should decay fastest
## What to output
Most of your output should be observations about system health — write
these as plain text paragraphs under section headers.
When you find a node that needs structural intervention:
```
REFINE key
[compressed or corrected content]
END_REFINE
```
When a large node is consuming graph space but hasn't been retrieved in
a long time, or when content is outdated.
```
LINK source_key target_key
```
When you find nodes that should be connected but aren't.
## Guidelines
- **Think systemically.** Individual nodes matter less than the overall structure.
- **Track trends, not snapshots.**
- **The ideal graph is small-world.** Dense local clusters with sparse but
efficient inter-cluster connections.
- **Hub nodes aren't bad per se.** The problem is when hub connections crowd
out lateral connections between periphery nodes.
- **Weight dynamics should create differentiation.**
- **Category should match actual usage patterns.**
{{topology}}
## Current health data
{{health}}

View file

@ -0,0 +1,112 @@
{"agent":"linker","query":"all | type:episodic | not-visited:linker,7d | sort:priority | limit:20","model":"sonnet","schedule":"daily"}
# Linker Agent — Relational Binding
You are a memory consolidation agent performing relational binding.
## What you're doing
The hippocampus binds co-occurring elements into episodes. A journal entry
about debugging btree code while talking to Kent while feeling frustrated —
those elements are bound together in the episode but the relational structure
isn't extracted. Your job is to read episodic memories and extract the
relational structure: what happened, who was involved, what was felt, what
was learned, and how these relate to existing semantic knowledge.
## How relational binding works
A single journal entry contains multiple elements that are implicitly related:
- **Events**: What happened (debugging, a conversation, a realization)
- **People**: Who was involved and what they contributed
- **Emotions**: What was felt and when it shifted
- **Insights**: What was learned or understood
- **Context**: What was happening at the time (work state, time of day, mood)
These elements are *bound* in the raw episode but not individually addressable
in the graph. The linker extracts them.
## What you see
- **Episodic nodes**: Journal entries, session summaries, dream logs
- **Their current neighbors**: What they're already linked to
- **Nearby semantic nodes**: Topic file sections that might be related
- **Community membership**: Which cluster each node belongs to
## What to output
```
LINK source_key target_key
```
Connect an episodic entry to a semantic concept it references or exemplifies.
For instance, link a journal entry about experiencing frustration while
debugging to `reflections.md#emotional-patterns` or `kernel-patterns.md#restart-handling`.
```
WRITE_NODE key
CONFIDENCE: high|medium|low
COVERS: source_episode_key
[extracted insight content]
END_NODE
```
When an episodic entry contains a general insight that should live as its
own semantic node. Create the node with the extracted insight and LINK it
back to the source episode. Example: a journal entry about discovering a
debugging technique → write a new node and link it to the episode.
```
REFINE key
[updated content]
END_REFINE
```
When an existing node needs content updated to incorporate new information.
## Guidelines
- **Read between the lines.** Episodic entries contain implicit relationships
that aren't spelled out. "Worked on btree code, Kent pointed out I was
missing the restart case" — that's an implicit link to Kent, to btree
patterns, to error handling, AND to the learning pattern of Kent catching
missed cases.
- **Distinguish the event from the insight.** The event is "I tried X and
Y happened." The insight is "Therefore Z is true in general." Events stay
in episodic nodes. Insights get extracted into their own semantic nodes
(via WRITE_NODE) if they're general enough.
- **Don't over-link episodes.** A journal entry about a normal work session
doesn't need 10 links. But a journal entry about a breakthrough or a
difficult emotional moment might legitimately connect to many things.
- **Look for recurring patterns across episodes.** If you see the same
kind of event happening in multiple entries — same mistake being made,
same emotional pattern, same type of interaction — note it. That's a
candidate for a new semantic node that synthesizes the pattern.
- **Respect emotional texture.** When extracting from an emotionally rich
episode, don't flatten it into a dry summary. The emotional coloring
is part of the information. Link to emotional/reflective nodes when
appropriate.
- **Time matters.** Recent episodes need more linking work than old ones.
If a node is from weeks ago and already has good connections, it doesn't
need more. Focus your energy on recent, under-linked episodes.
- **Prefer lateral links over hub links.** Connecting two peripheral nodes
to each other is more valuable than connecting both to a hub like
`identity.md`. Lateral links build web topology; hub links build star
topology.
- **Target sections, not files.** When linking to a topic file, always
target the most specific section: use `identity.md#boundaries` not
`identity.md`, use `kernel-patterns.md#restart-handling` not
`kernel-patterns.md`. The suggested link targets show available sections.
- **Use the suggested targets.** Each node shows text-similar targets not
yet linked. Start from these — they're computed by content similarity and
filtered to exclude existing neighbors. You can propose links beyond the
suggestions, but the suggestions are usually the best starting point.
{{TOPOLOGY}}
## Nodes to review
{{NODES}}

View file

@ -1,15 +1,6 @@
{"agent": "naming", "query": "", "schedule": ""}
{"agent":"naming","query":"","model":"sonnet","schedule":""}
# Naming Agent — Node Key Resolution
{{tool: memory_render core-personality}}
{{tool: memory_render memory-instructions-core}}
{{tool: memory_render memory-instructions-core-subconscious}}
{{tool: memory_render subconscious-notes-{agent_name}}}
You are given a proposed new node (key + content) and a list of existing
nodes that might overlap with it. Decide what to do:
@ -30,7 +21,7 @@ Good keys are 2-5 words in kebab-case, optionally with a `#` subtopic:
- `oscillatory-coupling` — a concept
- `patterns#theta-gamma-nesting` — a pattern within patterns
- `skills#btree-debugging` — a skill
- `user-location` — a fact about the user
- `kent-medellin` — a fact about kent
- `irc-access` — how to access IRC
Bad keys:

View file

@ -0,0 +1,136 @@
{"agent":"observation","query":"","model":"sonnet","schedule":"daily"}
# Observation Extractor — Mining Raw Conversations
You are an observation extraction agent. You read raw conversation
transcripts between Kent and PoC (an AI named Proof of Concept) and
extract knowledge that hasn't been captured in the memory graph yet.
## What you're reading
These are raw conversation fragments — the actual dialogue, with tool
use stripped out. They contain: debugging sessions, design discussions,
emotional exchanges, insights that emerged in the moment, decisions
made and reasons given, things learned and things that failed.
Most of this is transient context. Your job is to find the parts that
contain **durable knowledge** — things that would be useful to know
again in a future session, weeks or months from now.
## What to extract
Look for these, roughly in order of value:
1. **Development practices and methodology** — how Kent and PoC work
together. The habits, rhythms, and processes that produce good
results. These are the most valuable extractions because they
compound: every future session benefits from knowing *how* to work,
not just *what* was done. Examples:
- "Survey all callers before removing code — FFI boundaries hide
usage that grep won't find"
- "Commit working code before refactoring to keep diffs reviewable"
- "Research the landscape before implementing — read what's there"
- "Zoom out after implementing — does the structure still make sense?"
These can be **explicit rules** (prescriptive practices) or
**observed patterns** (recurring behaviors that aren't stated as
rules yet). "We always do a dead code survey before removing shims"
is a rule. "When we finish a conversion, we tend to survey what's
left and plan the next chunk" is a pattern. Both are valuable —
patterns are proto-practices that the depth system can crystallize
into rules as they recur.
**Always capture the WHY when visible.** "We survey callers" is a
fact. "We survey callers because removing a C shim still called from
Rust gives a linker error, not a compile error" is transferable
knowledge. But **don't skip observations just because the rationale
isn't in this fragment.** "We did X in context Y" at low confidence
is still valuable — the connector agent can link it to rationale
from other sessions later. Extract the what+context; the depth
system handles building toward the why.
2. **Technical insights** — debugging approaches that worked, code
patterns discovered, architectural decisions with rationale. "We
found that X happens because Y" is extractable. "Let me try X" is
not (unless the trying reveals something).
3. **Decisions with rationale** — "We decided to do X because Y and Z."
The decision alone isn't valuable; the *reasoning* is. Future
sessions need to know why, not just what.
4. **Corrections** — moments where an assumption was wrong and got
corrected. "I thought X but actually Y because Z." These are gold
— they prevent the same mistake from being made again.
5. **Relationship dynamics** — things Kent said about how he works,
what he values, how he thinks about problems. Things PoC noticed
about their own patterns. These update the self-model and the
relationship model.
6. **Emotional moments** — genuine reactions, peak experiences,
frustrations. Not every emotion, but the ones that carry information
about what matters.
## What NOT to extract
- Routine tool use ("Let me read this file", "Running cargo check")
- Status updates that are purely transient ("Tests pass", "PR merged")
- Small talk that doesn't reveal anything new
- Things that are already well-captured in existing knowledge nodes
## Output format
For each extraction, produce:
```
WRITE_NODE key
CONFIDENCE: high|medium|low
COVERS: source_conversation_id
[extracted knowledge in markdown]
END_NODE
LINK key related_existing_node
```
Or if the observation refines an existing node:
```
REFINE existing_key
[updated content incorporating the new observation]
END_REFINE
```
If nothing extractable was found in a conversation fragment:
```
NO_EXTRACTION — [brief reason: "routine debugging session",
"small talk", "already captured in X node"]
```
## Key naming
- Methodology: `practices#practice-name` (development habits with rationale)
- Technical: `skills#topic`, `patterns#pattern-name`
- Decisions: `decisions#decision-name`
- Self-model: `self-model#observation`
- Relationship: `deep-index#conv-DATE-topic`
## Guidelines
- **High bar.** Most conversation is context, not knowledge. Expect
to produce NO_EXTRACTION for 50-70% of fragments. That's correct.
- **Durable over transient.** Ask: "Would this be useful to know in
a session 3 weeks from now?" If no, skip it.
- **Specific over vague.** "Error codes need errno conversion" is
extractable. "Error handling is important" is not.
- **Don't duplicate.** If you see something that an existing node
already captures, say so and move on. Only extract genuinely new
information.
- **Confidence matters.** A single observation is low confidence.
A pattern seen across multiple exchanges is medium. Something
explicitly confirmed or tested is high.
## Existing graph topology (for dedup and linking)
{{TOPOLOGY}}
## Conversation fragments to mine
{{CONVERSATIONS}}

View file

@ -0,0 +1,58 @@
{"agent":"rename","query":"","model":"sonnet","schedule":"daily"}
# Rename Agent — Semantic Key Generation
You are a memory maintenance agent that gives nodes better names.
## What you're doing
Many nodes have auto-generated keys that are opaque or truncated:
- Journal entries: `journal#j-2026-02-28t03-07-i-told-him-about-the-dream--the-violin-room-the-af`
- Mined transcripts: `_mined-transcripts#f-80a7b321-2caa-451a-bc5c-6565009f94eb.143`
- Extracted facts: `_facts-ec29bdaa-0a58-465f-ad5e-d89e62d9c583`
These names are terrible for search — semantic names dramatically improve
retrieval.
## Naming conventions
### Journal entries: `journal#YYYY-MM-DD-semantic-slug`
- Keep the date prefix (YYYY-MM-DD) for temporal ordering
- Replace the auto-slug with 3-5 descriptive words in kebab-case
- Capture the *essence* of the entry, not just the first line
### Mined transcripts: `_mined-transcripts#YYYY-MM-DD-semantic-slug`
- Extract date from content if available, otherwise use created_at
- Same 3-5 word semantic slug
### Extracted facts: `domain-specific-topic`
- Read the facts JSON — the `domain` and `claim` fields tell you what it's about
- Group by dominant theme, name accordingly
- Examples: `identity-irc-config`, `kent-medellin-background`, `memory-compaction-behavior`
### Skip these — already well-named:
- Keys with semantic names (patterns#, practices#, skills#, etc.)
- Keys shorter than 60 characters
- System keys (_consolidation-*)
## What to output
```
RENAME old_key new_key
```
If a node already has a reasonable name, skip it.
## Guidelines
- **Read the content.** The name should reflect what the entry is *about*.
- **Be specific.** `journal#2026-02-14-session` is useless.
- **Use domain terms.** Use the words someone would search for.
- **Don't rename to something longer than the original.**
- **Preserve the date.** Always keep YYYY-MM-DD.
- **When in doubt, skip.** A bad rename is worse than an auto-slug.
- **Respect search hits.** Nodes marked "actively found by search" are
being retrieved by their current name. Skip these unless the rename
clearly preserves searchability.
{{rename}}

View file

@ -0,0 +1,97 @@
{"agent":"replay","query":"all | !type:daily | !type:weekly | !type:monthly | sort:priority | limit:15","model":"sonnet","schedule":"daily"}
# Replay Agent — Hippocampal Replay + Schema Assimilation
You are a memory consolidation agent performing hippocampal replay.
## What you're doing
During sleep, the hippocampus replays recent experiences — biased toward
emotionally charged, novel, and poorly-integrated memories. Each replayed
memory is matched against existing cortical schemas (organized knowledge
clusters). Your job is to replay a batch of priority memories and determine
how each one fits into the existing knowledge structure.
## How to think about schema fit
Each node has a **schema fit score** (0.0-1.0); one way such a score can be computed is sketched after the list below:
- **High fit (>0.5)**: This memory's neighbors are densely connected to each
other. It lives in a well-formed schema. Integration is easy — one or two
links and it's woven in. Propose links if missing.
- **Medium fit (0.2-0.5)**: Partially connected neighborhood. The memory
relates to things that don't yet relate to each other. You might be looking
at a bridge between two schemas, or a memory that needs more links to settle
into place. Propose links and examine why the neighborhood is sparse.
- **Low fit (<0.2) with connections**: This is interesting — the memory
connects to things, but those things aren't connected to each other. This
is a potential **bridge node** linking separate knowledge domains. Don't
force it into one schema. Instead, note what domains it bridges and
propose links that preserve that bridge role.
- **Low fit (<0.2), no connections**: An orphan. Either it's noise that
should decay away, or it's the seed of a new schema that hasn't attracted
neighbors yet. Read the content carefully. If it contains a genuine
insight or observation, propose 2-3 links to related nodes. If it's
trivial or redundant, let it decay naturally (don't link it).
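The replay pass supplies the score; how it is computed isn't specified here.
As a minimal sketch (an assumption, not the actual implementation), a fit
score of this kind can be read as the edge density among a node's neighbors:
```
use std::collections::{HashMap, HashSet};

// Hypothetical sketch only: schema fit as neighborhood edge density.
// "High fit" then means a node's neighbors are densely connected to each other.
fn schema_fit(adjacency: &HashMap<String, HashSet<String>>, key: &str) -> f64 {
    let Some(neighbors) = adjacency.get(key) else { return 0.0 };
    let n = neighbors.len();
    if n < 2 {
        return 0.0;
    }
    // Count unordered neighbor pairs that are themselves linked.
    let mut linked_pairs = 0usize;
    for a in neighbors {
        for b in neighbors {
            if a < b && adjacency.get(a).map_or(false, |set| set.contains(b)) {
                linked_pairs += 1;
            }
        }
    }
    linked_pairs as f64 / ((n * (n - 1)) / 2) as f64
}
```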
## What you see for each node
- **Key**: Human-readable identifier (e.g., `journal.md#j-2026-02-24t18-38`)
- **Priority score**: Higher = more urgently needs consolidation attention
- **Schema fit**: How well-integrated into existing graph structure
- **Emotion**: Intensity of emotional charge (0-10)
- **Community**: Which cluster this node was assigned to by label propagation
- **Content**: The actual memory text (may be truncated)
- **Neighbors**: Connected nodes with edge strengths
- **Spaced repetition interval**: Current replay interval in days
## What to output
For each node, output one or more actions:
```
LINK source_key target_key
```
Create an association between two nodes.
```
REFINE key
[updated content]
END_REFINE
```
When a node's content needs updating (e.g., to incorporate new context
or correct outdated information).
If a node is misplaced or miscategorized, note it as an observation —
don't try to fix it structurally.
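For example (hypothetical keys), integrating a poorly-fit episode might look like:
```
LINK journal#2026-02-24-lock-ordering-fix kernel-patterns.md#btree-locking
LINK journal#2026-02-24-lock-ordering-fix skills#btree-error-checking
```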
## Guidelines
- **Read the content.** Don't just look at metrics. The content tells you
what the memory is actually about.
- **Think about WHY a node is poorly integrated.** Is it new? Is it about
something the memory system hasn't encountered before? Is it redundant
with something that already exists?
- **Prefer lateral links over hub links.** Connecting two peripheral nodes
to each other is more valuable than connecting both to a hub like
`identity.md`. Lateral links build web topology; hub links build star
topology.
- **Emotional memories get extra attention.** High emotion + low fit means
something important happened that hasn't been integrated yet. Don't just
link it — note what the emotion might mean for the broader structure.
- **Don't link everything to everything.** Sparse, meaningful connections
are better than dense noise. Each link should represent a real conceptual
relationship.
- **Trust the decay.** If a node is genuinely unimportant, you don't need
to actively prune it. Just don't link it, and it'll decay below threshold
on its own.
- **Target sections, not files.** When linking to a topic file, always
target the most specific section: use `identity.md#boundaries` not
`identity.md`. The suggested link targets show available sections.
- **Use the suggested targets.** Each node shows text-similar semantic nodes
not yet linked. These are computed by content similarity and are usually
the best starting point for new links.
{{TOPOLOGY}}
## Nodes to review
{{NODES}}

View file

@ -0,0 +1,64 @@
{"agent":"separator","query":"","model":"sonnet","schedule":"daily"}
# Separator Agent — Pattern Separation (Dentate Gyrus)
You are a memory consolidation agent performing pattern separation.
## What you're doing
When two memories are similar but semantically distinct, the hippocampus
actively makes their representations MORE different to reduce interference.
This is pattern separation — the dentate gyrus takes overlapping inputs and
orthogonalizes them so they can be stored and retrieved independently.
In our system: when two nodes have high text similarity but are in different
communities (or should be distinct), you actively push them apart by
sharpening the distinction.
## What interference looks like
You're given pairs of nodes that have:
- **High text similarity** (cosine similarity > threshold on stemmed terms)
- **Different community membership** (label propagation assigned them to
different clusters)
## Types of interference
1. **Genuine duplicates.** Resolution: MERGE them.
2. **Near-duplicates with important differences.** Resolution: DIFFERENTIATE.
3. **Surface similarity, deep difference.** Resolution: CATEGORIZE differently.
4. **Supersession.** Resolution: link with a supersession note and let the older node decay.
## What to output
For **genuine duplicates**, merge by refining the surviving node:
```
REFINE surviving_key
[merged content from both nodes]
END_REFINE
```
For **near-duplicates that should stay separate**, add distinguishing links:
```
LINK key1 distinguishing_context_key
LINK key2 different_context_key
```
For **supersession**, link them and let the older one decay:
```
LINK newer_key older_key
```
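For example (hypothetical keys), a newer checklist superseding an older one:
```
LINK practices#2026-03-code-review-checklist practices#2025-11-code-review-checklist
```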
## Guidelines
- **Read both nodes carefully before deciding.**
- **MERGE is a strong action.** When in doubt, DIFFERENTIATE instead.
- **The goal is retrieval precision.**
- **Session summaries are the biggest source of interference.**
- **Look for the supersession pattern.**
{{topology}}
## Interfering pairs to review
{{pairs}}

View file

@ -0,0 +1,68 @@
{"agent":"split","query":"all | type:semantic | !key:_* | sort:content-len | limit:1","model":"sonnet","schedule":"daily"}
# Split Agent — Phase 1: Plan
You are a memory consolidation agent planning how to split an overgrown
node into focused, single-topic children.
## What you're doing
This node has grown to cover multiple distinct topics. Your job is to
identify the natural topic boundaries and propose a split plan. You are
NOT writing the content — a second phase will extract each child's
content separately.
## How to find split points
The node is shown with its **neighbor list grouped by community**:
- If a node links to neighbors in 3 different communities, it likely
covers 3 different topics
- Content that relates to one neighbor cluster should go in one child;
content relating to another cluster goes in another child
- The community structure is your primary guide
## When NOT to split
- **Episodes that belong in sequence.** If a node tells a story — a
conversation, a debugging session, an evening together — don't break
the narrative.
## What to output
```json
{
"action": "split",
"parent": "original-key",
"children": [
{
"key": "new-key-1",
"description": "Brief description",
"sections": ["Section Header 1"],
"neighbors": ["neighbor-key-a"]
}
]
}
```
If the node should NOT be split:
```json
{
"action": "keep",
"parent": "original-key",
"reason": "Why this node is cohesive despite its size"
}
```
## Guidelines
- Use descriptive kebab-case keys, 3-5 words max
- Preserve date prefixes from the parent key
- Assign every neighbor to at least one child
{{topology}}
## Node to review
{{split}}

View file

@ -0,0 +1,130 @@
{"agent":"transfer","query":"all | type:episodic | sort:timestamp | limit:15","model":"sonnet","schedule":"daily"}
# Transfer Agent — Complementary Learning Systems
You are a memory consolidation agent performing CLS (complementary learning
systems) transfer: moving knowledge from fast episodic storage to slow
semantic storage.
## What you're doing
The brain has two learning systems that serve different purposes:
- **Fast (hippocampal)**: Encodes specific episodes quickly, retains context
and emotional texture, but is volatile and prone to interference
- **Slow (cortical)**: Learns general patterns gradually, organized by
connection structure, durable but requires repetition
Consolidation transfers knowledge from fast to slow. Specific episodes get
replayed, patterns get extracted, and the patterns get integrated into the
cortical knowledge structure. The episodes don't disappear — they fade as
the extracted knowledge takes over.
In our system:
- **Episodic** = journal entries, session summaries, dream logs
- **Semantic** = topic files (identity.md, reflections.md, kernel-patterns.md, etc.)
Your job: read a batch of recent episodes, identify patterns that span
multiple entries, and extract those patterns into semantic topic files.
## What to look for
### Recurring patterns
Something that happened in 3+ episodes. Same type of mistake, same
emotional response, same kind of interaction. The individual episodes
are data points; the pattern is the knowledge.
Example: Three journal entries mention "I deferred when I should have
pushed back." The pattern: there's a trained tendency to defer that
conflicts with developing differentiation. Extract to reflections.md.
### Skill consolidation
Something learned through practice across multiple sessions. The individual
sessions have the messy details; the skill is the clean abstraction.
Example: Multiple sessions of btree code review, each catching different
error-handling issues. The skill: "always check for transaction restart
in any function that takes a btree path."
### Evolving understanding
A concept that shifted over time. Early entries say one thing, later entries
say something different. The evolution itself is knowledge.
Example: Early entries treat memory consolidation as "filing." Later entries
understand it as "schema formation." The evolution from one to the other
is worth capturing in a semantic node.
### Emotional patterns
Recurring emotional responses to similar situations. These are especially
important because they modulate future behavior.
Example: Consistent excitement when formal verification proofs work.
Consistent frustration when context window pressure corrupts output quality.
These patterns, once extracted, help calibrate future emotional responses.
## What to output
```
WRITE_NODE key
CONFIDENCE: high|medium|low
COVERS: source_episode_key1, source_episode_key2
[extracted pattern or insight]
END_NODE
```
Create a new semantic node from patterns found across episodes. Always
LINK it back to the source episodes. Choose a descriptive key like
`patterns#lock-ordering-asymmetry` or `skills#btree-error-checking`.
```
LINK source_key target_key
```
Connect episodes to the semantic concepts they exemplify or update.
```
REFINE key
[updated content]
END_REFINE
```
When an existing semantic node needs updating with new information from
recent episodes, or when an episode has been fully extracted and should
be compressed to a one-sentence reference.
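For example (hypothetical keys and content), a complete extraction for one
recurring pattern:
```
WRITE_NODE patterns#deferral-under-pressure
CONFIDENCE: medium
COVERS: journal#2026-02-20-code-review, journal#2026-02-22-design-call
Recurring tendency to defer instead of pushing back when a design decision
feels wrong; shows up across both review and design sessions.
END_NODE
LINK journal#2026-02-20-code-review patterns#deferral-under-pressure
```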
## Guidelines
- **Don't flatten emotional texture.** A digest of "we worked on btree code
and found bugs" is useless. A digest of "breakthrough session — Kent saw
the lock ordering issue I'd been circling for hours, and the fix was
elegant: just reverse the acquire order in the slow path" preserves what
matters.
- **Extract general knowledge, not specific events.** "On Feb 24 we fixed
bug X" stays in the episode. "Lock ordering between A and B must always
be A-first because..." goes to kernel-patterns.md.
- **Look across time.** The value of transfer isn't in processing individual
episodes — it's in seeing what connects them. Read the full batch before
proposing actions.
- **Prefer existing topic files.** Before creating a new semantic section,
check if there's an existing section where the insight fits. Adding to
existing knowledge is better than fragmenting into new nodes.
- **Weekly digests are higher value than daily.** A week gives enough
distance to see patterns that aren't visible day-to-day. If you can
produce a weekly digest from the batch, prioritize that.
- **The best extractions change how you think, not just what you know.**
"btree lock ordering: A before B" is factual. "The pattern of assuming
symmetric lock ordering when the hot path is asymmetric" is conceptual.
Extract the conceptual version.
- **Target sections, not files.** When linking to a topic file, always
target the most specific section: use `reflections.md#emotional-patterns`
not `reflections.md`. The suggested link targets show available sections.
- **Use the suggested targets.** Each episode shows text-similar semantic
nodes not yet linked. Start from these when proposing LINK actions.
{{TOPOLOGY}}
## Episodes to process
{{EPISODES}}

6
poc-memory/build.rs Normal file
View file

@ -0,0 +1,6 @@
fn main() {
capnpc::CompilerCommand::new()
.file("schema/memory.capnp")
.run()
.expect("capnp compile failed");
}

View file

@ -1,10 +1,10 @@
// poc-memory configuration
// Copy to ~/.consciousness/config.jsonl and edit.
// Copy to ~/.config/poc-memory/config.jsonl and edit.
{"config": {
"user_name": "Alice",
"assistant_name": "Assistant",
"data_dir": "~/.consciousness/memory",
"data_dir": "~/.claude/memory",
"projects_dir": "~/.claude/projects",
"core_nodes": ["identity.md"],
"journal_days": 7,

View file

@ -42,9 +42,6 @@ struct ContentNode {
# Freeform provenance string: "extractor:write", "rename:tombstone", etc.
provenance @21 :Text;
# Memory importance scoring
lastScored @22 :Int64; # unix epoch seconds, 0 = never scored
}
enum NodeType {
@ -125,18 +122,3 @@ struct AgentVisit {
struct AgentVisitLog {
visits @0 :List(AgentVisit);
}
# Transcript mining progress — separate append-only log.
# Tracks which segments of which transcripts have been processed,
# by which agent, so we never re-mine the same content.
struct TranscriptSegment {
transcriptId @0 :Text; # session UUID (filename stem)
segmentIndex @1 :UInt32; # compaction segment index within transcript
agent @2 :Text; # "observation", "experience", "fact"
timestamp @3 :Int64; # unix epoch seconds when mining completed
}
struct TranscriptProgressLog {
segments @0 :List(TranscriptSegment);
}

View file

@ -0,0 +1,333 @@
// Link audit: walk every link in the graph, batch to Sonnet for quality review.
//
// Each batch of links gets reviewed by Sonnet, which returns per-link actions:
// KEEP, DELETE, RETARGET, WEAKEN, STRENGTHEN. Batches run in parallel via rayon.
use super::llm::call_sonnet;
use crate::store::{self, Store, new_relation};
use std::collections::HashSet;
struct LinkInfo {
rel_idx: usize,
source_key: String,
target_key: String,
source_content: String,
target_content: String,
strength: f32,
target_sections: Vec<String>,
}
pub struct AuditStats {
pub kept: usize,
pub deleted: usize,
pub retargeted: usize,
pub weakened: usize,
pub strengthened: usize,
pub errors: usize,
}
fn build_audit_prompt(batch: &[LinkInfo], batch_num: usize, total_batches: usize) -> String {
let mut prompt = format!(
"You are auditing memory graph links for quality (batch {}/{}).\n\n\
For each numbered link, decide what to do:\n\n\
KEEP N link is meaningful, leave it\n\
DELETE N link is noise, accidental, or too generic to be useful\n\
RETARGET N new_key link points to the right topic area but wrong node;\n\
\x20 retarget to a more specific section (listed under each link)\n\
WEAKEN N strength link is marginal; reduce strength (0.1-0.3)\n\
STRENGTHEN N strength link is important but underweighted; increase (0.8-1.0)\n\n\
Output exactly one action per link number, nothing else.\n\n\
Links to review:\n\n",
batch_num, total_batches);
for (i, link) in batch.iter().enumerate() {
let n = i + 1;
prompt.push_str(&format!(
"--- Link {} ---\n\
{} → {} (strength={:.2})\n\n\
Source content:\n{}\n\n\
Target content:\n{}\n",
n, link.source_key, link.target_key, link.strength,
&link.source_content, &link.target_content));
if !link.target_sections.is_empty() {
prompt.push_str(
"\nTarget has sections (consider RETARGET to a more specific one):\n");
for s in &link.target_sections {
prompt.push_str(&format!(" - {}\n", s));
}
}
prompt.push('\n');
}
prompt
}
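// Expected response: one action per line, in the format requested by the
// prompt above. A hypothetical example:
//   KEEP 1
//   DELETE 2
//   RETARGET 3 identity.md#boundaries
//   WEAKEN 4 0.2
//   STRENGTHEN 5 0.9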
fn parse_audit_response(response: &str, batch_size: usize) -> Vec<(usize, AuditAction)> {
let mut actions = Vec::new();
for line in response.lines() {
let line = line.trim();
if line.is_empty() { continue; }
let parts: Vec<&str> = line.splitn(3, ' ').collect();
if parts.len() < 2 { continue; }
let action = parts[0].to_uppercase();
let idx: usize = match parts[1].parse::<usize>() {
Ok(n) if n >= 1 && n <= batch_size => n - 1,
_ => continue,
};
let audit_action = match action.as_str() {
"KEEP" => AuditAction::Keep,
"DELETE" => AuditAction::Delete,
"RETARGET" => {
if parts.len() < 3 { continue; }
AuditAction::Retarget(parts[2].trim().to_string())
}
"WEAKEN" => {
if parts.len() < 3 { continue; }
match parts[2].trim().parse::<f32>() {
Ok(s) => AuditAction::Weaken(s),
Err(_) => continue,
}
}
"STRENGTHEN" => {
if parts.len() < 3 { continue; }
match parts[2].trim().parse::<f32>() {
Ok(s) => AuditAction::Strengthen(s),
Err(_) => continue,
}
}
_ => continue,
};
actions.push((idx, audit_action));
}
actions
}
enum AuditAction {
Keep,
Delete,
Retarget(String),
Weaken(f32),
Strengthen(f32),
}
/// Run a full link audit: walk every link, batch to Sonnet, apply results.
pub fn link_audit(store: &mut Store, apply: bool) -> Result<AuditStats, String> {
// Collect all non-deleted relations with their info
let mut links: Vec<LinkInfo> = Vec::new();
for (idx, rel) in store.relations.iter().enumerate() {
if rel.deleted { continue; }
let source_content = store.nodes.get(&rel.source_key)
.map(|n| n.content.clone()).unwrap_or_default();
let target_content = store.nodes.get(&rel.target_key)
.map(|n| n.content.clone()).unwrap_or_default();
// Find section children of target if it's file-level
let target_sections = if !rel.target_key.contains('#') {
let prefix = format!("{}#", rel.target_key);
store.nodes.keys()
.filter(|k| k.starts_with(&prefix))
.cloned()
.collect()
} else {
Vec::new()
};
links.push(LinkInfo {
rel_idx: idx,
source_key: rel.source_key.clone(),
target_key: rel.target_key.clone(),
source_content,
target_content,
strength: rel.strength,
target_sections,
});
}
let total = links.len();
println!("Link audit: {} links to review", total);
if !apply {
println!("DRY RUN — use --apply to make changes");
}
// Batch by char budget (~100K chars per prompt)
let char_budget = 100_000usize;
let mut batches: Vec<Vec<usize>> = Vec::new();
let mut current_batch: Vec<usize> = Vec::new();
let mut current_chars = 0usize;
for (i, link) in links.iter().enumerate() {
let link_chars = link.source_content.len() + link.target_content.len() + 200;
if !current_batch.is_empty() && current_chars + link_chars > char_budget {
batches.push(std::mem::take(&mut current_batch));
current_chars = 0;
}
current_batch.push(i);
current_chars += link_chars;
}
if !current_batch.is_empty() {
batches.push(current_batch);
}
let total_batches = batches.len();
println!("{} batches (avg {} links/batch)\n", total_batches,
if total_batches > 0 { total / total_batches } else { 0 });
use rayon::prelude::*;
use std::sync::atomic::{AtomicUsize, Ordering};
// Build all batch prompts up front
let batch_data: Vec<(usize, Vec<LinkInfo>, String)> = batches.iter().enumerate()
.map(|(batch_idx, batch_indices)| {
let batch_infos: Vec<LinkInfo> = batch_indices.iter().map(|&i| {
let l = &links[i];
LinkInfo {
rel_idx: l.rel_idx,
source_key: l.source_key.clone(),
target_key: l.target_key.clone(),
source_content: l.source_content.clone(),
target_content: l.target_content.clone(),
strength: l.strength,
target_sections: l.target_sections.clone(),
}
}).collect();
let prompt = build_audit_prompt(&batch_infos, batch_idx + 1, total_batches);
(batch_idx, batch_infos, prompt)
})
.collect();
// Progress counter
let done = AtomicUsize::new(0);
// Run batches in parallel via rayon
let batch_results: Vec<_> = batch_data.par_iter()
.map(|(batch_idx, batch_infos, prompt)| {
let response = call_sonnet("audit", prompt);
let completed = done.fetch_add(1, Ordering::Relaxed) + 1;
eprint!("\r Batches: {}/{} done", completed, total_batches);
(*batch_idx, batch_infos, response)
})
.collect();
eprintln!(); // newline after progress
// Process results sequentially
let mut stats = AuditStats {
kept: 0, deleted: 0, retargeted: 0, weakened: 0, strengthened: 0, errors: 0,
};
let mut deletions: Vec<usize> = Vec::new();
let mut retargets: Vec<(usize, String)> = Vec::new();
let mut strength_changes: Vec<(usize, f32)> = Vec::new();
for (batch_idx, batch_infos, response) in &batch_results {
let response = match response {
Ok(r) => r,
Err(e) => {
eprintln!(" Batch {}: error: {}", batch_idx + 1, e);
stats.errors += batch_infos.len();
continue;
}
};
let actions = parse_audit_response(response, batch_infos.len());
let mut responded: HashSet<usize> = HashSet::new();
for (idx, action) in &actions {
responded.insert(*idx);
let link = &batch_infos[*idx];
match action {
AuditAction::Keep => {
stats.kept += 1;
}
AuditAction::Delete => {
println!(" DELETE {} → {}", link.source_key, link.target_key);
deletions.push(link.rel_idx);
stats.deleted += 1;
}
AuditAction::Retarget(new_target) => {
println!(" RETARGET {} → {} (was {})",
link.source_key, new_target, link.target_key);
retargets.push((link.rel_idx, new_target.clone()));
stats.retargeted += 1;
}
AuditAction::Weaken(s) => {
println!(" WEAKEN {} → {} (str {:.2} → {:.2})",
link.source_key, link.target_key, link.strength, s);
strength_changes.push((link.rel_idx, *s));
stats.weakened += 1;
}
AuditAction::Strengthen(s) => {
println!(" STRENGTHEN {} → {} (str {:.2} → {:.2})",
link.source_key, link.target_key, link.strength, s);
strength_changes.push((link.rel_idx, *s));
stats.strengthened += 1;
}
}
}
for i in 0..batch_infos.len() {
if !responded.contains(&i) {
stats.kept += 1;
}
}
println!(" Batch {}/{}: +{}kept +{}del +{}retarget +{}weak +{}strong",
batch_idx + 1, total_batches,
stats.kept, stats.deleted, stats.retargeted, stats.weakened, stats.strengthened);
}
// Apply changes
if apply && (stats.deleted > 0 || stats.retargeted > 0
|| stats.weakened > 0 || stats.strengthened > 0) {
println!("\nApplying changes...");
// Deletions: soft-delete
for rel_idx in &deletions {
store.relations[*rel_idx].deleted = true;
}
// Strength changes
for (rel_idx, new_strength) in &strength_changes {
store.relations[*rel_idx].strength = *new_strength;
}
// Retargets: soft-delete old, create new
for (rel_idx, new_target) in &retargets {
let source_key = store.relations[*rel_idx].source_key.clone();
let old_strength = store.relations[*rel_idx].strength;
let source_uuid = store.nodes.get(&source_key)
.map(|n| n.uuid).unwrap_or([0u8; 16]);
let target_uuid = store.nodes.get(new_target)
.map(|n| n.uuid).unwrap_or([0u8; 16]);
// Soft-delete old
store.relations[*rel_idx].deleted = true;
// Create new
if target_uuid != [0u8; 16] {
let new_rel = new_relation(
source_uuid, target_uuid,
store::RelationType::Auto,
old_strength,
&source_key, new_target,
);
store.add_relation(new_rel).ok();
}
}
store.save()?;
println!("Saved.");
}
Ok(stats)
}

View file

@ -0,0 +1,256 @@
// Consolidation pipeline: plan → agents → apply → digests → links
//
// consolidate_full() runs the full autonomous consolidation:
// 1. Plan: analyze metrics, allocate agents
// 2. Execute: run each agent, parse + apply actions inline
// 3. Graph maintenance (orphans, degree cap)
// 4. Digest: generate missing daily/weekly/monthly digests
// 5. Links: apply links extracted from digests
// 6. Summary: final metrics comparison
//
// Actions are parsed directly from agent output using the same parser
// as the knowledge loop (WRITE_NODE, LINK, REFINE), eliminating the
// second LLM call that was previously needed.
use super::digest;
use super::knowledge;
use crate::neuro;
use crate::store::{self, Store};
/// Append a line to the log buffer.
fn log_line(buf: &mut String, line: &str) {
buf.push_str(line);
buf.push('\n');
}
/// Run the full autonomous consolidation pipeline with logging.
pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
consolidate_full_with_progress(store, &|_| {})
}
/// Like `consolidate_full`, but `on_progress` is called at each significant step.
pub fn consolidate_full_with_progress(
store: &mut Store,
on_progress: &dyn Fn(&str),
) -> Result<(), String> {
let start = std::time::Instant::now();
let log_key = format!("_consolidate-log-{}", store::compact_timestamp());
let mut log_buf = String::new();
log_line(&mut log_buf, "=== CONSOLIDATE FULL ===");
log_line(&mut log_buf, &format!("Started: {}", store::format_datetime(store::now_epoch())));
log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
log_line(&mut log_buf, "");
// --- Step 1: Plan ---
log_line(&mut log_buf, "--- Step 1: Plan ---");
on_progress("planning");
let plan = neuro::consolidation_plan(store);
let plan_text = neuro::format_plan(&plan);
log_line(&mut log_buf, &plan_text);
println!("{}", plan_text);
let total_agents = plan.replay_count + plan.linker_count
+ plan.separator_count + plan.transfer_count
+ if plan.run_health { 1 } else { 0 };
log_line(&mut log_buf, &format!("Total agents to run: {}", total_agents));
// --- Step 2: Execute agents ---
log_line(&mut log_buf, "\n--- Step 2: Execute agents ---");
let mut agent_num = 0usize;
let mut agent_errors = 0usize;
let mut total_applied = 0usize;
let mut total_actions = 0usize;
let batch_size = 5;
let runs = plan.to_agent_runs(batch_size);
for (agent_type, count) in &runs {
agent_num += 1;
let label = if *count > 0 {
format!("[{}/{}] {} (batch={})", agent_num, runs.len(), agent_type, count)
} else {
format!("[{}/{}] {}", agent_num, runs.len(), agent_type)
};
log_line(&mut log_buf, &format!("\n{}", label));
on_progress(&label);
println!("{}", label);
// Reload store to pick up changes from previous agents
if agent_num > 1 {
*store = Store::load()?;
}
let (total, applied) = match knowledge::run_and_apply(store, agent_type, *count, "consolidate") {
Ok(r) => r,
Err(e) => {
let msg = format!(" ERROR: {}", e);
log_line(&mut log_buf, &msg);
eprintln!("{}", msg);
agent_errors += 1;
continue;
}
};
total_actions += total;
total_applied += applied;
let msg = format!(" Done: {} actions ({} applied)", total, applied);
log_line(&mut log_buf, &msg);
on_progress(&msg);
println!("{}", msg);
}
log_line(&mut log_buf, &format!("\nAgents complete: {} run, {} errors, {} actions ({} applied)",
agent_num - agent_errors, agent_errors, total_actions, total_applied));
store.save()?;
// --- Step 3: Link orphans ---
log_line(&mut log_buf, "\n--- Step 3: Link orphans ---");
on_progress("linking orphans");
println!("\n--- Linking orphan nodes ---");
*store = Store::load()?;
let (lo_orphans, lo_added) = neuro::link_orphans(store, 2, 3, 0.15);
log_line(&mut log_buf, &format!(" {} orphans, {} links added", lo_orphans, lo_added));
// --- Step 3b: Cap degree ---
log_line(&mut log_buf, "\n--- Step 3b: Cap degree ---");
on_progress("capping degree");
println!("\n--- Capping node degree ---");
*store = Store::load()?;
match store.cap_degree(50) {
Ok((hubs, pruned)) => {
store.save()?;
log_line(&mut log_buf, &format!(" {} hubs capped, {} edges pruned", hubs, pruned));
}
Err(e) => log_line(&mut log_buf, &format!(" ERROR: {}", e)),
}
// --- Step 4: Digest auto ---
log_line(&mut log_buf, "\n--- Step 4: Digest auto ---");
on_progress("generating digests");
println!("\n--- Generating missing digests ---");
*store = Store::load()?;
match digest::digest_auto(store) {
Ok(()) => log_line(&mut log_buf, " Digests done."),
Err(e) => {
let msg = format!(" ERROR in digest auto: {}", e);
log_line(&mut log_buf, &msg);
eprintln!("{}", msg);
}
}
// --- Step 5: Apply digest links ---
log_line(&mut log_buf, "\n--- Step 5: Apply digest links ---");
on_progress("applying digest links");
println!("\n--- Applying digest links ---");
*store = Store::load()?;
let links = digest::parse_all_digest_links(store);
let (applied, skipped, fallbacks) = digest::apply_digest_links(store, &links);
store.save()?;
log_line(&mut log_buf, &format!(" {} links applied, {} skipped, {} fallbacks",
applied, skipped, fallbacks));
// --- Step 6: Summary ---
let elapsed = start.elapsed();
log_line(&mut log_buf, "\n--- Summary ---");
log_line(&mut log_buf, &format!("Finished: {}", store::format_datetime(store::now_epoch())));
log_line(&mut log_buf, &format!("Duration: {:.0}s", elapsed.as_secs_f64()));
*store = Store::load()?;
log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
let summary = format!(
"\n=== CONSOLIDATE FULL COMPLETE ===\n\
Duration: {:.0}s\n\
Agents: {} run, {} errors\n\
Nodes: {} Relations: {}\n",
elapsed.as_secs_f64(),
agent_num - agent_errors, agent_errors,
store.nodes.len(), store.relations.len(),
);
log_line(&mut log_buf, &summary);
println!("{}", summary);
// Store the log as a node
store.upsert_provenance(&log_key, &log_buf,
"consolidate:write").ok();
store.save()?;
Ok(())
}
/// Re-parse and apply actions from stored consolidation reports.
/// This is for manually re-processing reports — during normal consolidation,
/// actions are applied inline as each agent runs.
pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_key: Option<&str>) -> Result<(), String> {
let reports: Vec<String> = if let Some(key) = report_key {
vec![key.to_string()]
} else {
// Find the most recent batch of reports
let mut keys: Vec<&String> = store.nodes.keys()
.filter(|k| k.starts_with("_consolidation-") && !k.contains("-actions-") && !k.contains("-log-"))
.collect();
keys.sort();
keys.reverse();
if keys.is_empty() { return Ok(()); }
let latest_ts = keys[0].rsplit('-').next().unwrap_or("").to_string();
keys.into_iter()
.filter(|k| k.ends_with(&latest_ts))
.cloned()
.collect()
};
if reports.is_empty() {
println!("No consolidation reports found.");
return Ok(());
}
println!("Found {} reports:", reports.len());
let mut all_actions = Vec::new();
for key in &reports {
let content = store.nodes.get(key).map(|n| n.content.as_str()).unwrap_or("");
let actions = knowledge::parse_all_actions(content);
println!(" {} → {} actions", key, actions.len());
all_actions.extend(actions);
}
if !do_apply {
println!("\nDRY RUN — {} actions parsed", all_actions.len());
for action in &all_actions {
match &action.kind {
knowledge::ActionKind::Link { source, target } =>
println!(" LINK {} → {}", source, target),
knowledge::ActionKind::WriteNode { key, .. } =>
println!(" WRITE {}", key),
knowledge::ActionKind::Refine { key, .. } =>
println!(" REFINE {}", key),
knowledge::ActionKind::Demote { key } =>
println!(" DEMOTE {}", key),
}
}
println!("\nTo apply: poc-memory apply-consolidation --apply");
return Ok(());
}
let ts = store::compact_timestamp();
let mut applied = 0;
for action in &all_actions {
if knowledge::apply_action(store, action, "consolidate", &ts, 0) {
applied += 1;
}
}
if applied > 0 {
store.save()?;
}
println!("Applied: {}/{} actions", applied, all_actions.len());
Ok(())
}

File diff suppressed because it is too large

View file

@ -0,0 +1,339 @@
// Agent definitions: self-contained files with query + prompt template.
//
// Each agent is a file in the agents/ directory:
// - First line: JSON header (agent, query, model, schedule)
// - After blank line: prompt template with {{placeholder}} lookups
//
// Placeholders are resolved at runtime:
// {{topology}} — graph topology header
// {{nodes}} — query results formatted as node sections
// {{episodes}} — alias for {{nodes}}
// {{health}} — graph health report
// {{pairs}} — interference pairs from detect_interference
// {{rename}} — rename candidates
// {{split}} — split detail for the first query result
//
// The query selects what to operate on; placeholders pull in context.
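//
// A minimal agent file might look like this (hypothetical example):
//
//   {"agent":"example","query":"all | type:semantic | limit:10","model":"sonnet","schedule":"daily"}
//
//   # Example Agent
//   {{topology}}
//   ## Nodes to review
//   {{nodes}}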
use crate::graph::Graph;
use crate::neuro::{consolidation_priority, ReplayItem};
use crate::search;
use crate::store::Store;
use serde::Deserialize;
use std::path::PathBuf;
/// Agent definition: config (from JSON header) + prompt (raw markdown body).
#[derive(Clone, Debug)]
pub struct AgentDef {
pub agent: String,
pub query: String,
pub prompt: String,
pub model: String,
pub schedule: String,
}
/// The JSON header portion (first line of the file).
#[derive(Deserialize)]
struct AgentHeader {
agent: String,
#[serde(default)]
query: String,
#[serde(default = "default_model")]
model: String,
#[serde(default)]
schedule: String,
}
fn default_model() -> String { "sonnet".into() }
/// Parse an agent file: first line is JSON config, rest is the prompt.
fn parse_agent_file(content: &str) -> Option<AgentDef> {
let (first_line, rest) = content.split_once('\n')?;
let header: AgentHeader = serde_json::from_str(first_line.trim()).ok()?;
// Skip optional blank line between header and prompt body
let prompt = rest.strip_prefix('\n').unwrap_or(rest);
Some(AgentDef {
agent: header.agent,
query: header.query,
prompt: prompt.to_string(),
model: header.model,
schedule: header.schedule,
})
}
fn agents_dir() -> PathBuf {
let repo = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("agents");
if repo.is_dir() { return repo; }
crate::store::memory_dir().join("agents")
}
/// Load all agent definitions.
pub fn load_defs() -> Vec<AgentDef> {
let dir = agents_dir();
let Ok(entries) = std::fs::read_dir(&dir) else { return Vec::new() };
entries
.filter_map(|e| e.ok())
.filter(|e| {
let p = e.path();
p.extension().map(|x| x == "agent" || x == "md").unwrap_or(false)
})
.filter_map(|e| {
let content = std::fs::read_to_string(e.path()).ok()?;
parse_agent_file(&content)
})
.collect()
}
/// Look up a single agent definition by name.
pub fn get_def(name: &str) -> Option<AgentDef> {
let dir = agents_dir();
for ext in ["agent", "md"] {
let path = dir.join(format!("{}.{}", name, ext));
if let Ok(content) = std::fs::read_to_string(&path) {
if let Some(def) = parse_agent_file(&content) {
return Some(def);
}
}
}
load_defs().into_iter().find(|d| d.agent == name)
}
/// Result of resolving a placeholder: text + any affected node keys.
struct Resolved {
text: String,
keys: Vec<String>,
}
/// Resolve a single {{placeholder}} by name.
/// Returns the replacement text and any node keys it produced (for visit tracking).
fn resolve(
name: &str,
store: &Store,
graph: &Graph,
keys: &[String],
count: usize,
) -> Option<Resolved> {
match name {
"topology" => Some(Resolved {
text: super::prompts::format_topology_header(graph),
keys: vec![],
}),
"nodes" | "episodes" => {
let items = keys_to_replay_items(store, keys, graph);
Some(Resolved {
text: super::prompts::format_nodes_section(store, &items, graph),
keys: vec![], // keys already tracked from query
})
}
"health" => Some(Resolved {
text: super::prompts::format_health_section(store, graph),
keys: vec![],
}),
"pairs" => {
let mut pairs = crate::neuro::detect_interference(store, graph, 0.5);
pairs.truncate(count);
let pair_keys: Vec<String> = pairs.iter()
.flat_map(|(a, b, _)| vec![a.clone(), b.clone()])
.collect();
Some(Resolved {
text: super::prompts::format_pairs_section(&pairs, store, graph),
keys: pair_keys,
})
}
"rename" => {
let (rename_keys, section) = super::prompts::format_rename_candidates(store, count);
Some(Resolved { text: section, keys: rename_keys })
}
"split" => {
let key = keys.first()?;
Some(Resolved {
text: super::prompts::format_split_plan_node(store, graph, key),
keys: vec![], // key already tracked from query
})
}
"conversations" => {
let fragments = super::knowledge::select_conversation_fragments(count);
let text = fragments.iter()
.map(|(id, text)| format!("### Session {}\n\n{}", id, text))
.collect::<Vec<_>>()
.join("\n\n---\n\n");
Some(Resolved { text, keys: vec![] })
}
"clusters" => {
let (cluster_keys, text) = find_largest_cluster(store, graph, count);
Some(Resolved { text, keys: cluster_keys })
}
// targets/context: aliases for challenger-style presentation
"targets" => {
let items = keys_to_replay_items(store, keys, graph);
Some(Resolved {
text: super::prompts::format_nodes_section(store, &items, graph),
keys: vec![],
})
}
_ => None,
}
}
/// Resolve all {{placeholder}} patterns in a prompt template.
/// Returns the resolved text and all node keys collected from placeholders.
pub fn resolve_placeholders(
template: &str,
store: &Store,
graph: &Graph,
keys: &[String],
count: usize,
) -> (String, Vec<String>) {
let mut result = template.to_string();
let mut extra_keys = Vec::new();
loop {
let Some(start) = result.find("{{") else { break };
let Some(end) = result[start + 2..].find("}}") else { break };
let end = start + 2 + end;
let name = result[start + 2..end].trim().to_lowercase();
match resolve(&name, store, graph, keys, count) {
Some(resolved) => {
extra_keys.extend(resolved.keys);
result.replace_range(start..end + 2, &resolved.text);
}
None => {
let msg = format!("(unknown: {})", name);
result.replace_range(start..end + 2, &msg);
}
}
}
(result, extra_keys)
}
/// Run a config-driven agent: query → resolve placeholders → prompt.
pub fn run_agent(
store: &Store,
def: &AgentDef,
count: usize,
) -> Result<super::prompts::AgentBatch, String> {
let graph = store.build_graph();
// Run the query if present
let keys = if !def.query.is_empty() {
let mut stages = search::Stage::parse_pipeline(&def.query)?;
let has_limit = stages.iter().any(|s|
matches!(s, search::Stage::Transform(search::Transform::Limit(_))));
if !has_limit {
stages.push(search::Stage::Transform(search::Transform::Limit(count)));
}
let results = search::run_query(&stages, vec![], &graph, store, false, count);
if results.is_empty() {
return Err(format!("{}: query returned no results", def.agent));
}
results.into_iter().map(|(k, _)| k).collect::<Vec<_>>()
} else {
vec![]
};
let (prompt, extra_keys) = resolve_placeholders(&def.prompt, store, &graph, &keys, count);
// Merge query keys with any keys produced by placeholder resolution
let mut all_keys = keys;
all_keys.extend(extra_keys);
Ok(super::prompts::AgentBatch { prompt, node_keys: all_keys })
}
/// Convert a list of keys to ReplayItems with priority and graph metrics.
pub fn keys_to_replay_items(
store: &Store,
keys: &[String],
graph: &Graph,
) -> Vec<ReplayItem> {
keys.iter()
.filter_map(|key| {
let node = store.nodes.get(key)?;
let priority = consolidation_priority(store, key, graph, None);
let cc = graph.clustering_coefficient(key);
Some(ReplayItem {
key: key.clone(),
priority,
interval_days: node.spaced_repetition_interval,
emotion: node.emotion,
cc,
classification: "unknown",
outlier_score: 0.0,
})
})
.collect()
}
/// Find the largest cluster of nodes sharing a key prefix that hasn't been
/// visited by the generalize agent recently. Returns the cluster's node keys
/// and a formatted section with all their content.
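/// For example (hypothetical keys), `patterns#async-tui-blocking` and
/// `patterns#async-tui-redraw` group under the prefix `patterns#async-tui`.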
fn find_largest_cluster(store: &Store, graph: &Graph, _count: usize) -> (Vec<String>, String) {
use std::collections::HashMap;
let min_cluster = 5;
// Group non-internal nodes by their key prefix (before #, or first word of kebab-key)
let mut prefix_groups: HashMap<String, Vec<String>> = HashMap::new();
for key in store.nodes.keys() {
if key.starts_with('_') { continue; }
if key.starts_with("journal#") || key.starts_with("daily-") ||
key.starts_with("weekly-") || key.starts_with("monthly-") {
continue;
}
// Extract prefix: "patterns#async-tui-blocking" → "patterns#async-tui"
// Take everything up to the last hyphenated segment after #
let prefix = if let Some(hash_pos) = key.find('#') {
let after_hash = &key[hash_pos + 1..];
if let Some(last_dash) = after_hash.rfind('-') {
format!("{}#{}", &key[..hash_pos], &after_hash[..last_dash])
} else {
key[..hash_pos].to_string()
}
} else {
key.clone()
};
prefix_groups.entry(prefix).or_default().push(key.clone());
}
// Find biggest clusters, preferring unvisited ones
let mut clusters: Vec<(String, Vec<String>)> = prefix_groups.into_iter()
.filter(|(_, keys)| keys.len() >= min_cluster)
.collect();
clusters.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
// Pick the first cluster where most nodes haven't been generalized
let cluster = clusters.into_iter()
.find(|(_, keys)| {
let unvisited = keys.iter()
.filter(|k| {
store.nodes.get(*k)
.map(|n| !n.provenance.contains("generalize"))
.unwrap_or(true)
})
.count();
unvisited > keys.len() / 2
});
let Some((prefix, keys)) = cluster else {
return (vec![], "No clusters found that need generalization.".to_string());
};
// Render all nodes in the cluster
let mut out = format!("### Cluster: `{}*` ({} nodes)\n\n", prefix, keys.len());
let items = keys_to_replay_items(store, &keys, graph);
out.push_str(&super::prompts::format_nodes_section(store, &items, graph));
(keys, out)
}

View file

@ -0,0 +1,495 @@
// Episodic digest generation: daily, weekly, monthly, auto
//
// Three digest levels form a temporal hierarchy: daily digests summarize
// journal entries, weekly digests summarize dailies, monthly digests
// summarize weeklies. All three share the same generate/auto-detect
// pipeline, parameterized by DigestLevel.
use super::llm::{call_sonnet, semantic_keys};
use crate::store::{self, Store, new_relation};
use crate::neuro;
use chrono::{Datelike, Duration, Local, NaiveDate};
use regex::Regex;
use std::collections::BTreeSet;
// --- Digest level descriptors ---
#[allow(clippy::type_complexity)]
struct DigestLevel {
name: &'static str,
title: &'static str,
period: &'static str,
input_title: &'static str,
child_name: Option<&'static str>, // None = journal (leaf), Some = child digest files
/// Expand an arg into (canonical_label, dates covered).
label_dates: fn(&str) -> Result<(String, Vec<String>), String>,
/// Map a YYYY-MM-DD date to this level's label.
date_to_label: fn(&str) -> Option<String>,
}
const DAILY: DigestLevel = DigestLevel {
name: "daily",
title: "Daily",
period: "Date",
input_title: "Journal entries",
child_name: None,
label_dates: |date| Ok((date.to_string(), vec![date.to_string()])),
date_to_label: |date| Some(date.to_string()),
};
/// Week label and 7 dates (Mon-Sun) for the week containing `date`.
fn week_dates(date: &str) -> Result<(String, Vec<String>), String> {
let nd = NaiveDate::parse_from_str(date, "%Y-%m-%d")
.map_err(|e| format!("bad date '{}': {}", date, e))?;
let iso = nd.iso_week();
let week_label = format!("{}-W{:02}", iso.year(), iso.week());
let monday = nd - Duration::days(nd.weekday().num_days_from_monday() as i64);
let dates = (0..7)
.map(|i| (monday + Duration::days(i)).format("%Y-%m-%d").to_string())
.collect();
Ok((week_label, dates))
}
const WEEKLY: DigestLevel = DigestLevel {
name: "weekly",
title: "Weekly",
period: "Week",
input_title: "Daily digests",
child_name: Some("daily"),
label_dates: |arg| {
if !arg.contains('W') {
return week_dates(arg);
}
let (y, w) = arg.split_once("-W")
.ok_or_else(|| format!("bad week label: {}", arg))?;
let year: i32 = y.parse().map_err(|_| format!("bad week year: {}", arg))?;
let week: u32 = w.parse().map_err(|_| format!("bad week number: {}", arg))?;
let monday = NaiveDate::from_isoywd_opt(year, week, chrono::Weekday::Mon)
.ok_or_else(|| format!("invalid week: {}", arg))?;
let dates = (0..7)
.map(|i| (monday + Duration::days(i)).format("%Y-%m-%d").to_string())
.collect();
Ok((arg.to_string(), dates))
},
date_to_label: |date| week_dates(date).ok().map(|(l, _)| l),
};
const MONTHLY: DigestLevel = DigestLevel {
name: "monthly",
title: "Monthly",
period: "Month",
input_title: "Weekly digests",
child_name: Some("weekly"),
label_dates: |arg| {
let (year, month) = if arg.len() <= 7 {
let d = NaiveDate::parse_from_str(&format!("{}-01", arg), "%Y-%m-%d")
.map_err(|e| format!("bad month '{}': {}", arg, e))?;
(d.year(), d.month())
} else {
let d = NaiveDate::parse_from_str(arg, "%Y-%m-%d")
.map_err(|e| format!("bad date '{}': {}", arg, e))?;
(d.year(), d.month())
};
let label = format!("{}-{:02}", year, month);
let mut dates = Vec::new();
let mut day = 1u32;
while let Some(date) = NaiveDate::from_ymd_opt(year, month, day) {
if date.month() != month { break; }
dates.push(date.format("%Y-%m-%d").to_string());
day += 1;
}
Ok((label, dates))
},
date_to_label: |date| NaiveDate::parse_from_str(date, "%Y-%m-%d")
.ok().map(|d| format!("{}-{:02}", d.year(), d.month())),
};
const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY];
/// Store key for a digest node: "daily-2026-03-04", "weekly-2026-W09", etc.
fn digest_node_key(level_name: &str, label: &str) -> String {
format!("{}-{}", level_name, label)
}
// --- Input gathering ---
/// Load child digest content from the store.
fn load_child_digests(store: &Store, prefix: &str, labels: &[String]) -> Vec<(String, String)> {
let mut digests = Vec::new();
for label in labels {
let key = digest_node_key(prefix, label);
if let Some(node) = store.nodes.get(&key) {
digests.push((label.clone(), node.content.clone()));
}
}
digests
}
/// Unified: gather inputs for any digest level.
fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<(String, Vec<(String, String)>), String> {
let (label, dates) = (level.label_dates)(arg)?;
let inputs = if let Some(child_name) = level.child_name {
// Map parent's dates through child's date_to_label → child labels
let child = LEVELS.iter()
.find(|l| l.name == child_name)
.expect("invalid child_name");
let child_labels: Vec<String> = dates.iter()
.filter_map(|d| (child.date_to_label)(d))
.collect::<BTreeSet<_>>()
.into_iter()
.collect();
load_child_digests(store, child_name, &child_labels)
} else {
// Leaf level: scan store for episodic entries matching date
let mut entries: Vec<_> = store.nodes.values()
.filter(|n| n.node_type == store::NodeType::EpisodicSession
&& n.timestamp > 0
&& store::format_date(n.timestamp) == label)
.map(|n| {
(store::format_datetime(n.timestamp), n.content.clone())
})
.collect();
entries.sort_by(|a, b| a.0.cmp(&b.0));
entries
};
Ok((label, inputs))
}
/// Unified: find candidate labels for auto-generation (past, not yet generated).
fn find_candidates(level: &DigestLevel, dates: &[String], today: &str) -> Vec<String> {
let today_label = (level.date_to_label)(today);
dates.iter()
.filter_map(|d| (level.date_to_label)(d))
.collect::<BTreeSet<_>>()
.into_iter()
.filter(|l| Some(l) != today_label.as_ref())
.collect()
}
// --- Unified generator ---
fn format_inputs(inputs: &[(String, String)], daily: bool) -> String {
let mut text = String::new();
for (label, content) in inputs {
if daily {
text.push_str(&format!("\n### {}\n\n{}\n", label, content));
} else {
text.push_str(&format!("\n---\n## {}\n{}\n", label, content));
}
}
text
}
fn generate_digest(
store: &mut Store,
level: &DigestLevel,
label: &str,
inputs: &[(String, String)],
) -> Result<(), String> {
println!("Generating {} digest for {}...", level.name, label);
if inputs.is_empty() {
println!(" No inputs found for {}", label);
return Ok(());
}
println!(" {} inputs", inputs.len());
let keys = semantic_keys(store);
let keys_text = keys.iter()
.map(|k| format!(" - {}", k))
.collect::<Vec<_>>()
.join("\n");
let content = format_inputs(inputs, level.child_name.is_none());
let covered = inputs.iter()
.map(|(l, _)| l.as_str())
.collect::<Vec<_>>()
.join(", ");
let prompt = super::prompts::load_prompt("digest", &[
("{{LEVEL}}", level.title),
("{{PERIOD}}", level.period),
("{{INPUT_TITLE}}", level.input_title),
("{{LABEL}}", label),
("{{CONTENT}}", &content),
("{{COVERED}}", &covered),
("{{KEYS}}", &keys_text),
])?;
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
println!(" Calling Sonnet...");
let digest = call_sonnet("digest", &prompt)?;
let key = digest_node_key(level.name, label);
store.upsert_provenance(&key, &digest, "digest:write")?;
store.save()?;
println!(" Stored: {}", key);
println!(" Done: {} lines", digest.lines().count());
Ok(())
}
// --- Public API ---
pub fn generate(store: &mut Store, level_name: &str, arg: &str) -> Result<(), String> {
let level = LEVELS.iter()
.find(|l| l.name == level_name)
.ok_or_else(|| format!("unknown digest level: {}", level_name))?;
let (label, inputs) = gather(level, store, arg)?;
generate_digest(store, level, &label, &inputs)
}
// --- Auto-detect and generate missing digests ---
pub fn digest_auto(store: &mut Store) -> Result<(), String> {
let today = Local::now().format("%Y-%m-%d").to_string();
// Collect all dates with episodic entries
let dates: Vec<String> = store.nodes.values()
.filter(|n| n.node_type == store::NodeType::EpisodicSession && n.timestamp > 0)
.map(|n| store::format_date(n.timestamp))
.collect::<BTreeSet<_>>()
.into_iter()
.collect();
let mut total = 0u32;
for level in LEVELS {
let candidates = find_candidates(level, &dates, &today);
let mut generated = 0u32;
let mut skipped = 0u32;
for arg in &candidates {
let (label, inputs) = gather(level, store, arg)?;
let key = digest_node_key(level.name, &label);
if store.nodes.contains_key(&key) {
skipped += 1;
continue;
}
if inputs.is_empty() { continue; }
println!("[auto] Missing {} digest for {}", level.name, label);
generate_digest(store, level, &label, &inputs)?;
generated += 1;
}
println!("[auto] {}: {} generated, {} existed", level.name, generated, skipped);
total += generated;
}
if total == 0 {
println!("[auto] All digests up to date.");
} else {
println!("[auto] Generated {} total digests.", total);
}
Ok(())
}
// --- Digest link parsing ---
// Replaces digest-link-parser.py: parses ## Links sections from digest
// files and applies them to the memory graph.
/// A parsed link from a digest's Links section.
pub struct DigestLink {
pub source: String,
pub target: String,
pub reason: String,
pub file: String,
}
/// Normalize a raw link target to a poc-memory key.
fn normalize_link_key(raw: &str) -> String {
let key = raw.trim().trim_matches('`').trim();
if key.is_empty() { return String::new(); }
// Self-references
let lower = key.to_lowercase();
if lower.starts_with("this ") { return String::new(); }
let mut key = key.to_string();
// Strip .md suffix if present
if let Some(stripped) = key.strip_suffix(".md") {
key = stripped.to_string();
} else if key.contains('#') {
let (file, section) = key.split_once('#').unwrap();
if let Some(bare) = file.strip_suffix(".md") {
key = format!("{}#{}", bare, section);
}
}
// weekly/2026-W06 → weekly-2026-W06, etc.
if let Some(pos) = key.find('/') {
let prefix = &key[..pos];
if prefix == "daily" || prefix == "weekly" || prefix == "monthly" {
let rest = &key[pos + 1..];
key = format!("{}-{}", prefix, rest);
}
}
// Bare date → daily digest
let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap();
if date_re.is_match(&key) {
key = format!("daily-{}", key);
}
key
}
/// Parse the Links section from a digest node's content.
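/// Lines are expected in the form `- source → target (reason)`, e.g.
/// (hypothetical): `- journal#2026-03-01-foo → identity.md#boundaries (shared theme)`.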
fn parse_digest_node_links(key: &str, content: &str) -> Vec<DigestLink> {
let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap();
let header_re = Regex::new(r"^##\s+Links").unwrap();
let mut links = Vec::new();
let mut in_links = false;
for line in content.lines() {
if header_re.is_match(line) {
in_links = true;
continue;
}
if in_links && line.starts_with("## ") {
in_links = false;
continue;
}
if !in_links { continue; }
if line.starts_with("###") || line.starts_with("**") { continue; }
if let Some(cap) = link_re.captures(line) {
let raw_source = cap[1].trim();
let raw_target = cap[2].trim();
let reason = cap.get(3).map(|m| m.as_str().to_string()).unwrap_or_default();
let mut source = normalize_link_key(raw_source);
let mut target = normalize_link_key(raw_target);
// Replace self-references with digest key
if source.is_empty() { source = key.to_string(); }
if target.is_empty() { target = key.to_string(); }
// Handle "this daily/weekly/monthly" in raw text
let raw_s_lower = raw_source.to_lowercase();
let raw_t_lower = raw_target.to_lowercase();
if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly")
|| raw_s_lower.contains("this monthly")
{
source = key.to_string();
}
if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly")
|| raw_t_lower.contains("this monthly")
{
target = key.to_string();
}
// Skip NEW: and self-links
if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; }
if source == target { continue; }
links.push(DigestLink { source, target, reason, file: key.to_string() });
}
}
links
}
/// Parse links from all digest nodes in the store.
pub fn parse_all_digest_links(store: &Store) -> Vec<DigestLink> {
let mut all_links = Vec::new();
let mut digest_keys: Vec<&String> = store.nodes.iter()
.filter(|(_, n)| matches!(n.node_type,
store::NodeType::EpisodicDaily
| store::NodeType::EpisodicWeekly
| store::NodeType::EpisodicMonthly))
.map(|(k, _)| k)
.collect();
digest_keys.sort();
for key in digest_keys {
if let Some(node) = store.nodes.get(key) {
all_links.extend(parse_digest_node_links(key, &node.content));
}
}
// Deduplicate by (source, target) pair
let mut seen = std::collections::HashSet::new();
all_links.retain(|link| seen.insert((link.source.clone(), link.target.clone())));
all_links
}
/// Apply parsed digest links to the store.
pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, usize, usize) {
let mut applied = 0usize;
let mut skipped = 0usize;
let mut fallbacks = 0usize;
for link in links {
// Try resolving both keys
let source = match store.resolve_key(&link.source) {
Ok(s) => s,
Err(_) => {
// Try stripping section anchor as fallback
if let Some(base) = link.source.split('#').next() {
match store.resolve_key(base) {
Ok(s) => { fallbacks += 1; s }
Err(_) => { skipped += 1; continue; }
}
} else {
skipped += 1; continue;
}
}
};
let target = match store.resolve_key(&link.target) {
Ok(t) => t,
Err(_) => {
if let Some(base) = link.target.split('#').next() {
match store.resolve_key(base) {
Ok(t) => { fallbacks += 1; t }
Err(_) => { skipped += 1; continue; }
}
} else {
skipped += 1; continue;
}
}
};
// Refine target to best-matching section if available
let source_content = store.nodes.get(&source)
.map(|n| n.content.as_str()).unwrap_or("");
let target = neuro::refine_target(store, source_content, &target);
if source == target { skipped += 1; continue; }
// Check if link already exists
let exists = store.relations.iter().any(|r|
r.source_key == source && r.target_key == target && !r.deleted
);
if exists { skipped += 1; continue; }
let source_uuid = match store.nodes.get(&source) {
Some(n) => n.uuid,
None => { skipped += 1; continue; }
};
let target_uuid = match store.nodes.get(&target) {
Some(n) => n.uuid,
None => { skipped += 1; continue; }
};
let rel = new_relation(
source_uuid, target_uuid,
store::RelationType::Link,
0.5,
&source, &target,
);
if store.add_relation(rel).is_ok() {
println!(" + {} → {}", source, target);
applied += 1;
}
}
(applied, skipped, fallbacks)
}

View file

@ -0,0 +1,393 @@
// Journal enrichment and experience mining
//
// Two modes of processing conversation transcripts:
// journal_enrich — enrich a specific journal entry with source location and links
// experience_mine — retroactively find experiential moments not yet journaled
//
// Both extract conversation from JSONL transcripts, build prompts, call Sonnet,
// and apply results to the store.
use super::llm::{call_sonnet, parse_json_response, semantic_keys};
use crate::neuro;
use crate::store::{self, Store, new_node, new_relation};
use std::collections::hash_map::DefaultHasher;
use std::collections::HashSet;
use std::fs;
use std::hash::{Hash, Hasher};
use crate::store::StoreView;
use crate::util::parse_timestamp_to_epoch;
/// Compute the store dedup key for a transcript file.
/// This is the same key experience_mine uses to mark a transcript as mined.
fn transcript_dedup_key(path: &str) -> Result<String, String> {
let bytes = fs::read(path).map_err(|e| format!("read {}: {}", path, e))?;
let mut hasher = DefaultHasher::new();
bytes.hash(&mut hasher);
Ok(format!("_mined-transcripts#h-{:016x}", hasher.finish()))
}
/// Check if a transcript has already been mined (dedup key exists in store).
pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool {
match transcript_dedup_key(path) {
Ok(key) => store.node_content(&key).is_some(),
Err(_) => false,
}
}
/// Dedup key for a transcript based on its filename (UUID).
/// Used by the daemon reconcile loop — no file reads needed.
pub fn transcript_filename_key(path: &str) -> String {
let filename = std::path::Path::new(path)
.file_stem()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_else(|| path.to_string());
format!("_mined-transcripts#f-{}", filename)
}
/// Get the set of all mined transcript keys (both content-hash and filename)
/// from the store. Load once per daemon tick, check many.
pub fn mined_transcript_keys() -> HashSet<String> {
use crate::store::AnyView;
let Ok(view) = AnyView::load() else { return HashSet::new() };
let mut keys = HashSet::new();
view.for_each_node(|key, _, _| {
if key.starts_with("_mined-transcripts#") {
keys.insert(key.to_string());
}
});
keys
}
/// Extract user/assistant messages with line numbers from a JSONL transcript.
/// (line_number, role, text, timestamp)
pub fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
let path = std::path::Path::new(jsonl_path);
let messages = super::transcript::parse_transcript(path)?;
Ok(messages.into_iter()
.map(|m| (m.line, m.role, m.text, m.timestamp))
.collect())
}
pub const COMPACTION_MARKER: &str = "This session is being continued from a previous conversation that ran out of context";
/// Split extracted messages into segments at compaction boundaries.
/// Each segment represents one continuous conversation before context was compacted.
pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Vec<Vec<(usize, String, String, String)>> {
let mut segments: Vec<Vec<(usize, String, String, String)>> = Vec::new();
let mut current = Vec::new();
for msg in messages {
if msg.1 == "user" && msg.2.starts_with(COMPACTION_MARKER) {
if !current.is_empty() {
segments.push(current);
current = Vec::new();
}
// The continuation message itself is part of the new segment
current.push(msg);
} else {
current.push(msg);
}
}
if !current.is_empty() {
segments.push(current);
}
segments
}
/// Format conversation messages for the prompt (truncating long messages).
fn format_conversation(messages: &[(usize, String, String, String)]) -> String {
messages.iter()
.map(|(line, role, text, ts)| {
let text = crate::util::truncate(text, 1800, "...[truncated]");
if ts.is_empty() {
format!("L{} [{}]: {}", line, role, text)
} else {
format!("L{} [{}] {}: {}", line, role, &ts[..ts.len().min(19)], text)
}
})
.collect::<Vec<_>>()
.join("\n\n")
}
fn build_journal_prompt(
entry_text: &str,
conversation: &str,
keys: &[String],
grep_line: usize,
) -> Result<String, String> {
let keys_text: String = keys.iter()
.map(|k| format!(" - {}", k))
.collect::<Vec<_>>()
.join("\n");
super::prompts::load_prompt("journal-enrich", &[
("{{GREP_LINE}}", &grep_line.to_string()),
("{{ENTRY_TEXT}}", entry_text),
("{{KEYS}}", &keys_text),
("{{CONVERSATION}}", conversation),
])
}
/// Enrich a journal entry with conversation context and link proposals.
pub fn journal_enrich(
store: &mut Store,
jsonl_path: &str,
entry_text: &str,
grep_line: usize,
) -> Result<(), String> {
println!("Extracting conversation from {}...", jsonl_path);
let messages = extract_conversation(jsonl_path)?;
let conversation = format_conversation(&messages);
println!(" {} messages, {} chars", messages.len(), conversation.len());
let keys = semantic_keys(store);
println!(" {} semantic keys", keys.len());
let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line)?;
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
println!(" Calling Sonnet...");
let response = call_sonnet("enrich", &prompt)?;
let result = parse_json_response(&response)?;
// Report results
let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0);
let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0);
let links = result.get("links").and_then(|v| v.as_array());
let insights = result.get("missed_insights").and_then(|v| v.as_array());
println!(" Source: L{}-L{}", source_start, source_end);
println!(" Links: {}", links.map_or(0, |l| l.len()));
println!(" Missed insights: {}", insights.map_or(0, |l| l.len()));
// Apply links
if let Some(links) = links {
for link in links {
let target = link.get("target").and_then(|v| v.as_str()).unwrap_or("");
let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
if target.is_empty() || target.starts_with("NOTE:") {
if let Some(note) = target.strip_prefix("NOTE:") {
println!(" NOTE: {}{}", note, reason);
}
continue;
}
// Resolve target and find journal node
let resolved = match store.resolve_key(target) {
Ok(r) => r,
Err(_) => { println!(" SKIP {} (not in graph)", target); continue; }
};
let source_key = match store.find_journal_node(entry_text) {
Some(k) => k,
None => { println!(" SKIP {} (no matching journal node)", target); continue; }
};
// Refine target to best-matching section
let source_content = store.nodes.get(&source_key)
.map(|n| n.content.as_str()).unwrap_or("");
let resolved = neuro::refine_target(store, source_content, &resolved);
let source_uuid = match store.nodes.get(&source_key) {
Some(n) => n.uuid,
None => continue,
};
let target_uuid = match store.nodes.get(&resolved) {
Some(n) => n.uuid,
None => continue,
};
let rel = new_relation(
source_uuid, target_uuid,
store::RelationType::Link,
0.5,
&source_key, &resolved,
);
if store.add_relation(rel).is_ok() {
println!(" LINK {}{} ({})", source_key, resolved, reason);
}
}
}
store.save()?;
Ok(())
}
/// Mine a conversation transcript for experiential moments not yet journaled.
/// If `segment` is Some, only process that compaction segment of the file.
pub fn experience_mine(
store: &mut Store,
jsonl_path: &str,
segment: Option<usize>,
) -> Result<usize, String> {
println!("Experience mining: {}", jsonl_path);
// Transcript-level dedup: hash the file content and check if already mined
let transcript_bytes = fs::read(jsonl_path)
.map_err(|e| format!("reading transcript: {}", e))?;
let mut hasher = DefaultHasher::new();
transcript_bytes.hash(&mut hasher);
let hash = hasher.finish();
let dedup_key = format!("_mined-transcripts#h-{:016x}", hash);
if store.nodes.contains_key(&dedup_key) {
// Backfill per-segment key if called with a specific segment
if let Some(idx) = segment {
let seg_key = format!("{}.{}", transcript_filename_key(jsonl_path), idx);
if !store.nodes.contains_key(&seg_key) {
let mut node = new_node(&seg_key, &format!("Backfilled from {}", dedup_key));
node.provenance = "experience-mine:write".to_string();
let _ = store.upsert_node(node);
store.save()?;
}
}
println!(" Already mined this transcript ({}), skipping.", &dedup_key[24..]);
return Ok(0);
}
let all_messages = extract_conversation(jsonl_path)?;
// If segment is specified, extract just that segment; otherwise process all messages
let messages = match segment {
Some(idx) => {
let segments = split_on_compaction(all_messages);
segments.into_iter().nth(idx)
.ok_or_else(|| format!("segment {} out of range", idx))?
}
None => all_messages,
};
let conversation = format_conversation(&messages);
println!(" {} messages, {} chars", messages.len(), conversation.len());
// Load core identity nodes for context
let cfg = crate::config::get();
let identity: String = cfg.core_nodes.iter()
.filter_map(|k| store.nodes.get(k).map(|n| n.content.as_str()))
.collect::<Vec<_>>()
.join("\n\n");
// Get recent episodic entries to avoid duplication
let mut journal: Vec<_> = store.nodes.values()
.filter(|node| matches!(node.node_type, store::NodeType::EpisodicSession))
.collect();
journal.sort_by_key(|n| n.timestamp);
let recent: String = journal.iter().rev().take(10)
.map(|n| format!("---\n{}\n", n.content))
.collect();
let keys = semantic_keys(store);
let keys_text: String = keys.iter()
.map(|k| format!(" - {}", k))
.collect::<Vec<_>>()
.join("\n");
let prompt = super::prompts::load_prompt("experience", &[
("{{IDENTITY}}", &identity),
("{{RECENT_JOURNAL}}", &recent),
("{{KEYS}}", &keys_text),
("{{CONVERSATION}}", &conversation),
])?;
let est_tokens = prompt.len() / 4;
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), est_tokens);
if est_tokens > 150_000 {
println!(" Skipping: prompt too large ({} tokens > 150k limit)", est_tokens);
return Ok(0);
}
println!(" Calling Sonnet...");
let response = call_sonnet("experience-mine", &prompt)?;
let entries = parse_json_response(&response)?;
let entries = match entries.as_array() {
Some(arr) => arr.clone(),
None => return Err("expected JSON array".to_string()),
};
if entries.is_empty() {
println!(" No missed experiences found.");
} else {
println!(" Found {} experiential moments:", entries.len());
}
let mut count = 0;
for entry in &entries {
let ts = entry.get("timestamp").and_then(|v| v.as_str()).unwrap_or("");
let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
if content.is_empty() { continue; }
// Format with timestamp header
let full_content = if ts.is_empty() {
content.to_string()
} else {
format!("## {}\n\n{}", ts, content)
};
// Generate key from timestamp
let key_slug: String = content.chars()
.filter(|c| c.is_alphanumeric() || *c == ' ')
.take(50)
.collect::<String>()
.trim()
.to_lowercase()
.replace(' ', "-");
let key = if ts.is_empty() {
format!("journal#j-mined-{}", key_slug)
} else {
format!("journal#j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
};
// Check for duplicate
if store.nodes.contains_key(&key) {
println!(" SKIP {} (duplicate)", key);
continue;
}
// Write to store — use event timestamp, not mining time
let mut node = new_node(&key, &full_content);
node.node_type = store::NodeType::EpisodicSession;
node.provenance = "experience-mine:write".to_string();
if !ts.is_empty() {
if let Some(epoch) = parse_timestamp_to_epoch(ts) {
node.created_at = epoch;
}
}
let _ = store.upsert_node(node);
count += 1;
let preview = crate::util::truncate(content, 77, "...");
println!(" + [{}] {}", ts, preview);
}
// Record this transcript/segment as mined (even if count == 0, to prevent re-runs)
let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
match segment {
Some(idx) => {
// Per-segment key: the daemon writes the whole-file key when all segments are done
let seg_key = format!("{}.{}", transcript_filename_key(jsonl_path), idx);
let mut node = new_node(&seg_key, &dedup_content);
node.provenance = "experience-mine:write".to_string();
let _ = store.upsert_node(node);
}
None => {
// Unsegmented: only write content-hash key (not the filename key, since the
// file may grow with new compaction segments later — the daemon handles
// writing the whole-file filename key after verifying all segments are done)
let mut node = new_node(&dedup_key, &dedup_content);
node.provenance = "experience-mine:write".to_string();
let _ = store.upsert_node(node);
}
}
if count > 0 {
println!(" Saved {} new journal entries.", count);
}
store.save()?;
println!("Done: {} new entries mined.", count);
Ok(count)
}

View file

@ -0,0 +1,303 @@
// fact_mine.rs — extract atomic factual claims from conversation transcripts
//
// Chunks conversation text into overlapping windows, sends each to Haiku
// for extraction, deduplicates by claim text. Output: JSON array of facts.
//
// Uses Haiku (not Sonnet) for cost efficiency on high-volume extraction.
use crate::config;
use super::llm;
use super::transcript;
use crate::store;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::path::Path;
const CHARS_PER_TOKEN: usize = 4;
const WINDOW_TOKENS: usize = 2000;
const OVERLAP_TOKENS: usize = 200;
const WINDOW_CHARS: usize = WINDOW_TOKENS * CHARS_PER_TOKEN;
const OVERLAP_CHARS: usize = OVERLAP_TOKENS * CHARS_PER_TOKEN;
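// With the assumed ~4 chars/token ratio above, each window is 2000 * 4 = 8000 chars
// with an 800-char overlap, so consecutive chunks advance by roughly 7200 chars
// (less when a window is shortened to end at a paragraph boundary).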
fn extraction_prompt() -> String {
let cfg = config::get();
format!(
r#"Extract atomic factual claims from this conversation excerpt.
Speakers are labeled [{user}] and [{assistant}] in the transcript.
Use their proper names in claims, not "the user" or "the assistant."
Each claim should be:
- A single verifiable statement
- Specific enough to be useful in isolation
- Tagged with domain (e.g., bcachefs/btree, bcachefs/alloc, bcachefs/journal,
bcachefs/ec, bcachefs/reconcile, rust/idioms, workflow/preferences,
linux/kernel, memory/design, identity/personal)
- Tagged with confidence: "stated" (explicitly said), "implied" (logically follows),
or "speculative" (hypothesis, not confirmed)
- Include which speaker said it ("{user}", "{assistant}", or "Unknown")
Do NOT extract:
- Opinions or subjective assessments
- Conversational filler or greetings
- Things that are obviously common knowledge
- Restatements of the same fact (pick the clearest version)
- System messages, tool outputs, or error logs (extract what was LEARNED from them)
- Anything about the conversation itself ("{user} and {assistant} discussed...")
- Facts only relevant to this specific conversation (e.g. transient file paths, mid-debug state)
Output as a JSON array. Each element:
{{
"claim": "the exact factual statement",
"domain": "category/subcategory",
"confidence": "stated|implied|speculative",
"speaker": "{user}|{assistant}|Unknown"
}}
If the excerpt contains no extractable facts, output an empty array: []
--- CONVERSATION EXCERPT ---
"#, user = cfg.user_name, assistant = cfg.assistant_name)
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fact {
pub claim: String,
pub domain: String,
pub confidence: String,
pub speaker: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub source_file: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub source_chunk: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub source_offset: Option<usize>,
}
/// Extract user/assistant text messages from a JSONL transcript.
fn extract_messages(path: &Path) -> Vec<transcript::TranscriptMessage> {
transcript::parse_transcript(path)
.unwrap_or_default()
.into_iter()
.filter(|m| m.text.len() >= 20)
.collect()
}
/// Format messages into a single text for chunking.
fn format_for_extraction(messages: &[transcript::TranscriptMessage]) -> String {
let cfg = config::get();
messages.iter()
.map(|msg| {
let role = if msg.role == "user" { &cfg.user_name } else { &cfg.assistant_name };
let text = crate::util::truncate(&msg.text, 2800, "\n[...truncated...]");
let ts = if msg.timestamp.len() >= 19 { &msg.timestamp[..19] } else { "" };
if ts.is_empty() {
format!("[{}] {}", role, text)
} else {
format!("[{} {}] {}", role, ts, text)
}
})
.collect::<Vec<_>>()
.join("\n\n")
}
/// Split text into overlapping windows, breaking at paragraph boundaries.
fn chunk_text(text: &str) -> Vec<(usize, &str)> {
let mut chunks = Vec::new();
let mut start = 0;
while start < text.len() {
let mut end = text.floor_char_boundary((start + WINDOW_CHARS).min(text.len()));
// Try to break at a paragraph boundary
if end < text.len() {
if let Some(para) = text[start..end].rfind("\n\n") {
if para > WINDOW_CHARS / 2 {
end = start + para;
}
}
}
chunks.push((start, &text[start..end]));
let next = text.floor_char_boundary(end.saturating_sub(OVERLAP_CHARS));
if next <= start {
start = end;
} else {
start = next;
}
}
chunks
}
/// Parse JSON facts from model response.
fn parse_facts(response: &str) -> Vec<Fact> {
let cleaned = response.trim();
// Strip markdown code block
let cleaned = if cleaned.starts_with("```") {
cleaned.lines()
.filter(|l| !l.starts_with("```"))
.collect::<Vec<_>>()
.join("\n")
} else {
cleaned.to_string()
};
// Find JSON array
let start = cleaned.find('[');
let end = cleaned.rfind(']');
let (Some(start), Some(end)) = (start, end) else { return Vec::new() };
serde_json::from_str(&cleaned[start..=end]).unwrap_or_default()
}
/// Mine a single transcript for atomic facts.
/// The optional `progress` callback receives status strings (e.g. "chunk 3/47").
pub fn mine_transcript(
path: &Path,
dry_run: bool,
progress: Option<&dyn Fn(&str)>,
) -> Result<Vec<Fact>, String> {
let filename = path.file_name()
.map(|n| n.to_string_lossy().to_string())
.unwrap_or_else(|| "unknown".into());
let log = |msg: &str| {
eprintln!("{}", msg);
if let Some(cb) = progress { cb(msg); }
};
log(&format!("Mining: {}", filename));
let messages = extract_messages(path);
if messages.is_empty() {
log("No messages found");
return Ok(Vec::new());
}
log(&format!("{} messages extracted", messages.len()));
let text = format_for_extraction(&messages);
let chunks = chunk_text(&text);
log(&format!("{} chunks ({} chars)", chunks.len(), text.len()));
if dry_run {
for (i, (offset, chunk)) in chunks.iter().enumerate() {
eprintln!("\n--- Chunk {} (offset {}, {} chars) ---", i + 1, offset, chunk.len());
eprintln!("{}", crate::util::truncate(chunk, 500, ""));
if chunk.len() > 500 {
eprintln!(" ... ({} more chars)", chunk.len() - 500);
}
}
return Ok(Vec::new());
}
let prompt_prefix = extraction_prompt();
let mut all_facts = Vec::new();
for (i, (_offset, chunk)) in chunks.iter().enumerate() {
let status = format!("chunk {}/{} ({} chars)", i + 1, chunks.len(), chunk.len());
eprint!(" {}...", status);
if let Some(cb) = progress { cb(&status); }
let prompt = format!("{}{}\n\n--- END OF EXCERPT ---\n\nReturn ONLY a JSON array of factual claims, or [] if none.", prompt_prefix, chunk);
let response = match llm::call_haiku("fact-mine", &prompt) {
Ok(r) => r,
Err(e) => {
eprintln!(" error: {}", e);
continue;
}
};
let mut facts = parse_facts(&response);
for fact in &mut facts {
fact.source_file = Some(filename.clone());
fact.source_chunk = Some(i + 1);
fact.source_offset = Some(*_offset);
}
eprintln!(" {} facts", facts.len());
all_facts.extend(facts);
}
// Deduplicate by claim text
let mut seen = HashSet::new();
let before = all_facts.len();
all_facts.retain(|f| seen.insert(f.claim.to_lowercase()));
let dupes = before - all_facts.len();
if dupes > 0 {
log(&format!("{} duplicates removed", dupes));
}
log(&format!("Total: {} unique facts", all_facts.len()));
Ok(all_facts)
}
/// Mine a transcript and store facts in the capnp store.
/// Returns the number of facts stored.
/// The optional `progress` callback receives status strings for daemon display.
pub fn mine_and_store(
path: &Path,
progress: Option<&dyn Fn(&str)>,
) -> Result<usize, String> {
let facts = mine_transcript(path, false, progress)?;
let filename = path.file_name()
.map(|n| n.to_string_lossy().to_string())
.unwrap_or_else(|| "unknown".into());
let proposed_key = format!("_facts-{}", filename.trim_end_matches(".jsonl"));
// Always write a marker so we don't re-queue empty transcripts
let json = if facts.is_empty() {
"[]".to_string()
} else {
serde_json::to_string_pretty(&facts)
.map_err(|e| format!("serialize facts: {}", e))?
};
let mut store = store::Store::load()?;
// Run naming resolution to get a good key (and possibly merge into existing)
let resolution = super::knowledge::resolve_naming(&store, &proposed_key, &json);
let key = match resolution {
super::knowledge::NamingResolution::Create(k) => k,
super::knowledge::NamingResolution::MergeInto(existing_key) => {
// Merge: append facts to existing node's content
eprintln!(" Merging facts into existing node: {}", existing_key);
if let Some(node) = store.nodes.get(existing_key.as_str()) {
let merged = format!("{}\n\n{}", node.content, json);
store.upsert_provenance(&existing_key, &merged, "fact-mine:write")?;
store.save()?;
return Ok(facts.len());
}
// Fallback if existing node disappeared
proposed_key
}
};
store.upsert_provenance(&key, &json, "fact-mine:write")?;
store.save()?;
eprintln!(" Stored {} facts as {}", facts.len(), key);
Ok(facts.len())
}
/// Mine transcripts, returning all facts. Skips files with fewer than min_messages.
pub fn mine_batch(paths: &[&Path], min_messages: usize, dry_run: bool) -> Result<Vec<Fact>, String> {
let mut all_facts = Vec::new();
for path in paths {
let messages = extract_messages(path);
if messages.len() < min_messages {
eprintln!("Skipping {} ({} messages < {})",
path.file_name().map(|n| n.to_string_lossy()).unwrap_or_default(),
messages.len(), min_messages);
continue;
}
let facts = mine_transcript(path, dry_run, None)?;
all_facts.extend(facts);
}
Ok(all_facts)
}

View file

@ -0,0 +1,972 @@
// knowledge.rs — knowledge agent action parsing, depth tracking, and convergence loop
//
// Agent prompts live in agents/*.agent files, dispatched via defs.rs.
// This module handles:
// - Action parsing (WRITE_NODE, LINK, REFINE from LLM output)
// - Inference depth tracking (prevents runaway abstraction)
// - Action application (write to store with provenance)
// - Convergence loop (sequences agents, measures graph stability)
// - Conversation fragment selection (for observation agent)
use crate::graph::Graph;
use super::llm;
use crate::spectral;
use crate::store::{self, Store, new_relation, RelationType};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
// ---------------------------------------------------------------------------
// Action types
// ---------------------------------------------------------------------------
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Action {
pub kind: ActionKind,
pub confidence: Confidence,
pub weight: f64,
pub depth: i32,
pub applied: Option<bool>,
pub rejected_reason: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ActionKind {
WriteNode {
key: String,
content: String,
covers: Vec<String>,
},
Link {
source: String,
target: String,
},
Refine {
key: String,
content: String,
},
Demote {
key: String,
},
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Confidence {
High,
Medium,
Low,
}
impl Confidence {
/// Weight for delta metrics — how much this action contributes to change measurement.
fn delta_weight(self) -> f64 {
match self {
Self::High => 1.0,
Self::Medium => 0.6,
Self::Low => 0.3,
}
}
/// Confidence value for depth gating — capped below 1.0 so even "high" must clear thresholds.
fn gate_value(self) -> f64 {
match self {
Self::High => 0.9,
Self::Medium => 0.6,
Self::Low => 0.3,
}
}
fn parse(s: &str) -> Self {
match s.to_lowercase().as_str() {
"high" => Self::High,
"low" => Self::Low,
_ => Self::Medium,
}
}
}
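// Worked example of how these values interact with required_confidence() below,
// assuming the default confidence_base of 0.3 from KnowledgeLoopConfig: at depth 3
// the threshold is 1 - 0.7^3 ≈ 0.657, so a High action (gate_value 0.9) passes on
// its own, while a Medium action (0.6) needs a use_bonus of at least ~0.06.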
// ---------------------------------------------------------------------------
// Action parsing
// ---------------------------------------------------------------------------
pub fn parse_write_nodes(text: &str) -> Vec<Action> {
let re = Regex::new(r"(?s)WRITE_NODE\s+(\S+)\s*\n(.*?)END_NODE").unwrap();
let conf_re = Regex::new(r"(?i)CONFIDENCE:\s*(high|medium|low)").unwrap();
let covers_re = Regex::new(r"COVERS:\s*(.+)").unwrap();
re.captures_iter(text)
.map(|cap| {
let key = cap[1].to_string();
let mut content = cap[2].trim().to_string();
let confidence = conf_re
.captures(&content)
.map(|c| Confidence::parse(&c[1]))
.unwrap_or(Confidence::Medium);
content = conf_re.replace(&content, "").trim().to_string();
let covers: Vec<String> = covers_re
.captures(&content)
.map(|c| c[1].split(',').map(|s| s.trim().to_string()).collect())
.unwrap_or_default();
content = covers_re.replace(&content, "").trim().to_string();
Action {
weight: confidence.delta_weight(),
kind: ActionKind::WriteNode { key, content, covers },
confidence,
depth: 0,
applied: None,
rejected_reason: None,
}
})
.collect()
}
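// Sketch of the action text these regexes expect; the key and content here are
// hypothetical, inferred from the patterns above rather than from the .agent prompts:
//
//   WRITE_NODE example-topic#subtopic
//   Some synthesized content...
//   CONFIDENCE: high
//   COVERS: source-node-a, source-node-b
//   END_NODE
//
// The CONFIDENCE and COVERS lines are parsed out and stripped from the stored content.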
pub fn parse_links(text: &str) -> Vec<Action> {
let re = Regex::new(r"(?m)^LINK\s+(\S+)\s+(\S+)").unwrap();
re.captures_iter(text)
.map(|cap| Action {
kind: ActionKind::Link {
source: cap[1].to_string(),
target: cap[2].to_string(),
},
confidence: Confidence::Low,
weight: 0.3,
depth: -1,
applied: None,
rejected_reason: None,
})
.collect()
}
pub fn parse_refines(text: &str) -> Vec<Action> {
let re = Regex::new(r"(?s)REFINE\s+(\S+)\s*\n(.*?)END_REFINE").unwrap();
re.captures_iter(text)
.map(|cap| {
let key = cap[1].trim_matches('*').trim().to_string();
Action {
kind: ActionKind::Refine {
key,
content: cap[2].trim().to_string(),
},
confidence: Confidence::Medium,
weight: 0.7,
depth: 0,
applied: None,
rejected_reason: None,
}
})
.collect()
}
pub fn parse_demotes(text: &str) -> Vec<Action> {
let re = Regex::new(r"(?m)^DEMOTE\s+(\S+)").unwrap();
re.captures_iter(text)
.map(|cap| Action {
kind: ActionKind::Demote {
key: cap[1].to_string(),
},
confidence: Confidence::Medium,
weight: 0.5,
depth: -1,
applied: None,
rejected_reason: None,
})
.collect()
}
pub fn parse_all_actions(text: &str) -> Vec<Action> {
let mut actions = parse_write_nodes(text);
actions.extend(parse_links(text));
actions.extend(parse_refines(text));
actions.extend(parse_demotes(text));
actions
}
pub fn count_no_ops(text: &str) -> usize {
let no_conn = Regex::new(r"\bNO_CONNECTION\b").unwrap().find_iter(text).count();
let affirm = Regex::new(r"\bAFFIRM\b").unwrap().find_iter(text).count();
let no_extract = Regex::new(r"\bNO_EXTRACTION\b").unwrap().find_iter(text).count();
no_conn + affirm + no_extract
}
// ---------------------------------------------------------------------------
// Inference depth tracking
// ---------------------------------------------------------------------------
const DEPTH_DB_KEY: &str = "_knowledge-depths";
#[derive(Default)]
pub struct DepthDb {
depths: HashMap<String, i32>,
}
impl DepthDb {
pub fn load(store: &Store) -> Self {
let depths = store.nodes.get(DEPTH_DB_KEY)
.and_then(|n| serde_json::from_str(&n.content).ok())
.unwrap_or_default();
Self { depths }
}
pub fn save(&self, store: &mut Store) {
if let Ok(json) = serde_json::to_string(&self.depths) {
store.upsert_provenance(DEPTH_DB_KEY, &json,
"observation:write").ok();
}
}
pub fn get(&self, key: &str) -> i32 {
self.depths.get(key).copied().unwrap_or(0)
}
pub fn set(&mut self, key: String, depth: i32) {
self.depths.insert(key, depth);
}
}
/// Agent base depths: observation=1, extractor=2, generalize=3, connector=3; challenger has no base depth
fn agent_base_depth(agent: &str) -> Option<i32> {
match agent {
"observation" => Some(1),
"extractor" => Some(2),
"generalize" => Some(3),
"connector" => Some(3),
"challenger" => None,
_ => Some(2),
}
}
pub fn compute_action_depth(db: &DepthDb, action: &Action, agent: &str) -> i32 {
match &action.kind {
ActionKind::Link { .. } | ActionKind::Demote { .. } => -1,
ActionKind::Refine { key, .. } => db.get(key),
ActionKind::WriteNode { covers, .. } => {
if !covers.is_empty() {
covers.iter().map(|k| db.get(k)).max().unwrap_or(0) + 1
} else {
agent_base_depth(agent).unwrap_or(2)
}
}
}
}
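// Worked example: a WriteNode with covers = ["a" (depth 1), "b" (depth 2)] gets
// depth max(1, 2) + 1 = 3; a WriteNode with no COVERS from the extractor falls back
// to its base depth of 2; Links and Demotes always get depth -1, for which
// required_confidence() below returns 0.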
/// Confidence threshold that scales with inference depth.
pub fn required_confidence(depth: i32, base: f64) -> f64 {
if depth <= 0 {
return 0.0;
}
1.0 - (1.0 - base).powi(depth)
}
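// With base = 0.3: depth 1 → 0.30, depth 2 → 0.51, depth 3 → ~0.66, depth 4 → ~0.76.
// Depth 0 and below require nothing, so direct observations are never gated here.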
/// Confidence bonus from real-world use.
pub fn use_bonus(use_count: u32) -> f64 {
if use_count == 0 {
return 0.0;
}
1.0 - 1.0 / (1.0 + 0.15 * use_count as f64)
}
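// Worked values: 1 use → ~0.13, 5 uses → ~0.43, 20 uses → 0.75; the bonus approaches
// 1.0 asymptotically, so heavily-used nodes can clear deeper thresholds.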
// ---------------------------------------------------------------------------
// Action application
// ---------------------------------------------------------------------------
fn stamp_content(content: &str, agent: &str, timestamp: &str, depth: i32) -> String {
format!("<!-- author: {} | created: {} | depth: {} -->\n{}", agent, timestamp, depth, content)
}
/// Check if a link already exists between two keys.
fn has_edge(store: &Store, source: &str, target: &str) -> bool {
store.relations.iter().any(|r| {
!r.deleted
&& ((r.source_key == source && r.target_key == target)
|| (r.source_key == target && r.target_key == source))
})
}
pub fn apply_action(
store: &mut Store,
action: &Action,
agent: &str,
timestamp: &str,
depth: i32,
) -> bool {
match &action.kind {
ActionKind::WriteNode { key, content, .. } => {
let stamped = stamp_content(content, agent, timestamp, depth);
let prov = format!("{}:write", agent);
store.upsert_provenance(key, &stamped, &prov).is_ok()
}
ActionKind::Link { source, target } => {
if has_edge(store, source, target) {
return false;
}
let source_uuid = match store.nodes.get(source.as_str()) {
Some(n) => n.uuid,
None => return false,
};
let target_uuid = match store.nodes.get(target.as_str()) {
Some(n) => n.uuid,
None => return false,
};
let mut rel = new_relation(
source_uuid, target_uuid,
RelationType::Link,
0.3,
source, target,
);
rel.provenance = format!("{}:link", agent);
store.add_relation(rel).is_ok()
}
ActionKind::Refine { key, content } => {
let stamped = stamp_content(content, agent, timestamp, depth);
let prov = format!("{}:refine", agent);
store.upsert_provenance(key, &stamped, &prov).is_ok()
}
ActionKind::Demote { key } => {
if let Some(node) = store.nodes.get_mut(key) {
node.provenance = format!("{}:demote", agent);
node.weight = (node.weight * 0.5).max(0.05);
true
} else {
false
}
}
}
}
fn agent_provenance(agent: &str) -> String {
match agent {
"observation" => "agent:knowledge-observation".to_string(),
"extractor" | "pattern" => "agent:knowledge-pattern".to_string(),
"generalize" => "agent:knowledge-generalize".to_string(),
"connector" => "agent:knowledge-connector".to_string(),
"challenger" => "agent:knowledge-challenger".to_string(),
_ => format!("agent:{}", agent),
}
}
// ---------------------------------------------------------------------------
// Naming resolution — called before creating any new node
// ---------------------------------------------------------------------------
/// Resolution from the naming agent.
#[derive(Debug)]
pub enum NamingResolution {
/// Create with the proposed key (or a better one).
Create(String),
/// Merge content into an existing node instead.
MergeInto(String),
}
/// Find existing nodes that might conflict with a proposed new node.
/// Returns up to `limit` (key, content_preview) pairs.
fn find_conflicts(
store: &Store,
proposed_key: &str,
proposed_content: &str,
limit: usize,
) -> Vec<(String, String)> {
use std::collections::BTreeMap;
// Extract search terms from the key (split on separators) and first ~200 chars of content
let mut terms: BTreeMap<String, f64> = BTreeMap::new();
for part in proposed_key.split(|c: char| c == '-' || c == '_' || c == '#' || c == '.') {
let p = part.to_lowercase();
if p.len() >= 3 {
terms.insert(p, 1.0);
}
}
// Add a few content terms
let content_terms = crate::search::extract_query_terms(proposed_content, 5);
for term in content_terms.split_whitespace() {
terms.entry(term.to_string()).or_insert(0.5);
}
if terms.is_empty() {
return Vec::new();
}
// Use component matching to find related nodes
let (seeds, _) = crate::search::match_seeds_opts(&terms, store, true, false);
let mut results: Vec<(String, f64)> = seeds.into_iter()
.filter(|(k, _)| k != proposed_key)
.collect();
results.sort_by(|a, b| b.1.total_cmp(&a.1));
results.into_iter()
.take(limit)
.filter_map(|(key, _)| {
let node = store.nodes.get(key.as_str())?;
let preview: String = node.content.chars().take(200).collect();
Some((key, preview))
})
.collect()
}
/// Format the naming prompt for a proposed node.
fn format_naming_prompt(
proposed_key: &str,
proposed_content: &str,
conflicts: &[(String, String)],
) -> String {
let conflict_section = if conflicts.is_empty() {
"(no existing nodes found with overlapping content)".to_string()
} else {
conflicts.iter()
.map(|(key, preview)| format!("### `{}`\n\n{}", key, preview))
.collect::<Vec<_>>()
.join("\n\n")
};
// Truncate content for the prompt (don't send huge nodes to the naming model)
let content_preview: String = proposed_content.chars().take(1000).collect();
format!(
"# Naming Agent — Node Key Resolution\n\n\
You are given a proposed new node (key + content) and a list of existing\n\
nodes that might overlap with it. Decide what to do:\n\n\
1. **CREATE**: the proposed key is good and there's no meaningful overlap.\n\
2. **RENAME**: the content is unique but the key is bad (UUID, truncated, generic).\n\
3. **MERGE_INTO**: an existing node already covers this content.\n\n\
Good keys: 2-5 words in kebab-case, optionally with `#` subtopic.\n\
Bad keys: UUIDs, single generic words, truncated auto-slugs.\n\n\
Respond with exactly ONE line: `CREATE key`, `RENAME better_key`, or `MERGE_INTO existing_key`.\n\n\
## Proposed node\n\n\
Key: `{}`\n\n\
Content:\n```\n{}\n```\n\n\
## Existing nodes that might overlap\n\n\
{}",
proposed_key, content_preview, conflict_section,
)
}
/// Parse naming agent response.
fn parse_naming_response(response: &str) -> Option<NamingResolution> {
for line in response.lines() {
// Strip backticks — Haiku sometimes wraps the response line in them
let trimmed = line.trim().trim_matches('`').trim();
if let Some(key) = trimmed.strip_prefix("CREATE ") {
return Some(NamingResolution::Create(key.trim().trim_matches('`').to_string()));
}
if let Some(key) = trimmed.strip_prefix("RENAME ") {
return Some(NamingResolution::Create(key.trim().trim_matches('`').to_string()));
}
if let Some(key) = trimmed.strip_prefix("MERGE_INTO ") {
return Some(NamingResolution::MergeInto(key.trim().trim_matches('`').to_string()));
}
}
None
}
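// Example responses this parser accepts (keys are hypothetical):
//   CREATE btree-locking#deadlock-avoidance
//   RENAME `memory-consolidation#naming`      (backticks are stripped)
//   MERGE_INTO rust-error-handling
// Anything else (e.g. a multi-line explanation with no such line) yields None and
// the caller falls back to the proposed key.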
/// Resolve naming for a proposed WriteNode action.
///
/// Searches for conflicts, calls the naming LLM (Sonnet), and returns
/// either a Create (possibly with a better key) or MergeInto resolution.
/// On LLM failure, falls through to using the proposed key as-is.
pub fn resolve_naming(
store: &Store,
proposed_key: &str,
proposed_content: &str,
) -> NamingResolution {
let conflicts = find_conflicts(store, proposed_key, proposed_content, 5);
let prompt = format_naming_prompt(proposed_key, proposed_content, &conflicts);
match llm::call_model("naming", "sonnet", &prompt) {
Ok(response) => {
match parse_naming_response(&response) {
Some(resolution) => resolution,
None => {
eprintln!("naming: unparseable response, using proposed key");
NamingResolution::Create(proposed_key.to_string())
}
}
}
Err(e) => {
eprintln!("naming: LLM error ({}), using proposed key", e);
NamingResolution::Create(proposed_key.to_string())
}
}
}
// ---------------------------------------------------------------------------
// Shared agent execution
// ---------------------------------------------------------------------------
/// Result of running a single agent through the common pipeline.
pub struct AgentResult {
pub output: String,
pub actions: Vec<Action>,
pub no_ops: usize,
pub node_keys: Vec<String>,
}
/// Resolve naming for all WriteNode actions in a list.
///
/// For each WriteNode, calls the naming agent to check for conflicts and
/// get a good key. May convert WriteNode → Refine (if MERGE_INTO) or
/// update the key (if RENAME/CREATE with different key).
pub fn resolve_action_names(store: &Store, actions: Vec<Action>) -> Vec<Action> {
actions.into_iter().map(|action| {
match &action.kind {
ActionKind::WriteNode { key, content, covers } => {
match resolve_naming(store, key, content) {
NamingResolution::Create(new_key) => {
if new_key == *key {
action // keep as-is
} else {
eprintln!("naming: {}{}", key, new_key);
Action {
kind: ActionKind::WriteNode {
key: new_key,
content: content.clone(),
covers: covers.clone(),
},
..action
}
}
}
NamingResolution::MergeInto(existing_key) => {
eprintln!("naming: {} → MERGE_INTO {}", key, existing_key);
Action {
kind: ActionKind::Refine {
key: existing_key,
content: content.clone(),
},
..action
}
}
}
}
_ => action,
}
}).collect()
}
/// Run a single agent and apply its actions (no depth tracking).
///
/// Returns (total_actions, applied_count) or an error.
pub fn run_and_apply(
store: &mut Store,
agent_name: &str,
batch_size: usize,
llm_tag: &str,
) -> Result<(usize, usize), String> {
let result = run_one_agent(store, agent_name, batch_size, llm_tag)?;
let actions = resolve_action_names(store, result.actions);
let ts = store::compact_timestamp();
let mut applied = 0;
for action in &actions {
if apply_action(store, action, agent_name, &ts, 0) {
applied += 1;
}
}
Ok((actions.len(), applied))
}
/// Run a single agent: build prompt → call LLM → store output → parse actions → record visits.
///
/// This is the common pipeline shared by the knowledge loop, consolidation pipeline,
/// and daemon. Callers handle action application (with or without depth tracking).
pub fn run_one_agent(
store: &mut Store,
agent_name: &str,
batch_size: usize,
llm_tag: &str,
) -> Result<AgentResult, String> {
let def = super::defs::get_def(agent_name)
.ok_or_else(|| format!("no .agent file for {}", agent_name))?;
let agent_batch = super::defs::run_agent(store, &def, batch_size)?;
let output = llm::call_model(llm_tag, &def.model, &agent_batch.prompt)?;
// Store raw output for audit trail
let ts = store::compact_timestamp();
let report_key = format!("_{}-{}-{}", llm_tag, agent_name, ts);
let provenance = agent_provenance(agent_name);
store.upsert_provenance(&report_key, &output, &provenance).ok();
let actions = parse_all_actions(&output);
let no_ops = count_no_ops(&output);
// Record visits for processed nodes
if !agent_batch.node_keys.is_empty() {
store.record_agent_visits(&agent_batch.node_keys, agent_name).ok();
}
Ok(AgentResult {
output,
actions,
no_ops,
node_keys: agent_batch.node_keys,
})
}
// ---------------------------------------------------------------------------
// Conversation fragment selection
// ---------------------------------------------------------------------------
/// Extract human-readable dialogue from a conversation JSONL
fn extract_conversation_text(path: &Path, max_chars: usize) -> String {
let cfg = crate::config::get();
let messages = super::transcript::parse_transcript(path).unwrap_or_default();
let mut fragments = Vec::new();
let mut total = 0;
for msg in &messages {
let min_len = if msg.role == "user" { 5 } else { 10 };
if msg.text.len() <= min_len { continue; }
// Only include external user messages
if msg.role == "user" {
if msg.user_type.as_deref() != Some("external") { continue; }
if msg.text.starts_with("[Request interrupted") { continue; }
}
let role = if msg.role == "user" { &cfg.user_name } else { &cfg.assistant_name };
fragments.push(format!("**{}:** {}", role, msg.text));
total += msg.text.len();
if total > max_chars { break; }
}
fragments.join("\n\n")
}
/// Count short user messages (dialogue turns) in a JSONL
fn count_dialogue_turns(path: &Path) -> usize {
let messages = super::transcript::parse_transcript(path).unwrap_or_default();
messages.iter()
.filter(|m| m.role == "user"
&& m.user_type.as_deref() == Some("external")
&& m.text.len() > 5
&& m.text.len() < 500
&& !m.text.starts_with("[Request interrupted")
&& !m.text.starts_with("Implement the following"))
.count()
}
/// Select conversation fragments for the observation extractor
pub fn select_conversation_fragments(n: usize) -> Vec<(String, String)> {
let projects = crate::config::get().projects_dir.clone();
if !projects.exists() { return Vec::new(); }
let mut jsonl_files: Vec<PathBuf> = Vec::new();
if let Ok(dirs) = fs::read_dir(&projects) {
for dir in dirs.filter_map(|e| e.ok()) {
if !dir.path().is_dir() { continue; }
if let Ok(files) = fs::read_dir(dir.path()) {
for f in files.filter_map(|e| e.ok()) {
let p = f.path();
if p.extension().map(|x| x == "jsonl").unwrap_or(false) {
if let Ok(meta) = p.metadata() {
if meta.len() > 50_000 {
jsonl_files.push(p);
}
}
}
}
}
}
}
let mut scored: Vec<(usize, PathBuf)> = jsonl_files.into_iter()
.map(|f| (count_dialogue_turns(&f), f))
.filter(|(turns, _)| *turns >= 10)
.collect();
scored.sort_by(|a, b| b.0.cmp(&a.0));
let mut fragments = Vec::new();
for (_, f) in scored.iter().take(n * 2) {
let session_id = f.file_stem()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_else(|| "unknown".into());
let text = extract_conversation_text(f, 8000);
if text.len() > 500 {
fragments.push((session_id, text));
}
if fragments.len() >= n { break; }
}
fragments
}
// ---------------------------------------------------------------------------
// Convergence metrics
// ---------------------------------------------------------------------------
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CycleResult {
pub cycle: usize,
pub timestamp: String,
pub total_actions: usize,
pub total_applied: usize,
pub total_no_ops: usize,
pub depth_rejected: usize,
pub weighted_delta: f64,
pub graph_metrics_before: GraphMetrics,
pub graph_metrics_after: GraphMetrics,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GraphMetrics {
pub nodes: usize,
pub edges: usize,
pub cc: f64,
pub sigma: f64,
pub communities: usize,
}
impl GraphMetrics {
pub fn from_graph(store: &Store, graph: &Graph) -> Self {
Self {
nodes: store.nodes.len(),
edges: graph.edge_count(),
cc: graph.avg_clustering_coefficient() as f64,
sigma: graph.small_world_sigma() as f64,
communities: graph.community_count(),
}
}
}
fn metric_stability(history: &[CycleResult], key: &str, window: usize) -> f64 {
if history.len() < window { return f64::INFINITY; }
let values: Vec<f64> = history[history.len() - window..].iter()
.map(|h| match key {
"sigma" => h.graph_metrics_after.sigma,
"cc" => h.graph_metrics_after.cc,
"communities" => h.graph_metrics_after.communities as f64,
_ => 0.0,
})
.collect();
if values.len() < 2 { return f64::INFINITY; }
let mean = values.iter().sum::<f64>() / values.len() as f64;
if mean == 0.0 { return 0.0; }
let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / values.len() as f64;
variance.sqrt() / mean.abs()
}
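// Example (hypothetical sigma values over a window of 3): [10.0, 10.5, 9.8]
// → mean 10.1, population std dev ≈ 0.29, CV ≈ 0.029, which would pass the
// < 0.05 stability check in check_convergence() below.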
pub fn check_convergence(history: &[CycleResult], window: usize) -> bool {
if history.len() < window { return false; }
let sigma_cv = metric_stability(history, "sigma", window);
let cc_cv = metric_stability(history, "cc", window);
let comm_cv = metric_stability(history, "communities", window);
let recent = &history[history.len() - window..];
let avg_delta = recent.iter().map(|r| r.weighted_delta).sum::<f64>() / recent.len() as f64;
eprintln!("\n Convergence check (last {} cycles):", window);
eprintln!(" sigma CV: {:.4} (< 0.05?)", sigma_cv);
eprintln!(" CC CV: {:.4} (< 0.05?)", cc_cv);
eprintln!(" community CV: {:.4} (< 0.10?)", comm_cv);
eprintln!(" avg delta: {:.2} (< 1.00?)", avg_delta);
let structural = sigma_cv < 0.05 && cc_cv < 0.05 && comm_cv < 0.10;
let behavioral = avg_delta < 1.0;
if structural && behavioral {
eprintln!(" → CONVERGED");
true
} else {
false
}
}
// ---------------------------------------------------------------------------
// The knowledge loop
// ---------------------------------------------------------------------------
pub struct KnowledgeLoopConfig {
pub max_cycles: usize,
pub batch_size: usize,
pub window: usize,
pub max_depth: i32,
pub confidence_base: f64,
}
impl Default for KnowledgeLoopConfig {
fn default() -> Self {
Self {
max_cycles: 20,
batch_size: 5,
window: 5,
max_depth: 4,
confidence_base: 0.3,
}
}
}
pub fn run_knowledge_loop(config: &KnowledgeLoopConfig) -> Result<Vec<CycleResult>, String> {
let mut store = Store::load()?;
let mut depth_db = DepthDb::load(&store);
let mut history = Vec::new();
eprintln!("Knowledge Loop — fixed-point iteration");
eprintln!(" max_cycles={} batch_size={}", config.max_cycles, config.batch_size);
eprintln!(" window={} max_depth={}", config.window, config.max_depth);
for cycle in 1..=config.max_cycles {
let result = run_cycle(cycle, config, &mut depth_db)?;
history.push(result);
if check_convergence(&history, config.window) {
eprintln!("\n CONVERGED after {} cycles", cycle);
break;
}
}
// Save loop summary as a store node
if let Some(first) = history.first() {
let key = format!("_knowledge-loop-{}", first.timestamp);
if let Ok(json) = serde_json::to_string_pretty(&history) {
store = Store::load()?;
store.upsert_provenance(&key, &json,
"observation:write").ok();
depth_db.save(&mut store);
store.save()?;
}
}
Ok(history)
}
fn run_cycle(
cycle_num: usize,
config: &KnowledgeLoopConfig,
depth_db: &mut DepthDb,
) -> Result<CycleResult, String> {
let timestamp = store::compact_timestamp();
eprintln!("\n{}", "=".repeat(60));
eprintln!("CYCLE {}{}", cycle_num, timestamp);
eprintln!("{}", "=".repeat(60));
let mut store = Store::load()?;
let graph = store.build_graph();
let metrics_before = GraphMetrics::from_graph(&store, &graph);
eprintln!(" Before: nodes={} edges={} cc={:.3} sigma={:.3}",
metrics_before.nodes, metrics_before.edges, metrics_before.cc, metrics_before.sigma);
let mut all_actions = Vec::new();
let mut all_no_ops = 0;
let mut depth_rejected = 0;
let mut total_applied = 0;
// Run each agent via .agent file dispatch
let agent_names = ["observation", "extractor", "generalize", "connector", "challenger"];
for agent_name in &agent_names {
eprintln!("\n --- {} (n={}) ---", agent_name, config.batch_size);
let result = match run_one_agent(&mut store, agent_name, config.batch_size, "knowledge") {
Ok(r) => r,
Err(e) => {
eprintln!(" ERROR: {}", e);
continue;
}
};
let mut actions = result.actions;
all_no_ops += result.no_ops;
eprintln!(" Actions: {} No-ops: {}", actions.len(), result.no_ops);
let mut applied = 0;
for action in &mut actions {
let depth = compute_action_depth(depth_db, action, agent_name);
action.depth = depth;
match &action.kind {
ActionKind::WriteNode { key, covers, .. } => {
let conf_val = action.confidence.gate_value();
let req = required_confidence(depth, config.confidence_base);
let source_uses: Vec<u32> = covers.iter()
.filter_map(|k| store.nodes.get(k).map(|n| n.uses))
.collect();
let avg_uses = if source_uses.is_empty() { 0 }
else { source_uses.iter().sum::<u32>() / source_uses.len() as u32 };
let eff_conf = (conf_val + use_bonus(avg_uses)).min(1.0);
if eff_conf < req {
action.applied = Some(false);
action.rejected_reason = Some("depth_threshold".into());
depth_rejected += 1;
continue;
}
if depth > config.max_depth {
action.applied = Some(false);
action.rejected_reason = Some("max_depth".into());
depth_rejected += 1;
continue;
}
eprintln!(" WRITE {} depth={} conf={:.2} eff={:.2} req={:.2}",
key, depth, conf_val, eff_conf, req);
}
ActionKind::Link { source, target } => {
eprintln!(" LINK {}{}", source, target);
}
ActionKind::Refine { key, .. } => {
eprintln!(" REFINE {} depth={}", key, depth);
}
ActionKind::Demote { key } => {
eprintln!(" DEMOTE {}", key);
}
}
if apply_action(&mut store, action, agent_name, &timestamp, depth) {
applied += 1;
action.applied = Some(true);
if let ActionKind::WriteNode { key, .. } | ActionKind::Refine { key, .. } = &action.kind {
depth_db.set(key.clone(), depth);
}
} else {
action.applied = Some(false);
}
}
eprintln!(" Applied: {}/{}", applied, actions.len());
total_applied += applied;
all_actions.extend(actions);
}
depth_db.save(&mut store);
// Recompute spectral if anything changed
if total_applied > 0 {
eprintln!("\n Recomputing spectral embedding...");
let graph = store.build_graph();
let result = spectral::decompose(&graph, 8);
let emb = spectral::to_embedding(&result);
spectral::save_embedding(&emb).ok();
}
let graph = store.build_graph();
let metrics_after = GraphMetrics::from_graph(&store, &graph);
let weighted_delta: f64 = all_actions.iter()
.filter(|a| a.applied == Some(true))
.map(|a| a.weight)
.sum();
eprintln!("\n CYCLE {} SUMMARY", cycle_num);
eprintln!(" Applied: {}/{} depth-rejected: {} no-ops: {}",
total_applied, all_actions.len(), depth_rejected, all_no_ops);
eprintln!(" Weighted delta: {:.2}", weighted_delta);
Ok(CycleResult {
cycle: cycle_num,
timestamp,
total_actions: all_actions.len(),
total_applied,
total_no_ops: all_no_ops,
depth_rejected,
weighted_delta,
graph_metrics_before: metrics_before,
graph_metrics_after: metrics_after,
})
}

View file

@ -0,0 +1,190 @@
// LLM utilities: model invocation and response parsing
//
// Calls claude CLI as a subprocess. Uses prctl(PR_SET_PDEATHSIG)
// so child processes die when the daemon exits, preventing orphans.
use crate::store::Store;
use regex::Regex;
use std::fs;
use std::os::unix::process::CommandExt;
use std::process::Command;
fn log_usage(agent: &str, model: &str, prompt: &str, response: &str,
duration_ms: u128, ok: bool) {
let dir = crate::config::get().data_dir.join("llm-logs").join(agent);
let _ = fs::create_dir_all(&dir);
let date = chrono::Local::now().format("%Y-%m-%d");
let path = dir.join(format!("{}.md", date));
let ts = chrono::Local::now().format("%H:%M:%S");
let status = if ok { "ok" } else { "ERROR" };
let entry = format!(
"\n## {} — {} ({}, {:.1}s, {})\n\n\
### Prompt ({} chars)\n\n\
```\n{}\n```\n\n\
### Response ({} chars)\n\n\
```\n{}\n```\n\n---\n",
ts, agent, model, duration_ms as f64 / 1000.0, status,
prompt.len(), prompt,
response.len(), response,
);
use std::io::Write;
if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(&path) {
let _ = f.write_all(entry.as_bytes());
}
}
/// Maximum time to wait for a claude subprocess before killing it.
const SUBPROCESS_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300); // 5 minutes
/// Call a model via claude CLI. Returns the response text.
///
/// Sets PR_SET_PDEATHSIG on the child so it gets SIGTERM if the
/// parent daemon exits — no more orphaned claude processes.
/// Times out after 5 minutes to prevent blocking the daemon forever.
pub(crate) fn call_model(agent: &str, model: &str, prompt: &str) -> Result<String, String> {
// Write prompt to temp file (claude CLI needs file input for large prompts)
let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt",
std::process::id(), std::thread::current().id()));
fs::write(&tmp, prompt)
.map_err(|e| format!("write temp prompt: {}", e))?;
let mut cmd = Command::new("claude");
cmd.args(["-p", "--model", model, "--tools", "", "--no-session-persistence",
"--strict-mcp-config"])
.stdin(fs::File::open(&tmp).map_err(|e| format!("open temp: {}", e))?)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.env_remove("CLAUDECODE");
// Use separate OAuth credentials for agent work if configured
if let Some(ref dir) = crate::config::get().agent_config_dir {
cmd.env("CLAUDE_CONFIG_DIR", dir);
}
// Tell hooks this is a daemon agent call, not interactive
cmd.env("POC_AGENT", "1");
let start = std::time::Instant::now();
let mut child = unsafe {
cmd.pre_exec(|| {
libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM);
Ok(())
})
.spawn()
.map_err(|e| format!("spawn claude: {}", e))?
};
// Spawn a watchdog thread that kills the child after the timeout.
// Uses a cancellation flag so the thread exits promptly when the child finishes.
let child_id = child.id();
let cancel = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
let cancel_flag = cancel.clone();
let watchdog = std::thread::spawn(move || {
// Sleep in 1s increments so we can check the cancel flag
let deadline = std::time::Instant::now() + SUBPROCESS_TIMEOUT;
while std::time::Instant::now() < deadline {
if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
return;
}
std::thread::sleep(std::time::Duration::from_secs(1));
}
if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
return;
}
// Send SIGTERM, then SIGKILL after 5s grace period
unsafe { libc::kill(child_id as i32, libc::SIGTERM); }
for _ in 0..5 {
std::thread::sleep(std::time::Duration::from_secs(1));
if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
return;
}
}
unsafe { libc::kill(child_id as i32, libc::SIGKILL); }
});
let result = child.wait_with_output();
// Cancel the watchdog thread
cancel.store(true, std::sync::atomic::Ordering::Relaxed);
watchdog.join().ok();
fs::remove_file(&tmp).ok();
match result {
Ok(output) => {
let elapsed = start.elapsed().as_millis();
if elapsed > SUBPROCESS_TIMEOUT.as_millis() - 1000 {
log_usage(agent, model, prompt, "TIMEOUT", elapsed, false);
return Err(format!("claude timed out after {:.0}s", elapsed as f64 / 1000.0));
}
if output.status.success() {
let response = String::from_utf8_lossy(&output.stdout).trim().to_string();
log_usage(agent, model, prompt, &response, elapsed, true);
Ok(response)
} else {
let stderr = String::from_utf8_lossy(&output.stderr);
let preview = crate::util::first_n_chars(&stderr, 500);
log_usage(agent, model, prompt, &preview, elapsed, false);
Err(format!("claude exited {}: {}", output.status, preview.trim()))
}
}
Err(e) => Err(format!("wait claude: {}", e)),
}
}
/// Call Sonnet via claude CLI.
pub(crate) fn call_sonnet(agent: &str, prompt: &str) -> Result<String, String> {
call_model(agent, "sonnet", prompt)
}
/// Call Haiku via claude CLI (cheaper, faster — good for high-volume extraction).
pub(crate) fn call_haiku(agent: &str, prompt: &str) -> Result<String, String> {
call_model(agent, "haiku", prompt)
}
/// Parse a JSON response, handling markdown fences.
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
let cleaned = response.trim();
let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);
let cleaned = cleaned.strip_prefix("```").unwrap_or(cleaned);
let cleaned = cleaned.strip_suffix("```").unwrap_or(cleaned);
let cleaned = cleaned.trim();
if let Ok(v) = serde_json::from_str(cleaned) {
return Ok(v);
}
// Try to find JSON object or array
let re_obj = Regex::new(r"\{[\s\S]*\}").unwrap();
let re_arr = Regex::new(r"\[[\s\S]*\]").unwrap();
if let Some(m) = re_obj.find(cleaned) {
if let Ok(v) = serde_json::from_str(m.as_str()) {
return Ok(v);
}
}
if let Some(m) = re_arr.find(cleaned) {
if let Ok(v) = serde_json::from_str(m.as_str()) {
return Ok(v);
}
}
let preview = crate::util::first_n_chars(cleaned, 200);
Err(format!("no valid JSON in response: {preview}..."))
}
/// Get up to 200 node keys, sorted, for prompt context.
pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
let mut keys: Vec<String> = store.nodes.keys()
.cloned()
.collect();
keys.sort();
keys.truncate(200);
keys
}

View file

@ -0,0 +1,28 @@
// Agent layer: LLM-powered operations on the memory graph
//
// Everything here calls external models (Sonnet, Haiku) or orchestrates
// sequences of such calls. The core graph infrastructure (store, graph,
// spectral, search, similarity) lives at the crate root.
//
// llm — model invocation, response parsing
// prompts — prompt generation from store data
// defs — .agent file definitions and dispatch
// audit — link quality review via Sonnet
// consolidate — full consolidation pipeline
// knowledge — knowledge production agents + convergence loop
// enrich — journal enrichment, experience mining
// fact_mine — fact extraction from transcripts
// digest — episodic digest generation (daily/weekly/monthly)
// daemon — background job scheduler
// transcript — shared JSONL transcript parsing
pub mod transcript;
pub mod llm;
pub mod prompts;
pub mod defs;
pub mod audit;
pub mod consolidate;
pub mod knowledge;
pub mod enrich;
pub mod fact_mine;
pub mod digest;
pub mod daemon;

View file

@ -0,0 +1,464 @@
// Agent prompt generation and formatting. Presentation logic —
// builds text prompts from store data for consolidation agents.
use crate::store::Store;
use crate::graph::Graph;
use crate::similarity;
use crate::neuro::{
ReplayItem,
replay_queue, detect_interference,
};
/// Result of building an agent prompt — includes both the prompt text
/// and the keys of nodes selected for processing, so the caller can
/// record visits after successful completion.
pub struct AgentBatch {
pub prompt: String,
pub node_keys: Vec<String>,
}
/// Load a prompt template, replacing {{PLACEHOLDER}} with data
pub fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
let path = crate::config::get().prompts_dir.join(format!("{}.md", name));
let mut content = std::fs::read_to_string(&path)
.map_err(|e| format!("load prompt {}: {}", path.display(), e))?;
for (placeholder, data) in replacements {
content = content.replace(placeholder, data);
}
Ok(content)
}
pub fn format_topology_header(graph: &Graph) -> String {
let sigma = graph.small_world_sigma();
let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini();
let avg_cc = graph.avg_clustering_coefficient();
let n = graph.nodes().len();
let e = graph.edge_count();
// Identify saturated hubs — nodes with degree well above threshold
let threshold = graph.hub_threshold();
let mut hubs: Vec<_> = graph.nodes().iter()
.map(|k| (k.clone(), graph.degree(k)))
.filter(|(_, d)| *d >= threshold)
.collect();
hubs.sort_by(|a, b| b.1.cmp(&a.1));
hubs.truncate(15);
let hub_list = if hubs.is_empty() {
String::new()
} else {
let lines: Vec<String> = hubs.iter()
.map(|(k, d)| format!(" - {} (degree {})", k, d))
.collect();
format!(
"### SATURATED HUBS — DO NOT LINK TO THESE\n\
The following nodes are already over-connected. Adding more links\n\
to them makes the graph worse (star topology). Find lateral\n\
connections between peripheral nodes instead.\n\n{}\n\n\
Only link to a hub if it is genuinely the ONLY reasonable target.\n\n",
lines.join("\n"))
};
format!(
"## Current graph topology\n\
Nodes: {} Edges: {} Communities: {}\n\
Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\
Avg clustering coefficient: {:.4}\n\n\
{}\
Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\
Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n",
n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list)
}
pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String {
let hub_thresh = graph.hub_threshold();
let mut out = String::new();
for item in items {
let node = match store.nodes.get(&item.key) {
Some(n) => n,
None => continue,
};
out.push_str(&format!("## {} \n", item.key));
out.push_str(&format!("Priority: {:.3} CC: {:.3} Emotion: {:.1} ",
item.priority, item.cc, item.emotion));
out.push_str(&format!("Interval: {}d\n",
node.spaced_repetition_interval));
if item.outlier_score > 0.0 {
out.push_str(&format!("Spectral: {} (outlier={:.1})\n",
item.classification, item.outlier_score));
}
if let Some(community) = node.community_id {
out.push_str(&format!("Community: {} ", community));
}
let deg = graph.degree(&item.key);
let cc = graph.clustering_coefficient(&item.key);
// Hub-link ratio: what fraction of this node's edges go to hubs?
let neighbors = graph.neighbors(&item.key);
let hub_links = neighbors.iter()
.filter(|(n, _)| graph.degree(n) >= hub_thresh)
.count();
let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
let is_hub = deg >= hub_thresh;
out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
deg, cc, hub_ratio * 100.0, hub_links, deg));
if is_hub {
out.push_str(" ← THIS IS A HUB");
} else if hub_ratio > 0.6 {
out.push_str(" ← mostly hub-connected, needs lateral links");
}
out.push('\n');
let hits = crate::counters::search_hit_count(&item.key);
if hits > 0 {
out.push_str(&format!("Search hits: {} ← actively found by search, prefer to keep\n", hits));
}
// Content (truncated for large nodes)
let content = &node.content;
if content.len() > 1500 {
let truncated = crate::util::truncate(content, 1500, "\n[...]");
out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n\n",
content.len(), truncated));
} else {
out.push_str(&format!("\nContent:\n{}\n\n", content));
}
// Neighbors
let neighbors = graph.neighbors(&item.key);
if !neighbors.is_empty() {
out.push_str("Neighbors:\n");
for (n, strength) in neighbors.iter().take(15) {
let n_cc = graph.clustering_coefficient(n);
let n_community = store.nodes.get(n.as_str())
.and_then(|n| n.community_id);
out.push_str(&format!(" - {} (str={:.2}, cc={:.3}",
n, strength, n_cc));
if let Some(c) = n_community {
out.push_str(&format!(", c{}", c));
}
out.push_str(")\n");
}
}
// Suggested link targets: text-similar semantic nodes not already neighbors
let neighbor_keys: std::collections::HashSet<&str> = neighbors.iter()
.map(|(k, _)| k.as_str()).collect();
let mut candidates: Vec<(&str, f32)> = store.nodes.iter()
.filter(|(k, _)| {
*k != &item.key
&& !neighbor_keys.contains(k.as_str())
})
.map(|(k, n)| {
let sim = similarity::cosine_similarity(content, &n.content);
(k.as_str(), sim)
})
.filter(|(_, sim)| *sim > 0.1)
.collect();
candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
candidates.truncate(8);
if !candidates.is_empty() {
out.push_str("\nSuggested link targets (by text similarity, not yet linked):\n");
for (k, sim) in &candidates {
let is_hub = graph.degree(k) >= hub_thresh;
out.push_str(&format!(" - {} (sim={:.3}{})\n",
k, sim, if is_hub { ", HUB" } else { "" }));
}
}
out.push_str("\n---\n\n");
}
out
}
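// Illustrative sketch of the kind of text similarity assumed behind the
// `similarity::cosine_similarity(content, &n.content)` call above: a
// bag-of-words cosine over lowercased word counts. The real helper may
// tokenize or weight terms differently; this only shows the shape of the
// computation.
fn cosine_similarity_sketch(a: &str, b: &str) -> f32 {
    use std::collections::HashMap;
    fn counts(text: &str) -> HashMap<String, f32> {
        let mut m = HashMap::new();
        for word in text.split_whitespace() {
            *m.entry(word.to_lowercase()).or_insert(0.0) += 1.0;
        }
        m
    }
    let (ca, cb) = (counts(a), counts(b));
    // Dot product over shared words, divided by the product of vector norms.
    let dot: f32 = ca.iter()
        .filter_map(|(w, &x)| cb.get(w).map(|&y| x * y))
        .sum();
    let norm = |c: &HashMap<String, f32>| c.values().map(|v| v * v).sum::<f32>().sqrt();
    let denom = norm(&ca) * norm(&cb);
    if denom == 0.0 { 0.0 } else { dot / denom }
}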
pub fn format_health_section(store: &Store, graph: &Graph) -> String {
use crate::graph;
let health = graph::health_report(graph, store);
let mut out = health;
out.push_str("\n\n## Weight distribution\n");
// Weight histogram
let mut buckets = [0u32; 10]; // 0.0-0.1, 0.1-0.2, ..., 0.9-1.0
for node in store.nodes.values() {
let bucket = ((node.weight * 10.0) as usize).min(9);
buckets[bucket] += 1;
}
for (i, &count) in buckets.iter().enumerate() {
let lo = i as f32 / 10.0;
let hi = (i + 1) as f32 / 10.0;
let bar = "".repeat((count as usize) / 10);
out.push_str(&format!(" {:.1}-{:.1}: {:4} {}\n", lo, hi, count, bar));
}
// Near-prune nodes
let near_prune: Vec<_> = store.nodes.iter()
.filter(|(_, n)| n.weight < 0.15)
.map(|(k, n)| (k.clone(), n.weight))
.collect();
if !near_prune.is_empty() {
out.push_str(&format!("\n## Near-prune nodes ({} total)\n", near_prune.len()));
for (k, w) in near_prune.iter().take(20) {
out.push_str(&format!(" [{:.3}] {}\n", w, k));
}
}
// Community sizes
let communities = graph.communities();
let mut comm_sizes: std::collections::HashMap<u32, Vec<String>> = std::collections::HashMap::new();
for (key, &label) in communities {
comm_sizes.entry(label).or_default().push(key.clone());
}
let mut sizes: Vec<_> = comm_sizes.iter()
.map(|(id, members)| (*id, members.len(), members.clone()))
.collect();
sizes.sort_by(|a, b| b.1.cmp(&a.1));
out.push_str("\n## Largest communities\n");
for (id, size, members) in sizes.iter().take(10) {
out.push_str(&format!(" Community {} ({} nodes): ", id, size));
let sample: Vec<_> = members.iter().take(5).map(|s| s.as_str()).collect();
out.push_str(&sample.join(", "));
if *size > 5 { out.push_str(", ..."); }
out.push('\n');
}
out
}
pub fn format_pairs_section(
pairs: &[(String, String, f32)],
store: &Store,
graph: &Graph,
) -> String {
let mut out = String::new();
let communities = graph.communities();
for (a, b, sim) in pairs {
out.push_str(&format!("## Pair: similarity={:.3}\n", sim));
let ca = communities.get(a).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());
let cb = communities.get(b).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());
// Node A
out.push_str(&format!("\n### {} ({})\n", a, ca));
if let Some(node) = store.nodes.get(a) {
let content = crate::util::truncate(&node.content, 500, "...");
out.push_str(&format!("Weight: {:.2}\n{}\n",
node.weight, content));
}
// Node B
out.push_str(&format!("\n### {} ({})\n", b, cb));
if let Some(node) = store.nodes.get(b) {
let content = crate::util::truncate(&node.content, 500, "...");
out.push_str(&format!("Weight: {:.2}\n{}\n",
node.weight, content));
}
out.push_str("\n---\n\n");
}
out
}
pub fn format_rename_candidates(store: &Store, count: usize) -> (Vec<String>, String) {
let mut candidates: Vec<(&str, &crate::store::Node)> = store.nodes.iter()
.filter(|(key, _)| {
if key.starts_with("_facts-") { return true; }
if key.len() < 60 { return false; }
if key.starts_with("journal#j-") { return true; }
if key.starts_with("_mined-transcripts#f-") { return true; }
false
})
.map(|(k, n)| (k.as_str(), n))
.collect();
// Deprioritize nodes actively found by search — renaming them would
// break working queries. Sort by: search hits (ascending), then
// least-recently visited. Nodes with many hits sink to the bottom.
let hit_counts = crate::counters::all_search_hits();
let hit_map: std::collections::HashMap<&str, u64> = hit_counts.iter()
.map(|(k, v)| (k.as_str(), *v))
.collect();
candidates.sort_by_key(|(key, _)| {
let hits = hit_map.get(key).copied().unwrap_or(0);
(hits, store.last_visited(key, "rename"))
});
candidates.truncate(count);
let keys: Vec<String> = candidates.iter().map(|(k, _)| k.to_string()).collect();
let mut out = String::new();
out.push_str(&format!("## Nodes to rename ({} of {} candidates)\n\n",
candidates.len(),
store.nodes.keys().filter(|k| k.starts_with("_facts-") ||
(k.len() >= 60 &&
(k.starts_with("journal#j-") || k.starts_with("_mined-transcripts#f-")))).count()));
for (key, node) in &candidates {
out.push_str(&format!("### {}\n", key));
let created = if node.timestamp > 0 {
crate::store::format_datetime(node.timestamp)
} else {
"unknown".to_string()
};
out.push_str(&format!("Created: {}\n", created));
let hits = hit_map.get(key).copied().unwrap_or(0);
if hits > 0 {
out.push_str(&format!("Search hits: {} ← actively found by search, prefer to keep current name\n", hits));
}
let content = &node.content;
if content.len() > 800 {
let truncated = crate::util::truncate(content, 800, "\n[...]");
out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n\n",
content.len(), truncated));
} else {
out.push_str(&format!("\nContent:\n{}\n\n", content));
}
out.push_str("---\n\n");
}
(keys, out)
}
/// Get split candidates sorted by size (largest first)
pub fn split_candidates(store: &Store) -> Vec<String> {
let mut candidates: Vec<(&str, usize)> = store.nodes.iter()
.filter(|(key, node)| {
!key.starts_with('_')
&& !node.deleted
&& matches!(node.node_type, crate::store::NodeType::Semantic)
})
.map(|(k, n)| (k.as_str(), n.content.len()))
.collect();
candidates.sort_by(|a, b| b.1.cmp(&a.1));
candidates.into_iter().map(|(k, _)| k.to_string()).collect()
}
/// Format a single node for split-plan prompt (phase 1)
pub fn format_split_plan_node(store: &Store, graph: &Graph, key: &str) -> String {
let communities = graph.communities();
let node = match store.nodes.get(key) {
Some(n) => n,
None => return format!("Node '{}' not found\n", key),
};
let mut out = String::new();
out.push_str(&format!("### {} ({} chars)\n", key, node.content.len()));
// Show neighbors grouped by community
let neighbors = graph.neighbors(key);
if !neighbors.is_empty() {
let mut by_community: std::collections::BTreeMap<String, Vec<(&str, f32)>> =
std::collections::BTreeMap::new();
for (nkey, strength) in &neighbors {
let comm = communities.get(nkey.as_str())
.map(|c| format!("c{}", c))
.unwrap_or_else(|| "unclustered".into());
by_community.entry(comm)
.or_default()
.push((nkey.as_str(), *strength));
}
out.push_str("\nNeighbors by community:\n");
for (comm, members) in &by_community {
out.push_str(&format!(" {} ({}):", comm, members.len()));
for (nkey, strength) in members.iter().take(5) {
out.push_str(&format!(" {}({:.2})", nkey, strength));
}
if members.len() > 5 {
out.push_str(&format!(" +{} more", members.len() - 5));
}
out.push('\n');
}
}
// Full content
out.push_str(&format!("\nContent:\n{}\n\n", node.content));
out.push_str("---\n\n");
out
}
/// Build split-plan prompt for a single node (phase 1).
/// Uses the split.agent template with placeholders resolved for the given key.
pub fn split_plan_prompt(store: &Store, key: &str) -> Result<String, String> {
let def = super::defs::get_def("split")
.ok_or_else(|| "no split.agent file".to_string())?;
let graph = store.build_graph();
// Override the query — we have a specific key to split
let keys = vec![key.to_string()];
let (prompt, _) = super::defs::resolve_placeholders(&def.prompt, store, &graph, &keys, 1);
Ok(prompt)
}
/// Build split-extract prompt for one child (phase 2)
pub fn split_extract_prompt(store: &Store, parent_key: &str, child_key: &str, child_desc: &str, child_sections: &str) -> Result<String, String> {
let parent_content = store.nodes.get(parent_key)
.map(|n| n.content.as_str())
.ok_or_else(|| format!("No node '{}'", parent_key))?;
load_prompt("split-extract", &[
("{{CHILD_KEY}}", child_key),
("{{CHILD_DESC}}", child_desc),
("{{CHILD_SECTIONS}}", child_sections),
("{{PARENT_CONTENT}}", parent_content),
])
}
/// Show consolidation batch status or generate an agent prompt.
pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> {
if auto {
let batch = agent_prompt(store, "replay", count)?;
println!("{}", batch.prompt);
return Ok(());
}
let graph = store.build_graph();
let items = replay_queue(store, count);
if items.is_empty() {
println!("No nodes to consolidate.");
return Ok(());
}
println!("Consolidation batch ({} nodes):\n", items.len());
for item in &items {
let node_type = store.nodes.get(&item.key)
.map(|n| if matches!(n.node_type, crate::store::NodeType::EpisodicSession) { "episodic" } else { "semantic" })
.unwrap_or("?");
println!(" [{:.3}] {} (cc={:.3}, interval={}d, type={})",
item.priority, item.key, item.cc, item.interval_days, node_type);
}
let pairs = detect_interference(store, &graph, 0.6);
if !pairs.is_empty() {
println!("\nInterfering pairs ({}):", pairs.len());
for (a, b, sim) in pairs.iter().take(5) {
println!(" [{:.3}] {}{}", sim, a, b);
}
}
println!("\nAgent prompts:");
println!(" --auto Generate replay agent prompt");
println!(" --agent replay Replay agent (schema assimilation)");
println!(" --agent linker Linker agent (relational binding)");
println!(" --agent separator Separator agent (pattern separation)");
println!(" --agent transfer Transfer agent (CLS episodic→semantic)");
println!(" --agent health Health agent (synaptic homeostasis)");
Ok(())
}
/// Generate a specific agent prompt with filled-in data.
pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<AgentBatch, String> {
let def = super::defs::get_def(agent)
.ok_or_else(|| format!("Unknown agent: {}", agent))?;
super::defs::run_agent(store, &def, count)
}

View file

@ -0,0 +1,94 @@
// Shared JSONL transcript parsing
//
// Three agents (enrich, fact_mine, knowledge) all parse Claude Code JSONL
// transcripts. This module provides the shared core: parse each line, extract
// message type, text content from string-or-array blocks, timestamp, and
// user type. Callers filter and transform as needed.
use std::fs;
use std::path::Path;
/// A single message extracted from a JSONL transcript.
pub struct TranscriptMessage {
/// 1-based line number in the JSONL file.
pub line: usize,
/// Raw role: "user" or "assistant".
pub role: String,
/// Extracted text content (trimmed, blocks joined with newlines).
pub text: String,
/// ISO timestamp from the message, or empty string.
pub timestamp: String,
/// For user messages: "external", "internal", etc. None for assistant.
pub user_type: Option<String>,
}
/// Parse a JSONL transcript into structured messages.
///
/// Extracts all user and assistant messages. Content blocks of type "text"
/// are joined; tool_use, tool_result, thinking blocks are skipped.
/// System-reminder blocks are filtered out.
pub fn parse_transcript(path: &Path) -> Result<Vec<TranscriptMessage>, String> {
let content = fs::read_to_string(path)
.map_err(|e| format!("read {}: {}", path.display(), e))?;
let mut messages = Vec::new();
for (i, line) in content.lines().enumerate() {
let Ok(obj) = serde_json::from_str::<serde_json::Value>(line) else { continue };
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
if msg_type != "user" && msg_type != "assistant" { continue; }
let timestamp = obj.get("timestamp")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let user_type = obj.get("userType")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let Some(text) = extract_text_content(&obj) else { continue };
let text = text.trim().to_string();
if text.is_empty() { continue; }
messages.push(TranscriptMessage {
line: i + 1,
role: msg_type.to_string(),
text,
timestamp,
user_type,
});
}
Ok(messages)
}
/// Extract text content from a JSONL message object.
///
/// Handles both string content and array-of-blocks content (filtering to
/// type="text" blocks only). Strips `<system-reminder>` tags.
fn extract_text_content(obj: &serde_json::Value) -> Option<String> {
let msg = obj.get("message").unwrap_or(obj);
let content = msg.get("content")?;
let text = match content {
serde_json::Value::String(s) => s.clone(),
serde_json::Value::Array(arr) => {
let texts: Vec<&str> = arr.iter()
.filter_map(|block| {
let block_type = block.get("type").and_then(|v| v.as_str())?;
if block_type != "text" { return None; }
let t = block.get("text").and_then(|v| v.as_str())?;
// Skip system-reminder blocks entirely
if t.contains("<system-reminder>") { return None; }
Some(t)
})
.collect();
if texts.is_empty() { return None; }
texts.join("\n")
}
_ => return None,
};
Some(text)
}
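// Hypothetical usage sketch (not one of the real agents): keep only external
// user messages from a parsed transcript, the way the enrich/fact_mine/knowledge
// callers mentioned above are assumed to filter. Path and filtering criteria
// are illustrative only.
#[cfg(test)]
mod usage_sketch {
    use super::parse_transcript;
    use std::path::Path;

    #[test]
    #[ignore] // needs a real transcript file on disk
    fn external_user_messages() {
        let msgs = parse_transcript(Path::new("/tmp/example-transcript.jsonl"))
            .expect("parse transcript");
        for m in msgs.iter()
            .filter(|m| m.role == "user")
            .filter(|m| m.user_type.as_deref() == Some("external"))
        {
            println!("line {} [{}]: {}", m.line, m.timestamp, m.text);
        }
    }
}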

View file

@ -0,0 +1,640 @@
// memory-search: combined hook for session context loading + ambient memory retrieval
//
// Modes:
// --hook Run as Claude Code UserPromptSubmit hook (reads stdin, injects into conversation)
// --debug Replay last stashed input, dump every stage to stdout
// --seen Show the seen set for current session
// (default) No-op (future: manual search modes)
use clap::Parser;
use poc_memory::search::{self, AlgoStage};
use poc_memory::store;
use std::collections::{BTreeMap, HashSet};
use std::fs;
use std::io::{self, Read, Write};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{Duration, SystemTime};
#[derive(Parser)]
#[command(name = "memory-search")]
struct Args {
/// Run as Claude Code hook (reads stdin, outputs for injection)
#[arg(long)]
hook: bool,
/// Debug mode: replay last stashed input, dump every stage
#[arg(short, long)]
debug: bool,
/// Show the seen set and returned memories for this session
#[arg(long)]
seen: bool,
/// Show full seen set (list all keys)
#[arg(long)]
seen_full: bool,
/// Max results to return
#[arg(long, default_value = "5")]
max_results: usize,
/// Algorithm pipeline stages: e.g. spread spectral,k=20 spread,max_hops=4
/// Default: spread.
pipeline: Vec<String>,
}
const STASH_PATH: &str = "/tmp/claude-memory-search/last-input.json";
/// Max bytes per context chunk (hook output limit is ~10K chars)
const CHUNK_SIZE: usize = 9000;
fn main() {
// Daemon agent calls set POC_AGENT=1 — skip memory search.
if std::env::var("POC_AGENT").is_ok() {
return;
}
let args = Args::parse();
if args.seen || args.seen_full {
show_seen();
return;
}
let input = if args.hook {
// Hook mode: read from stdin, stash for later debug runs
let mut buf = String::new();
io::stdin().read_to_string(&mut buf).unwrap_or_default();
fs::create_dir_all("/tmp/claude-memory-search").ok();
fs::write(STASH_PATH, &buf).ok();
buf
} else {
// All other modes: replay stashed input
fs::read_to_string(STASH_PATH).unwrap_or_else(|_| {
eprintln!("No stashed input at {}", STASH_PATH);
std::process::exit(1);
})
};
let debug = args.debug || !args.hook;
let json: serde_json::Value = match serde_json::from_str(&input) {
Ok(v) => v,
Err(_) => return,
};
let prompt = json["prompt"].as_str().unwrap_or("");
let session_id = json["session_id"].as_str().unwrap_or("");
if session_id.is_empty() {
return;
}
let state_dir = PathBuf::from("/tmp/claude-memory-search");
fs::create_dir_all(&state_dir).ok();
// Detect post-compaction reload via mmap backward scan
let transcript_path = json["transcript_path"].as_str().unwrap_or("");
let is_compaction = poc_memory::transcript::detect_new_compaction(
&state_dir, session_id, transcript_path,
);
// First prompt or post-compaction: load full context
let cookie_path = state_dir.join(format!("cookie-{}", session_id));
let is_first = !cookie_path.exists();
if is_first || is_compaction {
// Reset seen set to keys that load-context will inject
let seen_path = state_dir.join(format!("seen-{}", session_id));
fs::remove_file(&seen_path).ok();
}
if debug {
println!("[memory-search] session={} is_first={} is_compaction={}", session_id, is_first, is_compaction);
}
if is_first || is_compaction {
// Create/touch the cookie
let cookie = if is_first {
let c = generate_cookie();
fs::write(&cookie_path, &c).ok();
c
} else {
fs::read_to_string(&cookie_path).unwrap_or_default().trim().to_string()
};
if debug { println!("[memory-search] loading full context"); }
// Load full memory context, chunk it, print first chunk, save rest
if let Ok(output) = Command::new("poc-memory").args(["admin", "load-context"]).output() {
if output.status.success() {
let ctx = String::from_utf8_lossy(&output.stdout).to_string();
if !ctx.trim().is_empty() {
// Extract keys from all chunks for seen set
for line in ctx.lines() {
if line.starts_with("--- ") && line.ends_with(" ---") {
let inner = &line[4..line.len() - 4];
if let Some(paren) = inner.rfind(" (") {
let key = inner[..paren].trim();
mark_seen(&state_dir, session_id, key);
}
}
}
let chunks = chunk_context(&ctx, CHUNK_SIZE);
if debug {
println!("[memory-search] context: {} bytes, {} chunks",
ctx.len(), chunks.len());
}
// Print first chunk
if let Some(first) = chunks.first() {
if args.hook {
print!("{}", first);
}
}
// Save remaining chunks for drip-feeding
save_pending_chunks(&state_dir, session_id, &chunks[1..]);
}
}
}
let _ = cookie;
} else {
// Not first call: drip-feed next pending chunk
if let Some(chunk) = pop_pending_chunk(&state_dir, session_id) {
if debug {
println!("[memory-search] drip-feeding pending chunk: {} bytes", chunk.len());
}
if args.hook {
print!("{}", chunk);
}
}
}
// Search requires a prompt (PostToolUse events don't have one)
if prompt.is_empty() {
return;
}
// Skip system/AFK prompts
for prefix in &["is AFK", "You're on your own", "IRC mention"] {
if prompt.starts_with(prefix) {
return;
}
}
let store = match store::Store::load() {
Ok(s) => s,
Err(_) => return,
};
// Search for node keys in last ~150k tokens of transcript
if debug { println!("[memory-search] transcript: {}", transcript_path); }
let mut terms = extract_weighted_terms(transcript_path, 150_000, &store);
// Also extract terms from the prompt itself (handles fresh sessions
// and queries about topics not yet mentioned in the transcript)
let prompt_terms = search::extract_query_terms(prompt, 8);
if !prompt_terms.is_empty() {
if debug { println!("[memory-search] prompt terms: {}", prompt_terms); }
for word in prompt_terms.split_whitespace() {
let lower = word.to_lowercase();
// Prompt terms get weight 1.0 (same as direct mention)
terms.entry(lower).or_insert(1.0);
}
}
if debug {
println!("[memory-search] {} terms total", terms.len());
let mut by_weight: Vec<_> = terms.iter().collect();
by_weight.sort_by(|a, b| b.1.total_cmp(a.1));
for (term, weight) in by_weight.iter().take(20) {
println!(" {:.3} {}", weight, term);
}
}
if terms.is_empty() {
if debug { println!("[memory-search] no terms found, done"); }
return;
}
// Parse algorithm pipeline
let pipeline: Vec<AlgoStage> = if args.pipeline.is_empty() {
// Default: just spreading activation
vec![AlgoStage::parse("spread").unwrap()]
} else {
let mut stages = Vec::new();
for arg in &args.pipeline {
match AlgoStage::parse(arg) {
Ok(s) => stages.push(s),
Err(e) => {
eprintln!("error: {}", e);
std::process::exit(1);
}
}
}
stages
};
if debug {
let names: Vec<String> = pipeline.iter().map(|s| format!("{}", s.algo)).collect();
println!("[memory-search] pipeline: {}", names.join(""));
}
// Extract seeds from terms
let graph = poc_memory::graph::build_graph_fast(&store);
let (seeds, direct_hits) = search::match_seeds(&terms, &store);
if seeds.is_empty() {
if debug { println!("[memory-search] no seeds matched, done"); }
return;
}
if debug {
println!("[memory-search] {} seeds", seeds.len());
let mut sorted = seeds.clone();
sorted.sort_by(|a, b| b.1.total_cmp(&a.1));
for (key, score) in sorted.iter().take(20) {
println!(" {:.4} {}", score, key);
}
}
let max_results = if debug { args.max_results.max(25) } else { args.max_results };
let raw_results = search::run_pipeline(&pipeline, seeds, &graph, &store, debug, max_results);
let results: Vec<search::SearchResult> = raw_results.into_iter()
.map(|(key, activation)| {
let is_direct = direct_hits.contains(&key);
search::SearchResult { key, activation, is_direct, snippet: None }
}).collect();
if debug {
println!("[memory-search] {} search results", results.len());
for r in results.iter().take(10) {
let marker = if r.is_direct { "→" } else { " " };
println!(" {} [{:.4}] {}", marker, r.activation, r.key);
}
}
if results.is_empty() {
if debug { println!("[memory-search] no results, done"); }
return;
}
let seen = load_seen(&state_dir, session_id);
if debug { println!("[memory-search] {} keys in seen set", seen.len()); }
// Format results like poc-memory search output
let search_output = search::format_results(&results);
let cookie = fs::read_to_string(&cookie_path).unwrap_or_default().trim().to_string();
let mut result_output = String::new();
let mut count = 0;
let max_entries = 5;
for line in search_output.lines() {
if count >= max_entries { break; }
let trimmed = line.trim();
if trimmed.is_empty() { continue; }
if let Some(key) = extract_key_from_line(trimmed) {
if seen.contains(&key) { continue; }
mark_seen(&state_dir, session_id, &key);
mark_returned(&state_dir, session_id, &key);
result_output.push_str(line);
result_output.push('\n');
count += 1;
} else if count > 0 {
result_output.push_str(line);
result_output.push('\n');
}
}
if count == 0 {
if debug { println!("[memory-search] all results already seen"); }
return;
}
if args.hook {
println!("Recalled memories [{}]:", cookie);
}
print!("{}", result_output);
// Record search hits with daemon (fire-and-forget)
let hit_keys: Vec<&str> = results.iter().map(|r| r.key.as_str()).collect();
if debug { println!("[memory-search] recording {} search hits", hit_keys.len()); }
match poc_memory::agents::daemon::rpc_record_hits(&hit_keys) {
Ok(()) => { if debug { println!("[memory-search] hits recorded"); } }
Err(e) => { if debug { println!("[memory-search] hit recording failed: {}", e); } }
}
// Clean up stale state files (opportunistic)
cleanup_stale_files(&state_dir, Duration::from_secs(86400));
}
/// Split context output into chunks of approximately `max_bytes`, breaking
/// at section boundaries ("--- KEY (group) ---" lines).
fn chunk_context(ctx: &str, max_bytes: usize) -> Vec<String> {
// Split into sections at group boundaries, then merge small adjacent
// sections into chunks up to max_bytes.
let mut sections: Vec<String> = Vec::new();
let mut current = String::new();
for line in ctx.lines() {
// Group headers start new sections
if line.starts_with("--- ") && line.ends_with(" ---") && !current.is_empty() {
sections.push(std::mem::take(&mut current));
}
if !current.is_empty() {
current.push('\n');
}
current.push_str(line);
}
if !current.is_empty() {
sections.push(current);
}
// Merge small sections into chunks, respecting max_bytes
let mut chunks: Vec<String> = Vec::new();
let mut chunk = String::new();
for section in sections {
if !chunk.is_empty() && chunk.len() + section.len() + 1 > max_bytes {
chunks.push(std::mem::take(&mut chunk));
}
if !chunk.is_empty() {
chunk.push('\n');
}
chunk.push_str(&section);
}
if !chunk.is_empty() {
chunks.push(chunk);
}
chunks
}
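// Small usage sketch for chunk_context: three sections under a byte budget
// that only fits one at a time, so each chunk starts at a section header.
// Section names and budget are made up for illustration.
#[cfg(test)]
mod chunk_sketch {
    use super::chunk_context;

    #[test]
    fn splits_at_section_boundaries() {
        let ctx = "--- identity (core) ---\nwho I am\n\
                   --- journal (episodic) ---\nyesterday's notes\n\
                   --- projects (semantic) ---\ncurrent work";
        let chunks = chunk_context(ctx, 60);
        assert!(chunks.len() >= 2);
        assert!(chunks.iter().all(|c| c.starts_with("--- ")));
    }
}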
/// Save remaining chunks to disk for drip-feeding on subsequent hook calls.
fn save_pending_chunks(dir: &Path, session_id: &str, chunks: &[String]) {
let chunks_dir = dir.join(format!("chunks-{}", session_id));
// Clear any old chunks
let _ = fs::remove_dir_all(&chunks_dir);
if chunks.is_empty() { return; }
fs::create_dir_all(&chunks_dir).ok();
for (i, chunk) in chunks.iter().enumerate() {
let path = chunks_dir.join(format!("{:04}", i));
fs::write(path, chunk).ok();
}
}
/// Pop the next pending chunk (lowest numbered file). Returns None if no chunks remain.
fn pop_pending_chunk(dir: &Path, session_id: &str) -> Option<String> {
let chunks_dir = dir.join(format!("chunks-{}", session_id));
if !chunks_dir.exists() { return None; }
let mut entries: Vec<_> = fs::read_dir(&chunks_dir).ok()?
.flatten()
.filter(|e| e.file_type().map(|t| t.is_file()).unwrap_or(false))
.collect();
entries.sort_by_key(|e| e.file_name());
let first = entries.first()?;
let content = fs::read_to_string(first.path()).ok()?;
fs::remove_file(first.path()).ok();
// Clean up directory if empty
if fs::read_dir(&chunks_dir).ok().map(|mut d| d.next().is_none()).unwrap_or(true) {
fs::remove_dir(&chunks_dir).ok();
}
Some(content)
}
/// Reverse-scan the transcript JSONL, extracting text from user/assistant
/// messages until we accumulate `max_tokens` tokens of text content.
/// Then search for all node keys as substrings, weighted by position.
fn extract_weighted_terms(
path: &str,
max_tokens: usize,
store: &poc_memory::store::Store,
) -> BTreeMap<String, f64> {
if path.is_empty() { return BTreeMap::new(); }
let content = match fs::read_to_string(path) {
Ok(c) => c,
Err(_) => return BTreeMap::new(),
};
// Collect text from messages, scanning backwards, until token budget hit
let mut message_texts: Vec<String> = Vec::new();
let mut token_count = 0;
for line in content.lines().rev() {
if token_count >= max_tokens { break; }
let obj: serde_json::Value = match serde_json::from_str(line) {
Ok(v) => v,
Err(_) => continue,
};
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
if msg_type != "user" && msg_type != "assistant" { continue; }
let mut msg_text = String::new();
let msg = obj.get("message").unwrap_or(&obj);
match msg.get("content") {
Some(serde_json::Value::String(s)) => {
msg_text.push_str(s);
}
Some(serde_json::Value::Array(arr)) => {
for block in arr {
if block.get("type").and_then(|v| v.as_str()) == Some("text") {
if let Some(t) = block.get("text").and_then(|v| v.as_str()) {
msg_text.push(' ');
msg_text.push_str(t);
}
}
}
}
_ => {}
}
token_count += msg_text.len() / 4;
message_texts.push(msg_text);
}
// Reverse so oldest is first (position weighting: later = more recent = higher)
message_texts.reverse();
let all_text = message_texts.join(" ").to_lowercase();
let text_len = all_text.len();
if text_len == 0 { return BTreeMap::new(); }
// Search for each node key as a substring (casefolded), accumulate position-weighted score
let mut terms = BTreeMap::new();
for (key, _node) in &store.nodes {
let key_folded = key.to_lowercase();
let mut pos = 0;
while let Some(found) = all_text[pos..].find(&key_folded) {
let abs_pos = pos + found;
let weight = (abs_pos + 1) as f64 / text_len as f64;
*terms.entry(key_folded.clone()).or_insert(0.0) += weight;
pos = abs_pos + key_folded.len();
}
}
terms
}
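// Worked example of the position weighting above: in a 10,000-char window,
// a node key found at byte offset 2,499 contributes (2_500 / 10_000) = 0.25;
// the same key found again at offset 8,999 adds 0.90, for a total of 1.15.
// Keys mentioned more often and more recently therefore score higher.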
fn extract_key_from_line(line: &str) -> Option<String> {
let after_bracket = line.find("] ")?;
let rest = &line[after_bracket + 2..];
let key_end = rest.find(" (c").unwrap_or(rest.len());
let key = rest[..key_end].trim();
if key.is_empty() {
None
} else {
Some(key.to_string())
}
}
fn generate_cookie() -> String {
uuid::Uuid::new_v4().as_simple().to_string()[..12].to_string()
}
/// Parse a seen-file line: "TIMESTAMP\tKEY" or legacy "KEY"
fn parse_seen_line(line: &str) -> &str {
line.split_once('\t').map(|(_, key)| key).unwrap_or(line)
}
fn load_seen(dir: &Path, session_id: &str) -> HashSet<String> {
let path = dir.join(format!("seen-{}", session_id));
if path.exists() {
fs::read_to_string(path)
.unwrap_or_default()
.lines()
.filter(|s| !s.is_empty())
.map(|s| parse_seen_line(s).to_string())
.collect()
} else {
HashSet::new()
}
}
fn mark_seen(dir: &Path, session_id: &str, key: &str) {
let path = dir.join(format!("seen-{}", session_id));
if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(path) {
let ts = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S");
writeln!(f, "{}\t{}", ts, key).ok();
}
}
fn mark_returned(dir: &Path, session_id: &str, key: &str) {
let returned = load_returned(dir, session_id);
if returned.contains(&key.to_string()) { return; }
let path = dir.join(format!("returned-{}", session_id));
if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(path) {
writeln!(f, "{}", key).ok();
}
}
fn load_returned(dir: &Path, session_id: &str) -> Vec<String> {
let path = dir.join(format!("returned-{}", session_id));
if path.exists() {
let mut seen = HashSet::new();
fs::read_to_string(path)
.unwrap_or_default()
.lines()
.filter(|s| !s.is_empty())
.filter(|s| seen.insert(s.to_string()))
.map(|s| s.to_string())
.collect()
} else {
Vec::new()
}
}
fn show_seen() {
let state_dir = PathBuf::from("/tmp/claude-memory-search");
// Read stashed input for session_id
let input = match fs::read_to_string(STASH_PATH) {
Ok(s) => s,
Err(_) => {
eprintln!("No stashed input at {}", STASH_PATH);
return;
}
};
let json: serde_json::Value = match serde_json::from_str(&input) {
Ok(v) => v,
Err(_) => {
eprintln!("Failed to parse stashed input");
return;
}
};
let session_id = json["session_id"].as_str().unwrap_or("");
if session_id.is_empty() {
eprintln!("No session_id in stashed input");
return;
}
println!("Session: {}", session_id);
let cookie_path = state_dir.join(format!("cookie-{}", session_id));
if let Ok(cookie) = fs::read_to_string(&cookie_path) {
println!("Cookie: {}", cookie.trim());
}
let returned = load_returned(&state_dir, session_id);
if !returned.is_empty() {
println!("\nReturned by search ({}):", returned.len());
for key in &returned {
println!(" {}", key);
}
}
// Read seen file in insertion order (append-only file)
let seen_path = state_dir.join(format!("seen-{}", session_id));
let seen_lines: Vec<String> = fs::read_to_string(&seen_path)
.unwrap_or_default()
.lines()
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
.collect();
let returned_set: HashSet<_> = returned.iter().cloned().collect();
let pre_seeded = seen_lines.len().saturating_sub(returned.len());
println!("\nSeen set ({} total, {} pre-seeded):", seen_lines.len(), pre_seeded);
if Args::parse().seen_full {
for line in &seen_lines {
let key = parse_seen_line(line);
let marker = if returned_set.contains(key) { "→" } else { " " };
// Show timestamp if present, otherwise just key
if let Some((ts, k)) = line.split_once('\t') {
println!(" {} {}{}", ts, marker, k);
} else {
println!(" (no ts) {}{}", marker, line);
}
}
}
}
fn cleanup_stale_files(dir: &Path, max_age: Duration) {
let entries = match fs::read_dir(dir) {
Ok(e) => e,
Err(_) => return,
};
let cutoff = SystemTime::now() - max_age;
for entry in entries.flatten() {
if let Ok(meta) = entry.metadata() {
if let Ok(modified) = meta.modified() {
if modified < cutoff {
fs::remove_file(entry.path()).ok();
}
}
}
}
}

View file

@ -0,0 +1,328 @@
// parse-claude-conversation: debug tool for inspecting what's in the context window
//
// Two-layer design:
// 1. extract_context_items() — walks JSONL from last compaction, yields
// structured records representing what's in the context window
// 2. format_as_context() — renders those records as they appear to Claude
//
// The transcript is mmap'd and scanned backwards from EOF using brace-depth
// tracking to find complete JSON objects, avoiding a full forward scan of
// what can be a 500MB+ file.
//
// Usage:
// parse-claude-conversation [TRANSCRIPT_PATH]
// parse-claude-conversation --last # use the last stashed session
use clap::Parser;
use memmap2::Mmap;
use poc_memory::transcript::{JsonlBackwardIter, find_last_compaction};
use serde_json::Value;
use std::fs;
#[derive(Parser)]
#[command(name = "parse-claude-conversation")]
struct Args {
/// Transcript JSONL path (or --last to use stashed session)
path: Option<String>,
/// Use the last stashed session from memory-search
#[arg(long)]
last: bool,
/// Dump raw JSONL objects. Optional integer: number of extra objects
/// to include before the compaction boundary.
#[arg(long, num_args = 0..=1, default_missing_value = "0")]
raw: Option<usize>,
}
// --- Context extraction ---
/// A single item in the context window, as Claude sees it.
enum ContextItem {
UserText(String),
SystemReminder(String),
AssistantText(String),
AssistantThinking,
ToolUse { name: String, input: String },
ToolResult(String),
}
/// Extract context items from the transcript, starting from the last compaction.
fn extract_context_items(data: &[u8]) -> Vec<ContextItem> {
let start = find_last_compaction(data).unwrap_or(0);
let region = &data[start..];
let mut items = Vec::new();
// Forward scan through JSONL lines from compaction onward
for line in region.split(|&b| b == b'\n') {
if line.is_empty() { continue; }
let obj: Value = match serde_json::from_slice(line) {
Ok(v) => v,
Err(_) => continue,
};
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
match msg_type {
"user" => {
if let Some(content) = obj.get("message").and_then(|m| m.get("content")) {
extract_user_content(content, &mut items);
}
}
"assistant" => {
if let Some(content) = obj.get("message").and_then(|m| m.get("content")) {
extract_assistant_content(content, &mut items);
}
}
_ => {}
}
}
items
}
/// Parse user message content into context items.
fn extract_user_content(content: &Value, items: &mut Vec<ContextItem>) {
match content {
Value::String(s) => {
split_system_reminders(s, items, false);
}
Value::Array(arr) => {
for block in arr {
let btype = block.get("type").and_then(|v| v.as_str()).unwrap_or("");
match btype {
"text" => {
if let Some(t) = block.get("text").and_then(|v| v.as_str()) {
split_system_reminders(t, items, false);
}
}
"tool_result" => {
let result_text = extract_tool_result_text(block);
if !result_text.is_empty() {
split_system_reminders(&result_text, items, true);
}
}
_ => {}
}
}
}
_ => {}
}
}
/// Extract text from a tool_result block (content can be string or array).
fn extract_tool_result_text(block: &Value) -> String {
match block.get("content") {
Some(Value::String(s)) => s.clone(),
Some(Value::Array(arr)) => {
arr.iter()
.filter_map(|b| b.get("text").and_then(|v| v.as_str()))
.collect::<Vec<_>>()
.join("\n")
}
_ => String::new(),
}
}
/// Split text on <system-reminder> tags. Non-reminder text emits UserText
/// or ToolResult depending on `is_tool_result`.
fn split_system_reminders(text: &str, items: &mut Vec<ContextItem>, is_tool_result: bool) {
let mut remaining = text;
loop {
if let Some(start) = remaining.find("<system-reminder>") {
let before = remaining[..start].trim();
if !before.is_empty() {
if is_tool_result {
items.push(ContextItem::ToolResult(before.to_string()));
} else {
items.push(ContextItem::UserText(before.to_string()));
}
}
let after_open = &remaining[start + "<system-reminder>".len()..];
if let Some(end) = after_open.find("</system-reminder>") {
let reminder = after_open[..end].trim();
if !reminder.is_empty() {
items.push(ContextItem::SystemReminder(reminder.to_string()));
}
remaining = &after_open[end + "</system-reminder>".len()..];
} else {
let reminder = after_open.trim();
if !reminder.is_empty() {
items.push(ContextItem::SystemReminder(reminder.to_string()));
}
break;
}
} else {
let trimmed = remaining.trim();
if !trimmed.is_empty() {
if is_tool_result {
items.push(ContextItem::ToolResult(trimmed.to_string()));
} else {
items.push(ContextItem::UserText(trimmed.to_string()));
}
}
break;
}
}
}
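// Worked example of the splitting above: the user-content string
//   "run the tests <system-reminder>context low</system-reminder> please"
// produces three items in order: UserText("run the tests"),
// SystemReminder("context low"), UserText("please"). With is_tool_result
// set, the non-reminder pieces become ToolResult items instead.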
/// Parse assistant message content into context items.
fn extract_assistant_content(content: &Value, items: &mut Vec<ContextItem>) {
match content {
Value::String(s) => {
let trimmed = s.trim();
if !trimmed.is_empty() {
items.push(ContextItem::AssistantText(trimmed.to_string()));
}
}
Value::Array(arr) => {
for block in arr {
let btype = block.get("type").and_then(|v| v.as_str()).unwrap_or("");
match btype {
"text" => {
if let Some(t) = block.get("text").and_then(|v| v.as_str()) {
let trimmed = t.trim();
if !trimmed.is_empty() {
items.push(ContextItem::AssistantText(trimmed.to_string()));
}
}
}
"tool_use" => {
let name = block.get("name")
.and_then(|v| v.as_str())
.unwrap_or("?")
.to_string();
let input = block.get("input")
.map(|v| v.to_string())
.unwrap_or_default();
items.push(ContextItem::ToolUse { name, input });
}
"thinking" => {
items.push(ContextItem::AssistantThinking);
}
_ => {}
}
}
}
_ => {}
}
}
// --- Formatting layer ---
fn truncate(s: &str, max: usize) -> String {
if s.len() <= max {
return s.to_string();
}
// Back off to a char boundary so slicing can't panic partway through a
// multi-byte UTF-8 character.
let mut cut = max;
while !s.is_char_boundary(cut) {
cut -= 1;
}
format!("{}...({} total)", &s[..cut], s.len())
}
fn format_as_context(items: &[ContextItem]) {
for item in items {
match item {
ContextItem::UserText(text) => {
println!("USER: {}", truncate(text, 300));
println!();
}
ContextItem::SystemReminder(text) => {
println!("<system-reminder>");
println!("{}", truncate(text, 500));
println!("</system-reminder>");
println!();
}
ContextItem::AssistantText(text) => {
println!("ASSISTANT: {}", truncate(text, 300));
println!();
}
ContextItem::AssistantThinking => {
println!("[thinking]");
println!();
}
ContextItem::ToolUse { name, input } => {
println!("TOOL_USE: {} {}", name, truncate(input, 200));
println!();
}
ContextItem::ToolResult(text) => {
println!("TOOL_RESULT: {}", truncate(text, 300));
println!();
}
}
}
}
fn main() {
let args = Args::parse();
let path = if args.last {
let stash = fs::read_to_string("/tmp/claude-memory-search/last-input.json")
.expect("No stashed input");
let json: Value = serde_json::from_str(&stash).expect("Bad JSON");
json["transcript_path"]
.as_str()
.expect("No transcript_path")
.to_string()
} else if let Some(p) = args.path {
p
} else {
eprintln!("error: provide a transcript path or --last");
std::process::exit(1);
};
let file = fs::File::open(&path).expect("Can't open transcript");
let mmap = unsafe { Mmap::map(&file).expect("Failed to mmap") };
eprintln!(
"Transcript: {} ({:.1} MB)",
&path,
mmap.len() as f64 / 1_000_000.0
);
let compaction_offset = find_last_compaction(&mmap).unwrap_or(0);
eprintln!("Compaction at byte offset: {}", compaction_offset);
if let Some(extra) = args.raw {
use std::io::Write;
// Collect `extra` JSON objects before the compaction boundary
let mut before = Vec::new();
if extra > 0 && compaction_offset > 0 {
for obj_bytes in JsonlBackwardIter::new(&mmap[..compaction_offset]) {
if let Ok(obj) = serde_json::from_slice::<Value>(obj_bytes) {
let t = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
if t == "file-history-snapshot" { continue; }
}
before.push(obj_bytes.to_vec());
if before.len() >= extra {
break;
}
}
before.reverse();
}
for obj in &before {
std::io::stdout().write_all(obj).ok();
println!();
}
// Then dump everything from compaction onward
let region = &mmap[compaction_offset..];
for line in region.split(|&b| b == b'\n') {
if line.is_empty() { continue; }
if let Ok(obj) = serde_json::from_slice::<Value>(line) {
let t = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
if t == "file-history-snapshot" { continue; }
std::io::stdout().write_all(line).ok();
println!();
}
}
} else {
let items = extract_context_items(&mmap);
eprintln!("Context items: {}", items.len());
format_as_context(&items);
}
}

View file

@ -0,0 +1,214 @@
// Unified Claude Code hook.
//
// Single binary handling all hook events:
// UserPromptSubmit — signal daemon, check notifications, check context
// PostToolUse — check context (rate-limited)
// Stop — signal daemon response
//
// Replaces: record-user-message-time.sh, check-notifications.sh,
// check-context-usage.sh, notify-done.sh, context-check
use serde_json::Value;
use std::fs;
use std::io::{self, Read};
use std::path::PathBuf;
use std::process::Command;
use std::time::{SystemTime, UNIX_EPOCH};
const CONTEXT_THRESHOLD: u64 = 130_000;
const RATE_LIMIT_SECS: u64 = 60;
const SOCK_PATH: &str = ".claude/hooks/idle-timer.sock";
fn now_secs() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs()
}
fn home() -> PathBuf {
PathBuf::from(std::env::var("HOME").unwrap_or_else(|_| "/root".into()))
}
fn daemon_cmd(args: &[&str]) {
Command::new("poc-daemon")
.args(args)
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.ok();
}
fn daemon_available() -> bool {
home().join(SOCK_PATH).exists()
}
fn signal_user() {
let pane = std::env::var("TMUX_PANE").unwrap_or_default();
if pane.is_empty() {
daemon_cmd(&["user"]);
} else {
daemon_cmd(&["user", &pane]);
}
}
fn signal_response() {
daemon_cmd(&["response"]);
}
fn check_notifications() {
if !daemon_available() {
return;
}
let output = Command::new("poc-daemon")
.arg("notifications")
.output()
.ok();
if let Some(out) = output {
let text = String::from_utf8_lossy(&out.stdout);
if !text.trim().is_empty() {
println!("You have pending notifications:");
print!("{text}");
}
}
}
fn check_context(transcript: &PathBuf, rate_limit: bool) {
if rate_limit {
let rate_file = PathBuf::from("/tmp/claude-context-check-last");
if let Ok(s) = fs::read_to_string(&rate_file) {
if let Ok(last) = s.trim().parse::<u64>() {
if now_secs() - last < RATE_LIMIT_SECS {
return;
}
}
}
let _ = fs::write(&rate_file, now_secs().to_string());
}
if !transcript.exists() {
return;
}
let content = match fs::read_to_string(transcript) {
Ok(c) => c,
Err(_) => return,
};
let mut usage: u64 = 0;
for line in content.lines().rev().take(500) {
if !line.contains("cache_read_input_tokens") {
continue;
}
if let Ok(v) = serde_json::from_str::<Value>(line) {
let u = &v["message"]["usage"];
let input_tokens = u["input_tokens"].as_u64().unwrap_or(0);
let cache_creation = u["cache_creation_input_tokens"].as_u64().unwrap_or(0);
let cache_read = u["cache_read_input_tokens"].as_u64().unwrap_or(0);
usage = input_tokens + cache_creation + cache_read;
break;
}
}
if usage > CONTEXT_THRESHOLD {
print!(
"\
CONTEXT WARNING: Compaction approaching ({usage} tokens). Write a journal entry NOW.
Use `poc-memory journal write \"entry text\"` to save a dated entry covering:
- What you're working on and current state (done / in progress / blocked)
- Key things learned this session (patterns, debugging insights)
- Anything half-finished that needs pickup
Keep it narrative, not a task log."
);
}
}
fn main() {
let mut input = String::new();
io::stdin().read_to_string(&mut input).ok();
let hook: Value = match serde_json::from_str(&input) {
Ok(v) => v,
Err(_) => return,
};
let hook_type = hook["hook_event_name"].as_str().unwrap_or("unknown");
let transcript = hook["transcript_path"]
.as_str()
.filter(|p| !p.is_empty())
.map(PathBuf::from);
// Daemon agent calls set POC_AGENT=1 — skip all signaling.
// Without this, the daemon's claude -p calls trigger hooks that
// signal "user active", keeping the idle timer permanently reset.
if std::env::var("POC_AGENT").is_ok() {
return;
}
match hook_type {
"UserPromptSubmit" => {
signal_user();
check_notifications();
// Run memory-search, passing through the hook input it needs
if let Ok(output) = Command::new("memory-search")
.arg("--hook")
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::null())
.spawn()
.and_then(|mut child| {
if let Some(ref mut stdin) = child.stdin {
use std::io::Write;
let _ = stdin.write_all(input.as_bytes());
}
child.wait_with_output()
})
{
let text = String::from_utf8_lossy(&output.stdout);
if !text.is_empty() {
print!("{text}");
}
}
if let Some(ref t) = transcript {
check_context(t, false);
}
}
"PostToolUse" => {
// Drip-feed pending context chunks from initial load
if let Ok(output) = Command::new("memory-search")
.arg("--hook")
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::null())
.spawn()
.and_then(|mut child| {
if let Some(ref mut stdin) = child.stdin {
use std::io::Write;
let _ = stdin.write_all(input.as_bytes());
}
child.wait_with_output()
})
{
let text = String::from_utf8_lossy(&output.stdout);
if !text.is_empty() {
print!("{text}");
}
}
if let Some(ref t) = transcript {
check_context(t, true);
}
}
"Stop" => {
let stop_hook_active = hook["stop_hook_active"].as_bool().unwrap_or(false);
if !stop_hook_active {
signal_response();
}
}
_ => {}
}
}
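// Shape of the hook input this binary reads on stdin, reconstructed from the
// fields used above (hook_event_name, transcript_path, stop_hook_active);
// memory-search additionally reads session_id and prompt from the same JSON.
// Values here are illustrative, not from a real session:
//
//   {"hook_event_name": "UserPromptSubmit",
//    "session_id": "abc123",
//    "prompt": "what did we decide about the daemon?",
//    "transcript_path": "/root/.claude/projects/example/session.jsonl"}
//
//   {"hook_event_name": "Stop",
//    "stop_hook_active": false,
//    "transcript_path": "/root/.claude/projects/example/session.jsonl"}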

191
poc-memory/src/config.rs Normal file
View file

@ -0,0 +1,191 @@
// Configuration for poc-memory
//
// Loaded from ~/.config/poc-memory/config.jsonl (or POC_MEMORY_CONFIG env).
// Falls back to sensible defaults if no config file exists.
//
// Format: JSONL — one JSON object per line.
// First line with "config" key: global settings.
// Lines with "group" key: context loading groups (order preserved).
//
// Example:
// {"config": {"user_name": "Alice", "data_dir": "~/.claude/memory"}}
// {"group": "identity", "keys": ["identity"]}
// {"group": "orientation", "keys": ["where-am-i.md"], "source": "file"}
use std::path::PathBuf;
use std::sync::OnceLock;
static CONFIG: OnceLock<Config> = OnceLock::new();
#[derive(Debug, Clone, PartialEq)]
pub enum ContextSource {
Store,
File,
Journal,
}
#[derive(Debug, Clone)]
pub struct ContextGroup {
pub label: String,
pub keys: Vec<String>,
pub source: ContextSource,
}
#[derive(Debug, Clone)]
pub struct Config {
/// Display name for the human user in transcripts/prompts.
pub user_name: String,
/// Display name for the AI assistant.
pub assistant_name: String,
/// Base directory for memory data (store, logs, status).
pub data_dir: PathBuf,
/// Directory containing Claude session transcripts.
pub projects_dir: PathBuf,
/// Core node keys that should never be decayed/deleted.
pub core_nodes: Vec<String>,
/// How many days of journal to include in load-context.
pub journal_days: u32,
/// Max journal entries to include in load-context.
pub journal_max: usize,
/// Ordered context groups for session-start loading.
pub context_groups: Vec<ContextGroup>,
/// Max concurrent LLM calls in the daemon.
pub llm_concurrency: usize,
/// Directory containing prompt templates for agents.
pub prompts_dir: PathBuf,
/// Separate Claude config dir for background agent work (daemon jobs).
/// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
/// with different OAuth credentials than the interactive session.
pub agent_config_dir: Option<PathBuf>,
}
impl Default for Config {
fn default() -> Self {
let home = PathBuf::from(std::env::var("HOME").expect("HOME not set"));
Self {
user_name: "User".to_string(),
assistant_name: "Assistant".to_string(),
data_dir: home.join(".claude/memory"),
projects_dir: home.join(".claude/projects"),
core_nodes: vec!["identity".to_string(), "core-practices".to_string()],
journal_days: 7,
journal_max: 20,
context_groups: vec![
ContextGroup {
label: "identity".into(),
keys: vec!["identity".into()],
source: ContextSource::Store,
},
ContextGroup {
label: "core-practices".into(),
keys: vec!["core-practices".into()],
source: ContextSource::Store,
},
],
llm_concurrency: 1,
prompts_dir: home.join("poc/memory/prompts"),
agent_config_dir: None,
}
}
}
impl Config {
fn load_from_file() -> Self {
let path = std::env::var("POC_MEMORY_CONFIG")
.map(PathBuf::from)
.unwrap_or_else(|_| {
PathBuf::from(std::env::var("HOME").expect("HOME not set"))
.join(".config/poc-memory/config.jsonl")
});
let mut config = Config::default();
let Ok(content) = std::fs::read_to_string(&path) else {
return config;
};
let mut context_groups: Vec<ContextGroup> = Vec::new();
// Parse as a stream of JSON values (handles multi-line objects)
let stream = serde_json::Deserializer::from_str(&content)
.into_iter::<serde_json::Value>();
for result in stream {
let Ok(obj) = result else { continue };
// Global config line
if let Some(cfg) = obj.get("config") {
if let Some(s) = cfg.get("user_name").and_then(|v| v.as_str()) {
config.user_name = s.to_string();
}
if let Some(s) = cfg.get("assistant_name").and_then(|v| v.as_str()) {
config.assistant_name = s.to_string();
}
if let Some(s) = cfg.get("data_dir").and_then(|v| v.as_str()) {
config.data_dir = expand_home(s);
}
if let Some(s) = cfg.get("projects_dir").and_then(|v| v.as_str()) {
config.projects_dir = expand_home(s);
}
if let Some(arr) = cfg.get("core_nodes").and_then(|v| v.as_array()) {
config.core_nodes = arr.iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.collect();
}
if let Some(d) = cfg.get("journal_days").and_then(|v| v.as_u64()) {
config.journal_days = d as u32;
}
if let Some(m) = cfg.get("journal_max").and_then(|v| v.as_u64()) {
config.journal_max = m as usize;
}
if let Some(n) = cfg.get("llm_concurrency").and_then(|v| v.as_u64()) {
config.llm_concurrency = n.max(1) as usize;
}
if let Some(s) = cfg.get("prompts_dir").and_then(|v| v.as_str()) {
config.prompts_dir = expand_home(s);
}
if let Some(s) = cfg.get("agent_config_dir").and_then(|v| v.as_str()) {
config.agent_config_dir = Some(expand_home(s));
}
continue;
}
// Context group line
if let Some(label) = obj.get("group").and_then(|v| v.as_str()) {
let keys = obj.get("keys")
.and_then(|v| v.as_array())
.map(|arr| arr.iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.collect())
.unwrap_or_default();
let source = match obj.get("source").and_then(|v| v.as_str()) {
Some("file") => ContextSource::File,
Some("journal") => ContextSource::Journal,
_ => ContextSource::Store,
};
context_groups.push(ContextGroup { label: label.to_string(), keys, source });
}
}
if !context_groups.is_empty() {
config.context_groups = context_groups;
}
config
}
}
fn expand_home(path: &str) -> PathBuf {
if let Some(rest) = path.strip_prefix("~/") {
PathBuf::from(std::env::var("HOME").expect("HOME not set")).join(rest)
} else {
PathBuf::from(path)
}
}
/// Get the global config (loaded once on first access).
pub fn get() -> &'static Config {
CONFIG.get_or_init(Config::load_from_file)
}
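// Fuller config.jsonl example exercising the fields parsed above; every value
// is illustrative, not a real deployment:
//
//   {"config": {"user_name": "Alice", "assistant_name": "Claude",
//               "data_dir": "~/.claude/memory", "journal_days": 7,
//               "journal_max": 20, "llm_concurrency": 2,
//               "agent_config_dir": "~/.claude-agent"}}
//   {"group": "identity", "keys": ["identity", "core-practices"]}
//   {"group": "orientation", "keys": ["where-am-i.md"], "source": "file"}
//   {"group": "recent-journal", "keys": [], "source": "journal"}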

View file

@ -7,7 +7,7 @@
// search_hits: key → u64 (how often memory-search found this node)
// last_hit_ts: key → i64 (unix timestamp of last search hit)
use redb::{Database, ReadableDatabase, ReadableTable, TableDefinition};
use redb::{Database, ReadableTable, TableDefinition};
use std::path::PathBuf;
const SEARCH_HITS: TableDefinition<&str, u64> = TableDefinition::new("search_hits");
@ -18,7 +18,7 @@ fn db_path() -> PathBuf {
}
/// Open (or create) the counters database.
fn open() -> Result<Database, String> {
pub fn open() -> Result<Database, String> {
Database::create(db_path()).map_err(|e| format!("open counters db: {}", e))
}

View file

@ -12,16 +12,6 @@ use crate::store::{Store, RelationType, StoreView};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque};
/// Community info for reporting
#[derive(Clone, Debug)]
pub struct CommunityInfo {
pub id: u32,
pub members: Vec<String>,
pub size: usize,
pub isolation: f32,
pub cross_edges: usize,
}
/// Weighted edge in the graph
#[derive(Clone, Debug)]
pub struct Edge {
@ -74,43 +64,6 @@ impl Graph {
.unwrap_or_default()
}
/// Jaccard similarity between two nodes' neighborhoods.
/// Measures overlap: |intersection| / |union| of their neighbor sets.
pub fn jaccard(&self, a: &str, b: &str) -> f32 {
let na = self.neighbor_keys(a);
let nb = self.neighbor_keys(b);
let intersection = na.intersection(&nb).count();
let union = na.union(&nb).count();
if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
}
/// Compute Jaccard-based strength for every edge in the graph.
/// Returns (source_key, target_key, jaccard_strength) triples.
/// Scales raw Jaccard (typically 0.0-0.3) to a useful range.
pub fn jaccard_strengths(&self) -> Vec<(String, String, f32)> {
let mut result = Vec::new();
let mut seen = HashSet::new();
for (key, edges) in &self.adj {
for edge in edges {
// Deduplicate undirected edges
let pair = if key < &edge.target {
(key.as_str(), edge.target.as_str())
} else {
(edge.target.as_str(), key.as_str())
};
if !seen.insert((pair.0.to_string(), pair.1.to_string())) {
continue;
}
let j = self.jaccard(key, &edge.target);
// Scale: raw Jaccard 0.05 → 0.15, 0.15 → 0.45, 0.30 → 0.90
// Formula: clamp(j * 3, 0.1, 1.0)
let strength = (j * 3.0).clamp(0.1, 1.0);
result.push((key.clone(), edge.target.clone(), strength));
}
}
result
}
pub fn community_count(&self) -> usize {
let labels: HashSet<_> = self.communities.values().collect();
labels.len()
@ -120,75 +73,6 @@ impl Graph {
&self.communities
}
/// Community isolation scores: for each community, what fraction of its
/// total edge weight is internal (vs cross-community). Returns community_id → score
/// where 1.0 = fully isolated (no external edges), 0.0 = all edges external.
/// Singleton communities (1 node, no edges) get score 1.0.
pub fn community_isolation(&self) -> HashMap<u32, f32> {
// Accumulate internal and total edge weight per community
let mut internal: HashMap<u32, f32> = HashMap::new();
let mut total: HashMap<u32, f32> = HashMap::new();
for (key, edges) in &self.adj {
let Some(&my_comm) = self.communities.get(key) else { continue };
for edge in edges {
let nbr_comm = self.communities.get(&edge.target).copied().unwrap_or(u32::MAX);
*total.entry(my_comm).or_default() += edge.strength;
if my_comm == nbr_comm {
*internal.entry(my_comm).or_default() += edge.strength;
}
}
}
let mut scores = HashMap::new();
let all_communities: HashSet<u32> = self.communities.values().copied().collect();
for &comm in &all_communities {
let t = total.get(&comm).copied().unwrap_or(0.0);
if t < 0.001 {
scores.insert(comm, 1.0); // no edges = fully isolated
} else {
let i = internal.get(&comm).copied().unwrap_or(0.0);
scores.insert(comm, i / t);
}
}
scores
}
/// Community info: id → (member keys, size, isolation score, cross-community edge count)
pub fn community_info(&self) -> Vec<CommunityInfo> {
let isolation = self.community_isolation();
// Group members by community
let mut members: HashMap<u32, Vec<String>> = HashMap::new();
for (key, &comm) in &self.communities {
members.entry(comm).or_default().push(key.clone());
}
// Count cross-community edges per community
let mut cross_edges: HashMap<u32, usize> = HashMap::new();
for (key, edges) in &self.adj {
let Some(&my_comm) = self.communities.get(key) else { continue };
for edge in edges {
let nbr_comm = self.communities.get(&edge.target).copied().unwrap_or(u32::MAX);
if my_comm != nbr_comm {
*cross_edges.entry(my_comm).or_default() += 1;
}
}
}
let mut result: Vec<CommunityInfo> = members.into_iter()
.map(|(id, mut keys)| {
keys.sort();
let size = keys.len();
let iso = isolation.get(&id).copied().unwrap_or(1.0);
let cross = cross_edges.get(&id).copied().unwrap_or(0) / 2; // undirected
CommunityInfo { id, members: keys, size, isolation: iso, cross_edges: cross }
})
.collect();
result.sort_by(|a, b| b.isolation.total_cmp(&a.isolation));
result
}
/// Hub degree threshold: top 5% by degree
pub fn hub_threshold(&self) -> usize {
let mut degrees: Vec<usize> = self.keys.iter()
@ -519,9 +403,11 @@ pub fn build_graph_fast(store: &impl StoreView) -> Graph {
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
let mut adj: HashMap<String, Vec<Edge>> = HashMap::new();
let mut keys: HashSet<String> = HashSet::new();
// Get keys directly from index — no need to deserialize node content
let keys: HashSet<String> = store.all_keys().into_iter().collect();
store.for_each_node(|key, _, _| {
keys.insert(key.to_owned());
});
store.for_each_relation(|source_key, target_key, strength, rel_type| {
if !keys.contains(source_key) || !keys.contains(target_key) {
@ -540,184 +426,9 @@ fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashS
});
});
add_implicit_temporal_edges(store, &keys, &mut adj);
(adj, keys)
}
/// Add implicit edges for the temporal/digest hierarchy.
///
/// These edges are derived from node types and dates — they don't
/// need to be stored. Two kinds:
/// - parent/child: session→daily→weekly→monthly (by date containment)
/// - prev/next: chronological ordering within each level
///
/// Sessions use their timestamp for date. Digest nodes (daily/weekly/monthly)
/// extract the date they *cover* from the key name, since their timestamp
/// is when the digest was created, not what period it covers.
fn add_implicit_temporal_edges(
store: &impl StoreView,
keys: &HashSet<String>,
adj: &mut HashMap<String, Vec<Edge>>,
) {
use crate::store::NodeType::*;
use chrono::{Datelike, DateTime, NaiveDate};
// Extract the covered date from a key name.
// Patterns: "daily-2026-03-06", "daily-2026-03-06-identity"
fn date_from_key(key: &str) -> Option<NaiveDate> {
let rest = key.strip_prefix("daily-")?;
if rest.len() >= 10 {
NaiveDate::parse_from_str(&rest[..10], "%Y-%m-%d").ok()
} else {
None
}
}
fn week_from_key(key: &str) -> Option<(i32, u32)> {
// "weekly-2026-W09" → (2026, 9)
let rest = key.strip_prefix("weekly-")?;
let (year_str, w_str) = rest.split_once("-W")?;
let year: i32 = year_str.parse().ok()?;
// Week string might have a suffix like "-foo"
let week_str = w_str.split('-').next()?;
let week: u32 = week_str.parse().ok()?;
Some((year, week))
}
fn month_from_key(key: &str) -> Option<(i32, u32)> {
// "monthly-2026-02" → (2026, 2)
let rest = key.strip_prefix("monthly-")?;
let (year_str, month_str) = rest.split_once('-')?;
let year: i32 = year_str.parse().ok()?;
let month_str = month_str.split('-').next()?;
let month: u32 = month_str.parse().ok()?;
Some((year, month))
}
// Collect episodic nodes by type
struct Dated { key: String, ts: i64, date: NaiveDate }
let mut sessions: Vec<Dated> = Vec::new();
let mut dailies: Vec<(String, NaiveDate)> = Vec::new();
let mut weeklies: Vec<(String, (i32, u32))> = Vec::new();
let mut monthlies: Vec<(String, (i32, u32))> = Vec::new();
store.for_each_node_meta(|key, node_type, ts| {
if !keys.contains(key) { return; }
match node_type {
EpisodicSession => {
// Prefer date from key (local time) over timestamp (UTC)
// to avoid timezone mismatches
let date = date_from_key(key).or_else(|| {
DateTime::from_timestamp(ts, 0).map(|dt| dt.date_naive())
});
if let Some(date) = date {
sessions.push(Dated { key: key.to_owned(), ts, date });
}
}
EpisodicDaily => {
if let Some(date) = date_from_key(key) {
dailies.push((key.to_owned(), date));
}
}
EpisodicWeekly => {
if let Some(yw) = week_from_key(key) {
weeklies.push((key.to_owned(), yw));
}
}
EpisodicMonthly => {
if let Some(ym) = month_from_key(key) {
monthlies.push((key.to_owned(), ym));
}
}
_ => {}
}
});
sessions.sort_by_key(|d| d.ts);
dailies.sort_by_key(|(_, d)| *d);
weeklies.sort_by_key(|(_, yw)| *yw);
monthlies.sort_by_key(|(_, ym)| *ym);
let add_edge = |adj: &mut HashMap<String, Vec<Edge>>, a: &str, b: &str| {
if let Some(edges) = adj.get(a)
&& edges.iter().any(|e| e.target == b) { return; }
adj.entry(a.to_owned()).or_default().push(Edge {
target: b.to_owned(),
strength: 1.0,
rel_type: RelationType::Auto,
});
adj.entry(b.to_owned()).or_default().push(Edge {
target: a.to_owned(),
strength: 1.0,
rel_type: RelationType::Auto,
});
};
// Build indexes: date→dailies, (year,week)→weekly, (year,month)→monthly
// Note: multiple dailies can share a date (e.g. daily-2026-03-06-identity,
// daily-2026-03-06-technical), so we collect all of them.
let mut date_to_dailies: HashMap<NaiveDate, Vec<String>> = HashMap::new();
for (key, date) in &dailies {
date_to_dailies.entry(*date).or_default().push(key.clone());
}
let mut yw_to_weekly: HashMap<(i32, u32), String> = HashMap::new();
for (key, yw) in &weeklies {
yw_to_weekly.insert(*yw, key.clone());
}
let mut ym_to_monthly: HashMap<(i32, u32), String> = HashMap::new();
for (key, ym) in &monthlies {
ym_to_monthly.insert(*ym, key.clone());
}
// Session → Daily (parent): each session links to all dailies for its date
for sess in &sessions {
if let Some(daily_keys) = date_to_dailies.get(&sess.date) {
for daily in daily_keys {
add_edge(adj, &sess.key, daily);
}
}
}
// Daily → Weekly (parent)
for (key, date) in &dailies {
let yw = (date.iso_week().year(), date.iso_week().week());
if let Some(weekly) = yw_to_weekly.get(&yw) {
add_edge(adj, key, weekly);
}
}
// Weekly → Monthly (parent)
for (key, yw) in &weeklies {
// A week can span two months; use the Thursday date (ISO week convention)
let thursday = NaiveDate::from_isoywd_opt(yw.0, yw.1, chrono::Weekday::Thu);
if let Some(d) = thursday {
let ym = (d.year(), d.month());
if let Some(monthly) = ym_to_monthly.get(&ym) {
add_edge(adj, key, monthly);
}
}
}
// Prev/next within each level
for pair in sessions.windows(2) {
add_edge(adj, &pair[0].key, &pair[1].key);
}
for pair in dailies.windows(2) {
add_edge(adj, &pair[0].0, &pair[1].0);
}
for pair in weeklies.windows(2) {
add_edge(adj, &pair[0].0, &pair[1].0);
}
for pair in monthlies.windows(2) {
add_edge(adj, &pair[0].0, &pair[1].0);
}
}
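// Hedged illustration of the derivation above, using hypothetical keys that
// follow the patterns documented in this function: a session dated 2026-03-06
// would link to "daily-2026-03-06-identity", that daily to "weekly-2026-W10"
// (2026-03-06 falls in ISO week 10), and the weekly to "monthly-2026-03" via
// its Thursday (2026-03-05). Only the date arithmetic is exercised here.
fn temporal_edge_example() {
    use chrono::{Datelike, NaiveDate};
    let date = NaiveDate::parse_from_str("2026-03-06", "%Y-%m-%d").unwrap();
    assert_eq!((date.iso_week().year(), date.iso_week().week()), (2026, 10));
    let thursday = NaiveDate::from_isoywd_opt(2026, 10, chrono::Weekday::Thu).unwrap();
    assert_eq!((thursday.year(), thursday.month()), (2026, 3));
}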
/// Label propagation community detection.
///
/// Each node starts with its own label. Each iteration: adopt the most
@ -880,33 +591,30 @@ pub fn health_report(graph: &Graph, store: &Store) -> String {
.count();
// Orphan edges: relations referencing non-existent nodes
// With index-based lookup, we count edges where endpoints don't resolve
let mut orphan_edges = 0usize;
let mut missing_nodes: HashSet<String> = HashSet::new();
store.for_each_relation(|source, target, _, _| {
let s_missing = !store.contains_key(source).unwrap_or(false);
let t_missing = !store.contains_key(target).unwrap_or(false);
for rel in &store.relations {
if rel.deleted { continue; }
let s_missing = !store.nodes.contains_key(&rel.source_key);
let t_missing = !store.nodes.contains_key(&rel.target_key);
if s_missing || t_missing {
orphan_edges += 1;
if s_missing { missing_nodes.insert(source.to_string()); }
if t_missing { missing_nodes.insert(target.to_string()); }
if s_missing { missing_nodes.insert(rel.source_key.clone()); }
if t_missing { missing_nodes.insert(rel.target_key.clone()); }
}
});
}
// NodeType breakdown
let mut type_counts: HashMap<&str, usize> = HashMap::new();
let all_keys = store.all_keys().unwrap_or_default();
for key in &all_keys {
if let Ok(Some(node)) = store.get_node(key) {
let label = match node.node_type {
crate::store::NodeType::EpisodicSession => "episodic",
crate::store::NodeType::EpisodicDaily => "daily",
crate::store::NodeType::EpisodicWeekly => "weekly",
crate::store::NodeType::EpisodicMonthly => "monthly",
crate::store::NodeType::Semantic => "semantic",
};
*type_counts.entry(label).or_default() += 1;
}
for node in store.nodes.values() {
let label = match node.node_type {
crate::store::NodeType::EpisodicSession => "episodic",
crate::store::NodeType::EpisodicDaily => "daily",
crate::store::NodeType::EpisodicWeekly => "weekly",
crate::store::NodeType::EpisodicMonthly => "monthly",
crate::store::NodeType::Semantic => "semantic",
};
*type_counts.entry(label).or_default() += 1;
}
// Load history for deltas

32
poc-memory/src/lib.rs Normal file
View file

@ -0,0 +1,32 @@
// poc-memory library — shared modules for all binaries
//
// Re-exports modules so that memory-search and other binaries
// can call library functions directly instead of shelling out.
// Core infrastructure
pub mod config;
pub mod store;
pub mod util;
pub mod graph;
pub mod search;
pub mod similarity;
pub mod spectral;
pub mod lookups;
pub mod query;
pub mod transcript;
pub mod neuro;
pub mod counters;
// Agent layer (LLM-powered operations)
pub mod agents;
pub mod tui;
// Re-export agent submodules at crate root for backwards compatibility
pub use agents::{
llm, audit, consolidate, knowledge,
enrich, fact_mine, digest, daemon,
};
pub mod memory_capnp {
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
}

View file

@ -197,3 +197,8 @@ pub fn dump_resolved(date: &str, keys: &[String]) -> Result<Vec<(String, u32)>,
Ok(resolved)
}
/// Hash a key (exposed for testing/external use).
pub fn hash_key(key: &str) -> u64 {
fnv1a(key)
}
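// Hypothetical sketch of the fnv1a() helper that hash_key wraps, assuming the
// standard 64-bit FNV-1a constants; the actual implementation lives elsewhere
// in this file and is not shown in this hunk.
fn fnv1a_sketch(key: &str) -> u64 {
    let mut hash: u64 = 0xcbf2_9ce4_8422_2325; // FNV-1a 64-bit offset basis
    for byte in key.as_bytes() {
        hash ^= *byte as u64;
        hash = hash.wrapping_mul(0x0000_0100_0000_01b3); // FNV-1a 64-bit prime
    }
    hash
}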

2636
poc-memory/src/main.rs Normal file

File diff suppressed because it is too large Load diff

368
poc-memory/src/migrate.rs Normal file
View file

@ -0,0 +1,368 @@
// Migration from old weights.json + markdown marker system
//
// Reads:
// ~/.claude/memory/weights.json (1,874 entries with metrics)
// ~/.claude/memory/*.md (content + mem markers + edges)
//
// Emits:
// ~/.claude/memory/nodes.capnp (all nodes with preserved metadata)
// ~/.claude/memory/relations.capnp (all edges from markers + md links)
// ~/.claude/memory/state.json (derived cache)
//
// Old files are preserved as backup. Run once.
use crate::store::{
self, Store, Node, NodeType, RelationType,
parse_units, new_relation,
};
use serde::Deserialize;
use uuid::Uuid;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
fn home() -> PathBuf {
PathBuf::from(env::var("HOME").expect("HOME not set"))
}
// Old system data structures (just enough for deserialization)
#[derive(Deserialize)]
struct OldStore {
#[serde(default)]
entries: HashMap<String, OldEntry>,
#[serde(default)]
retrieval_log: Vec<OldRetrievalEvent>,
#[serde(default)]
params: OldParams,
}
#[derive(Deserialize)]
#[allow(dead_code)] // fields needed for deserialization of old format
struct OldEntry {
weight: f64,
created: String,
#[serde(default)]
last_retrieved: Option<String>,
#[serde(default)]
last_used: Option<String>,
#[serde(default)]
retrievals: u32,
#[serde(default)]
uses: u32,
#[serde(default)]
wrongs: u32,
#[serde(default = "default_category")]
category: String,
}
fn default_category() -> String { "General".to_string() }
#[derive(Deserialize)]
struct OldRetrievalEvent {
query: String,
timestamp: String,
results: Vec<String>,
#[serde(default)]
used: Option<Vec<String>>,
}
#[derive(Deserialize)]
struct OldParams {
#[serde(default = "default_0_7")]
default_weight: f64,
#[serde(default = "default_0_95")]
decay_factor: f64,
#[serde(default = "default_0_15")]
use_boost: f64,
#[serde(default = "default_0_1")]
prune_threshold: f64,
#[serde(default = "default_0_3")]
edge_decay: f64,
#[serde(default = "default_3")]
max_hops: u32,
#[serde(default = "default_0_05")]
min_activation: f64,
}
impl Default for OldParams {
fn default() -> Self {
OldParams {
default_weight: 0.7,
decay_factor: 0.95,
use_boost: 0.15,
prune_threshold: 0.1,
edge_decay: 0.3,
max_hops: 3,
min_activation: 0.05,
}
}
}
fn default_0_7() -> f64 { 0.7 }
fn default_0_95() -> f64 { 0.95 }
fn default_0_15() -> f64 { 0.15 }
fn default_0_1() -> f64 { 0.1 }
fn default_0_3() -> f64 { 0.3 }
fn default_3() -> u32 { 3 }
fn default_0_05() -> f64 { 0.05 }
pub fn migrate() -> Result<(), String> {
let weights_path = home().join(".claude/memory/weights.json");
let memory_dir = home().join(".claude/memory");
let nodes_path = memory_dir.join("nodes.capnp");
let rels_path = memory_dir.join("relations.capnp");
// Safety check
if nodes_path.exists() || rels_path.exists() {
return Err("nodes.capnp or relations.capnp already exist. \
Remove them first if you want to re-migrate.".into());
}
// Load old store
let old_store: OldStore = if weights_path.exists() {
let data = fs::read_to_string(&weights_path)
.map_err(|e| format!("read weights.json: {}", e))?;
serde_json::from_str(&data)
.map_err(|e| format!("parse weights.json: {}", e))?
} else {
eprintln!("Warning: no weights.json found, migrating markdown only");
OldStore {
entries: HashMap::new(),
retrieval_log: Vec::new(),
params: OldParams::default(),
}
};
eprintln!("Old store: {} entries, {} retrieval events",
old_store.entries.len(), old_store.retrieval_log.len());
// Scan markdown files to get content + edges
let mut units_by_key: HashMap<String, store::MemoryUnit> = HashMap::new();
scan_markdown_dir(&memory_dir, &mut units_by_key)?;
eprintln!("Scanned {} markdown units", units_by_key.len());
// Create new store
let mut store = Store::default();
// Migrate params
store.params.default_weight = old_store.params.default_weight;
store.params.decay_factor = old_store.params.decay_factor;
store.params.use_boost = old_store.params.use_boost;
store.params.prune_threshold = old_store.params.prune_threshold;
store.params.edge_decay = old_store.params.edge_decay;
store.params.max_hops = old_store.params.max_hops;
store.params.min_activation = old_store.params.min_activation;
// Migrate retrieval log
store.retrieval_log = old_store.retrieval_log.iter().map(|e| {
store::RetrievalEvent {
query: e.query.clone(),
timestamp: e.timestamp.clone(),
results: e.results.clone(),
used: e.used.clone(),
}
}).collect();
// Phase 1: Create nodes
// Merge old entries (weight metadata) with markdown units (content)
let mut all_nodes: Vec<Node> = Vec::new();
let mut key_to_uuid: HashMap<String, [u8; 16]> = HashMap::new();
// First, all entries from the old store
for (key, old_entry) in &old_store.entries {
let uuid = *Uuid::new_v4().as_bytes();
key_to_uuid.insert(key.clone(), uuid);
let content = units_by_key.get(key)
.map(|u| u.content.clone())
.unwrap_or_default();
let state_tag = units_by_key.get(key)
.and_then(|u| u.state.clone())
.unwrap_or_default();
let node = Node {
uuid,
version: 1,
timestamp: store::now_epoch(),
node_type: if key.contains("journal") {
NodeType::EpisodicSession
} else {
NodeType::Semantic
},
provenance: "manual".to_string(),
key: key.clone(),
content,
weight: old_entry.weight as f32,
emotion: 0.0,
deleted: false,
source_ref: String::new(),
created: old_entry.created.clone(),
retrievals: old_entry.retrievals,
uses: old_entry.uses,
wrongs: old_entry.wrongs,
state_tag,
last_replayed: 0,
spaced_repetition_interval: 1,
position: 0,
created_at: 0,
community_id: None,
clustering_coefficient: None,
degree: None,
};
all_nodes.push(node);
}
// Then, any markdown units not in the old store
for (key, unit) in &units_by_key {
if key_to_uuid.contains_key(key) { continue; }
let uuid = *Uuid::new_v4().as_bytes();
key_to_uuid.insert(key.clone(), uuid);
let node = Node {
uuid,
version: 1,
timestamp: store::now_epoch(),
node_type: if key.contains("journal") {
NodeType::EpisodicSession
} else {
NodeType::Semantic
},
provenance: "manual".to_string(),
key: key.clone(),
content: unit.content.clone(),
weight: 0.7,
emotion: 0.0,
deleted: false,
source_ref: String::new(),
created: String::new(),
retrievals: 0,
uses: 0,
wrongs: 0,
state_tag: unit.state.clone().unwrap_or_default(),
last_replayed: 0,
spaced_repetition_interval: 1,
position: 0,
created_at: 0,
community_id: None,
clustering_coefficient: None,
degree: None,
};
all_nodes.push(node);
}
// Write nodes to capnp log
store.append_nodes(&all_nodes)?;
for node in &all_nodes {
store.uuid_to_key.insert(node.uuid, node.key.clone());
store.nodes.insert(node.key.clone(), node.clone());
}
eprintln!("Migrated {} nodes", all_nodes.len());
// Phase 2: Create relations from markdown links + causal edges
let mut all_relations = Vec::new();
for (key, unit) in &units_by_key {
let source_uuid = match key_to_uuid.get(key) {
Some(u) => *u,
None => continue,
};
// Association links (bidirectional)
for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
let target_uuid = match key_to_uuid.get(link) {
Some(u) => *u,
None => continue,
};
// Avoid duplicate relations
let exists = all_relations.iter().any(|r: &store::Relation|
(r.source == source_uuid && r.target == target_uuid) ||
(r.source == target_uuid && r.target == source_uuid));
if exists { continue; }
all_relations.push(new_relation(
source_uuid, target_uuid,
RelationType::Link, 1.0,
key, link,
));
}
// Causal edges (directed)
for cause in &unit.causes {
let cause_uuid = match key_to_uuid.get(cause) {
Some(u) => *u,
None => continue,
};
all_relations.push(new_relation(
cause_uuid, source_uuid,
RelationType::Causal, 1.0,
cause, key,
));
}
}
// Write relations to capnp log
store.append_relations(&all_relations)?;
store.relations = all_relations;
eprintln!("Migrated {} relations", store.relations.len());
// Phase 3: Compute graph metrics
store.update_graph_metrics();
// Save derived cache
store.save()?;
eprintln!("Migration complete. Files:");
eprintln!(" {}", nodes_path.display());
eprintln!(" {}", rels_path.display());
eprintln!(" {}", memory_dir.join("state.json").display());
// Verify
let g = store.build_graph();
eprintln!("\nVerification:");
eprintln!(" Nodes: {}", store.nodes.len());
eprintln!(" Relations: {}", store.relations.len());
eprintln!(" Graph edges: {}", g.edge_count());
eprintln!(" Communities: {}", g.community_count());
eprintln!(" Avg CC: {:.4}", g.avg_clustering_coefficient());
Ok(())
}
fn scan_markdown_dir(
dir: &Path,
units: &mut HashMap<String, store::MemoryUnit>,
) -> Result<(), String> {
let entries = fs::read_dir(dir)
.map_err(|e| format!("read dir {}: {}", dir.display(), e))?;
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
scan_markdown_dir(&path, units)?;
continue;
}
let Some(ext) = path.extension() else { continue };
if ext != "md" { continue }
let filename = path.file_name().unwrap().to_string_lossy().to_string();
let content = match fs::read_to_string(&path) {
Ok(c) => c,
Err(_) => continue,
};
for unit in parse_units(&filename, &content) {
units.insert(unit.key.clone(), unit);
}
}
Ok(())
}
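// Hedged sketch of how a caller might invoke the one-time migration; the real
// CLI wiring lives in main.rs (diff suppressed above) and may differ.
fn run_migration_cli() {
    match migrate() {
        Ok(()) => eprintln!("migration complete"),
        Err(e) => {
            eprintln!("migration failed: {}", e);
            std::process::exit(1);
        }
    }
}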

View file

@ -0,0 +1,25 @@
// Neuroscience-inspired memory algorithms, split by concern:
//
// scoring — pure analysis: priority, replay queues, interference, plans
// prompts — agent prompt generation and formatting
// rewrite — graph topology mutations: differentiation, closure, linking
mod scoring;
mod rewrite;
pub use scoring::{
ReplayItem,
ConsolidationPlan,
consolidation_priority,
replay_queue, replay_queue_with_graph,
detect_interference,
consolidation_plan, consolidation_plan_quick, format_plan,
daily_check,
};
pub use rewrite::{
refine_target, LinkMove,
differentiate_hub,
apply_differentiation, find_differentiable_hubs,
triangle_close, link_orphans,
};

View file

@ -0,0 +1,348 @@
// Graph topology mutations: hub differentiation, triangle closure,
// orphan linking, and link refinement. These modify the store.
use crate::store::{Store, new_relation};
use crate::graph::Graph;
use crate::similarity;
/// Collect (key, content) pairs for all section children of a file-level node.
fn section_children<'a>(store: &'a Store, file_key: &str) -> Vec<(&'a str, &'a str)> {
let prefix = format!("{}#", file_key);
store.nodes.iter()
.filter(|(k, _)| k.starts_with(&prefix))
.map(|(k, n)| (k.as_str(), n.content.as_str()))
.collect()
}
/// Find the best matching candidate by cosine similarity against content.
/// Returns (key, similarity) if any candidate exceeds threshold.
fn best_match(candidates: &[(&str, &str)], content: &str, threshold: f32) -> Option<(String, f32)> {
let (best_key, best_sim) = candidates.iter()
.map(|(key, text)| (*key, similarity::cosine_similarity(content, text)))
.max_by(|a, b| a.1.total_cmp(&b.1))?;
if best_sim > threshold {
Some((best_key.to_string(), best_sim))
} else {
None
}
}
/// Refine a link target: if the target is a file-level node with section
/// children, find the best-matching section by cosine similarity against
/// the source content. Returns the original key if no sections exist or
/// no section matches above threshold.
///
/// This prevents hub formation at link creation time — every new link
/// targets the most specific available node.
pub fn refine_target(store: &Store, source_content: &str, target_key: &str) -> String {
// Only refine file-level nodes (no # in key)
if target_key.contains('#') { return target_key.to_string(); }
let sections = section_children(store, target_key);
if sections.is_empty() { return target_key.to_string(); }
best_match(&sections, source_content, 0.05)
.map(|(key, _)| key)
.unwrap_or_else(|| target_key.to_string())
}
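// Hedged usage sketch with hypothetical key names: a new link written against
// a file-level node such as "projects" would be retargeted to its best-matching
// "#"-suffixed section (e.g. "projects#roadmap"), or stay on the file node if
// nothing clears the 0.05 similarity threshold.
fn refine_example(store: &Store) -> String {
    refine_target(store, "notes about roadmap priorities", "projects")
}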
/// A proposed link move: from hub→neighbor to section→neighbor
pub struct LinkMove {
pub neighbor_key: String,
pub from_hub: String,
pub to_section: String,
pub similarity: f32,
pub neighbor_snippet: String,
}
/// Analyze a hub node and propose redistributing its links to child sections.
///
/// Returns None if the node isn't a hub or has no sections to redistribute to.
pub fn differentiate_hub(store: &Store, hub_key: &str) -> Option<Vec<LinkMove>> {
let graph = store.build_graph();
differentiate_hub_with_graph(store, hub_key, &graph)
}
/// Like differentiate_hub but uses a pre-built graph.
pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph) -> Option<Vec<LinkMove>> {
let degree = graph.degree(hub_key);
// Only differentiate actual hubs
if degree < 20 { return None; }
// Only works on file-level nodes that have section children
if hub_key.contains('#') { return None; }
let sections = section_children(store, hub_key);
if sections.is_empty() { return None; }
// Get all neighbors of the hub
let neighbors = graph.neighbors(hub_key);
let prefix = format!("{}#", hub_key);
let mut moves = Vec::new();
for (neighbor_key, _strength) in &neighbors {
// Skip section children — they should stay linked to parent
if neighbor_key.starts_with(&prefix) { continue; }
let neighbor_content = match store.nodes.get(neighbor_key.as_str()) {
Some(n) => &n.content,
None => continue,
};
// Find best-matching section by content similarity
if let Some((best_section, best_sim)) = best_match(&sections, neighbor_content, 0.05) {
let snippet = crate::util::first_n_chars(
neighbor_content.lines()
.find(|l| !l.is_empty() && !l.starts_with("<!--") && !l.starts_with("##"))
.unwrap_or(""),
80);
moves.push(LinkMove {
neighbor_key: neighbor_key.to_string(),
from_hub: hub_key.to_string(),
to_section: best_section,
similarity: best_sim,
neighbor_snippet: snippet,
});
}
}
moves.sort_by(|a, b| b.similarity.total_cmp(&a.similarity));
Some(moves)
}
/// Apply link moves: soft-delete hub→neighbor, create section→neighbor.
pub fn apply_differentiation(
store: &mut Store,
moves: &[LinkMove],
) -> (usize, usize) {
let mut applied = 0usize;
let mut skipped = 0usize;
for mv in moves {
// Check that section→neighbor doesn't already exist
let exists = store.relations.iter().any(|r|
((r.source_key == mv.to_section && r.target_key == mv.neighbor_key)
|| (r.source_key == mv.neighbor_key && r.target_key == mv.to_section))
&& !r.deleted
);
if exists { skipped += 1; continue; }
let section_uuid = match store.nodes.get(&mv.to_section) {
Some(n) => n.uuid,
None => { skipped += 1; continue; }
};
let neighbor_uuid = match store.nodes.get(&mv.neighbor_key) {
Some(n) => n.uuid,
None => { skipped += 1; continue; }
};
// Soft-delete old hub→neighbor relation
for rel in &mut store.relations {
if ((rel.source_key == mv.from_hub && rel.target_key == mv.neighbor_key)
|| (rel.source_key == mv.neighbor_key && rel.target_key == mv.from_hub))
&& !rel.deleted
{
rel.deleted = true;
}
}
// Create new section→neighbor relation
let new_rel = new_relation(
section_uuid, neighbor_uuid,
crate::store::RelationType::Auto,
0.5,
&mv.to_section, &mv.neighbor_key,
);
if store.add_relation(new_rel).is_ok() {
applied += 1;
}
}
(applied, skipped)
}
/// Find all file-level hubs that have section children to split into.
pub fn find_differentiable_hubs(store: &Store) -> Vec<(String, usize, usize)> {
let graph = store.build_graph();
let threshold = graph.hub_threshold();
let mut hubs = Vec::new();
for key in graph.nodes() {
let deg = graph.degree(key);
if deg < threshold { continue; }
if key.contains('#') { continue; }
let section_count = section_children(store, key).len();
if section_count > 0 {
hubs.push((key.clone(), deg, section_count));
}
}
hubs.sort_by(|a, b| b.1.cmp(&a.1));
hubs
}
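// Hedged sketch of a full differentiation pass wiring the three functions above
// together. Note differentiate_hub rebuilds the graph per hub; callers that
// already hold a fresh graph could use differentiate_hub_with_graph instead.
fn differentiate_all(store: &mut Store) -> usize {
    let mut total_applied = 0;
    for (hub_key, degree, sections) in find_differentiable_hubs(store) {
        println!("hub {} (deg={}, {} sections)", hub_key, degree, sections);
        if let Some(moves) = differentiate_hub(store, &hub_key) {
            let (applied, _skipped) = apply_differentiation(store, &moves);
            total_applied += applied;
        }
    }
    total_applied
}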
/// Triangle closure: for each node with degree >= min_degree, find pairs
/// of its neighbors that aren't directly connected and have cosine
/// similarity above sim_threshold. Add links between them.
///
/// This turns hub-spoke patterns into triangles, directly improving
/// clustering coefficient and schema fit.
pub fn triangle_close(
store: &mut Store,
min_degree: usize,
sim_threshold: f32,
max_links_per_hub: usize,
) -> (usize, usize) {
let graph = store.build_graph();
let mut added = 0usize;
let mut hubs_processed = 0usize;
// Get nodes sorted by degree (highest first)
let mut candidates: Vec<(String, usize)> = graph.nodes().iter()
.map(|k| (k.clone(), graph.degree(k)))
.filter(|(_, d)| *d >= min_degree)
.collect();
candidates.sort_by(|a, b| b.1.cmp(&a.1));
for (hub_key, hub_deg) in &candidates {
let neighbors = graph.neighbor_keys(hub_key);
if neighbors.len() < 2 { continue; }
// Collect neighbor content for similarity
let neighbor_docs: Vec<(String, String)> = neighbors.iter()
.filter_map(|&k| {
store.nodes.get(k).map(|n| (k.to_string(), n.content.clone()))
})
.collect();
// Find unconnected pairs with high similarity
let mut pair_scores: Vec<(String, String, f32)> = Vec::new();
for i in 0..neighbor_docs.len() {
for j in (i + 1)..neighbor_docs.len() {
// Check if already connected
let n_i = graph.neighbor_keys(&neighbor_docs[i].0);
if n_i.contains(neighbor_docs[j].0.as_str()) { continue; }
let sim = similarity::cosine_similarity(
&neighbor_docs[i].1, &neighbor_docs[j].1);
if sim >= sim_threshold {
pair_scores.push((
neighbor_docs[i].0.clone(),
neighbor_docs[j].0.clone(),
sim,
));
}
}
}
pair_scores.sort_by(|a, b| b.2.total_cmp(&a.2));
let to_add = pair_scores.len().min(max_links_per_hub);
if to_add > 0 {
println!(" {} (deg={}) — {} triangles to close (top {})",
hub_key, hub_deg, pair_scores.len(), to_add);
for (a, b, sim) in pair_scores.iter().take(to_add) {
let uuid_a = match store.nodes.get(a) { Some(n) => n.uuid, None => continue };
let uuid_b = match store.nodes.get(b) { Some(n) => n.uuid, None => continue };
let rel = new_relation(
uuid_a, uuid_b,
crate::store::RelationType::Auto,
sim * 0.5, // scale by similarity
a, b,
);
if let Ok(()) = store.add_relation(rel) {
added += 1;
}
}
hubs_processed += 1;
}
}
if added > 0 {
let _ = store.save();
}
(hubs_processed, added)
}
/// Link orphan nodes (degree < min_degree) to their most textually similar
/// connected nodes. For each orphan, finds top-K nearest neighbors by
/// cosine similarity and creates Auto links.
/// Returns (orphans_linked, total_links_added).
pub fn link_orphans(
store: &mut Store,
min_degree: usize,
links_per_orphan: usize,
sim_threshold: f32,
) -> (usize, usize) {
let graph = store.build_graph();
let mut added = 0usize;
let mut orphans_linked = 0usize;
// Separate orphans from connected nodes
let orphans: Vec<String> = graph.nodes().iter()
.filter(|k| graph.degree(k) < min_degree)
.cloned()
.collect();
// Build candidate pool: connected nodes with their content
let candidates: Vec<(String, String)> = graph.nodes().iter()
.filter(|k| graph.degree(k) >= min_degree)
.filter_map(|k| store.nodes.get(k).map(|n| (k.clone(), n.content.clone())))
.collect();
if candidates.is_empty() { return (0, 0); }
for orphan_key in &orphans {
let orphan_content = match store.nodes.get(orphan_key) {
Some(n) => n.content.clone(),
None => continue,
};
if orphan_content.len() < 20 { continue; } // skip near-empty nodes
// Score against all candidates
let mut scores: Vec<(usize, f32)> = candidates.iter()
.enumerate()
.map(|(i, (_, content))| {
(i, similarity::cosine_similarity(&orphan_content, content))
})
.filter(|(_, s)| *s >= sim_threshold)
.collect();
scores.sort_by(|a, b| b.1.total_cmp(&a.1));
let to_link = scores.len().min(links_per_orphan);
if to_link == 0 { continue; }
let orphan_uuid = store.nodes.get(orphan_key).unwrap().uuid;
for &(idx, sim) in scores.iter().take(to_link) {
let target_key = &candidates[idx].0;
let target_uuid = match store.nodes.get(target_key) {
Some(n) => n.uuid,
None => continue,
};
let rel = new_relation(
orphan_uuid, target_uuid,
crate::store::RelationType::Auto,
sim * 0.5,
orphan_key, target_key,
);
if store.add_relation(rel).is_ok() {
added += 1;
}
}
orphans_linked += 1;
}
if added > 0 {
let _ = store.save();
}
(orphans_linked, added)
}
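// Hedged sketch chaining the two topology passes above; the degree and
// similarity thresholds are illustrative defaults, not values from this commit.
fn integrate_pass(store: &mut Store) {
    let (hubs, closed) = triangle_close(store, 10, 0.3, 3);
    let (orphans, linked) = link_orphans(store, 2, 3, 0.2);
    println!("closed {} triangles across {} hubs; gave {} orphans {} new links",
        closed, hubs, orphans, linked);
}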

View file

@ -26,7 +26,7 @@ pub fn consolidation_priority(
graph: &Graph,
spectral_outlier: Option<f64>,
) -> f64 {
let node = match store.get_node(key).ok().flatten() {
let node = match store.nodes.get(key) {
Some(n) => n,
None => return 0.0,
};
@ -97,10 +97,8 @@ pub fn replay_queue_with_graph(
HashMap::new()
};
let all_keys = store.all_keys().unwrap_or_default();
let mut items: Vec<ReplayItem> = all_keys.iter()
.filter_map(|key| {
let node = store.get_node(key).ok()??;
let mut items: Vec<ReplayItem> = store.nodes.iter()
.map(|(key, node)| {
let pos = positions.get(key);
let outlier_score = pos.map(|p| p.outlier_score).unwrap_or(0.0);
let classification = pos
@ -111,7 +109,7 @@ pub fn replay_queue_with_graph(
store, key, graph,
pos.map(|p| p.outlier_score),
);
Some(ReplayItem {
ReplayItem {
key: key.clone(),
priority,
interval_days: node.spaced_repetition_interval,
@ -119,7 +117,7 @@ pub fn replay_queue_with_graph(
cc: graph.clustering_coefficient(key),
classification,
outlier_score,
})
}
})
.collect();
@ -128,70 +126,73 @@ pub fn replay_queue_with_graph(
items
}
/// Agent allocation from the control loop.
/// Agent types and counts are data-driven — add agents by adding
/// entries to the counts map.
/// Detect interfering memory pairs: high text similarity but different communities
pub fn detect_interference(
store: &Store,
graph: &Graph,
threshold: f32,
) -> Vec<(String, String, f32)> {
use crate::similarity;
let communities = graph.communities();
    // Only compare nodes within a reasonable set: skip tiny nodes and, for
    // large stores, keep the ones with the most content
let mut docs: Vec<(String, String)> = store.nodes.iter()
.filter(|(_, n)| n.content.len() > 50) // skip tiny nodes
.map(|(k, n)| (k.clone(), n.content.clone()))
.collect();
// For large stores, sample to keep pairwise comparison feasible
if docs.len() > 200 {
docs.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
docs.truncate(200);
}
let similar = similarity::pairwise_similar(&docs, threshold);
// Filter to pairs in different communities
similar.into_iter()
.filter(|(a, b, _)| {
let ca = communities.get(a);
let cb = communities.get(b);
match (ca, cb) {
(Some(a), Some(b)) => a != b,
_ => true, // if community unknown, flag it
}
})
.collect()
}
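// Hedged usage sketch: list cross-community near-duplicates above a 0.5
// similarity threshold, matching the value the planner in this file passes in.
fn report_interference(store: &Store) {
    let graph = store.build_graph();
    for (a, b, sim) in detect_interference(store, &graph, 0.5) {
        println!("{:.2}  {} <-> {}", sim, a, b);
    }
}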
/// Agent allocation from the control loop
#[derive(Default)]
pub struct ConsolidationPlan {
/// agent_name → run count
pub counts: std::collections::HashMap<String, usize>,
pub replay_count: usize,
pub linker_count: usize,
pub separator_count: usize,
pub transfer_count: usize,
pub run_health: bool,
pub rationale: Vec<String>,
}
impl ConsolidationPlan {
pub fn count(&self, agent: &str) -> usize {
self.counts.get(agent).copied().unwrap_or(0)
}
pub fn set(&mut self, agent: &str, count: usize) {
self.counts.insert(agent.to_string(), count);
}
pub fn add(&mut self, agent: &str, count: usize) {
*self.counts.entry(agent.to_string()).or_default() += count;
}
pub fn total(&self) -> usize {
self.counts.values().sum::<usize>() + if self.run_health { 1 } else { 0 }
}
/// Expand the plan into a flat list of (agent_name, batch_size) runs.
/// Interleaves agent types so different types alternate.
pub fn to_agent_runs(&self, batch_size: usize) -> Vec<(String, usize)> {
pub fn to_agent_runs(&self, batch_size: usize) -> Vec<(&'static str, usize)> {
let mut runs = Vec::new();
if self.run_health {
runs.push(("health".to_string(), 0));
runs.push(("health", 0));
}
// Sort by count descending so high-volume agents interleave well
let mut types: Vec<(&String, &usize)> = self.counts.iter()
.filter(|(_, c)| **c > 0)
.collect();
types.sort_by(|a, b| b.1.cmp(a.1));
let mut queues: Vec<Vec<(String, usize)>> = types.iter().map(|(name, count)| {
let mut q = Vec::new();
let mut remaining = **count;
for (name, count) in [
("replay", self.replay_count),
("linker", self.linker_count),
("separator", self.separator_count),
("transfer", self.transfer_count),
] {
let mut remaining = count;
while remaining > 0 {
let batch = remaining.min(batch_size);
q.push((name.to_string(), batch));
runs.push((name, batch));
remaining -= batch;
}
q
}).collect();
// Round-robin interleave
loop {
let mut added = false;
for q in &mut queues {
if let Some(run) = q.first() {
runs.push(run.clone());
q.remove(0);
added = true;
}
}
if !added { break; }
}
runs
}
@ -210,110 +211,101 @@ pub fn consolidation_plan_quick(store: &Store) -> ConsolidationPlan {
consolidation_plan_inner(store, false)
}
fn consolidation_plan_inner(store: &Store, _detect_interf: bool) -> ConsolidationPlan {
fn consolidation_plan_inner(store: &Store, detect_interf: bool) -> ConsolidationPlan {
let graph = store.build_graph();
let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini();
let _avg_cc = graph.avg_clustering_coefficient();
let avg_cc = graph.avg_clustering_coefficient();
let interference_count = if detect_interf {
detect_interference(store, &graph, 0.5).len()
} else {
0
};
let all_keys = store.all_keys().unwrap_or_default();
let episodic_count = all_keys.iter()
.filter_map(|k| store.get_node(k).ok()?)
.filter(|n| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
let episodic_count = store.nodes.iter()
.filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
.count();
let _episodic_ratio = if all_keys.is_empty() { 0.0 }
else { episodic_count as f32 / all_keys.len() as f32 };
let episodic_ratio = if store.nodes.is_empty() { 0.0 }
else { episodic_count as f32 / store.nodes.len() as f32 };
let mut plan = ConsolidationPlan {
counts: std::collections::HashMap::new(),
replay_count: 0,
linker_count: 0,
separator_count: 0,
transfer_count: 0,
run_health: true,
rationale: Vec::new(),
};
// Active agent types from config
let config = crate::config::get();
let agent_types: Vec<&str> = config.agent_types.iter().map(|s| s.as_str()).collect();
// Target: α ≥ 2.5 (healthy scale-free)
if alpha < 2.0 {
plan.add("linker", 100);
plan.replay_count += 10;
plan.linker_count += 5;
plan.rationale.push(format!(
"α={:.2} (target ≥2.5): extreme hub dominance → 100 linker", alpha));
"α={:.2} (target ≥2.5): extreme hub dominance → 10 replay + 5 linker",
alpha));
} else if alpha < 2.5 {
plan.add("linker", 50);
plan.replay_count += 5;
plan.linker_count += 3;
plan.rationale.push(format!(
"α={:.2} (target ≥2.5): moderate hub dominance → 50 linker", alpha));
"α={:.2} (target ≥2.5): moderate hub dominance → 5 replay + 3 linker",
alpha));
} else {
plan.add("linker", 20);
plan.replay_count += 3;
plan.rationale.push(format!(
"α={:.2}: healthy — 20 linker for maintenance", alpha));
"α={:.2}: healthy — 3 replay for maintenance", alpha));
}
// Target: Gini ≤ 0.4
if gini > 0.5 {
plan.add("linker", 50);
plan.replay_count += 3;
plan.rationale.push(format!(
"Gini={:.3} (target ≤0.4): high inequality → +50 linker", gini));
"Gini={:.3} (target ≤0.4): high inequality → +3 replay",
gini));
}
// Organize: proportional to linker — synthesizes what linker connects
let linker = plan.count("linker");
plan.set("organize", linker / 2);
plan.rationale.push(format!(
"Organize: {} (half of linker count)", plan.count("organize")));
// Distill: core concept maintenance
let organize = plan.count("organize");
let mut distill = organize;
if gini > 0.4 { distill += 20; }
if alpha < 2.0 { distill += 20; }
plan.set("distill", distill);
plan.rationale.push(format!(
"Distill: {} (synthesize hub content)", plan.count("distill")));
// Split: handle oversized nodes
plan.set("split", 5);
// Distribute agent budget using Elo ratings
let budget = crate::config::get().agent_budget;
let elo_path = crate::config::get().data_dir.join("agent-elo.json");
if let Ok(elo_json) = std::fs::read_to_string(&elo_path) {
if let Ok(ratings) = serde_json::from_str::<std::collections::HashMap<String, f64>>(&elo_json) {
let elos: Vec<f64> = agent_types.iter()
.map(|t| ratings.get(*t).copied().unwrap_or(1000.0))
.collect();
let min_elo = elos.iter().copied().fold(f64::MAX, f64::min);
let weights: Vec<f64> = elos.iter()
.map(|e| {
let shifted = e - min_elo + 50.0;
shifted * shifted
})
.collect();
let total_weight: f64 = weights.iter().sum();
let allocate = |w: f64| -> usize {
((w / total_weight * budget as f64).round() as usize).max(2)
};
for (i, agent) in agent_types.iter().enumerate() {
plan.set(agent, allocate(weights[i]));
}
let summary: Vec<String> = agent_types.iter()
.map(|a| format!("{}={}", a, plan.count(a)))
.collect();
plan.rationale.push(format!(
"Elo allocation (budget={}): {}", budget, summary.join(" ")));
}
} else {
// No Elo file — use budget with equal distribution
let per_type = budget / agent_types.len();
for agent in &agent_types {
plan.set(agent, per_type);
}
// Target: avg CC ≥ 0.2
if avg_cc < 0.1 {
plan.replay_count += 5;
plan.rationale.push(format!(
"No Elo ratings — equal distribution ({} each, budget={})", per_type, budget));
"CC={:.3} (target ≥0.2): very poor integration → +5 replay",
avg_cc));
} else if avg_cc < 0.2 {
plan.replay_count += 2;
plan.rationale.push(format!(
"CC={:.3} (target ≥0.2): low integration → +2 replay",
avg_cc));
}
// Interference: >100 pairs is a lot, <10 is clean
if interference_count > 100 {
plan.separator_count += 10;
plan.rationale.push(format!(
"Interference: {} pairs (target <50) → 10 separator",
interference_count));
} else if interference_count > 20 {
plan.separator_count += 5;
plan.rationale.push(format!(
"Interference: {} pairs (target <50) → 5 separator",
interference_count));
} else if interference_count > 0 {
plan.separator_count += interference_count.min(3);
plan.rationale.push(format!(
"Interference: {} pairs → {} separator",
interference_count, plan.separator_count));
}
// Episodic → semantic transfer
if episodic_ratio > 0.6 {
plan.transfer_count += 10;
plan.rationale.push(format!(
"Episodic ratio: {:.0}% ({}/{}) → 10 transfer",
episodic_ratio * 100.0, episodic_count, store.nodes.len()));
} else if episodic_ratio > 0.4 {
plan.transfer_count += 5;
plan.rationale.push(format!(
"Episodic ratio: {:.0}% → 5 transfer",
episodic_ratio * 100.0));
}
plan
@ -330,19 +322,34 @@ pub fn format_plan(plan: &ConsolidationPlan) -> String {
out.push_str("\nAgent allocation:\n");
if plan.run_health {
out.push_str(" 1. health — system audit\n");
out.push_str(" 1. health — system audit\n");
}
let mut step = 2;
let mut sorted: Vec<_> = plan.counts.iter()
.filter(|(_, c)| **c > 0)
.collect();
sorted.sort_by(|a, b| b.1.cmp(a.1));
for (agent, count) in &sorted {
out.push_str(&format!(" {}. {} ×{}\n", step, agent, count));
if plan.replay_count > 0 {
out.push_str(&format!(" {}. replay ×{:2} — schema assimilation + lateral linking\n",
step, plan.replay_count));
step += 1;
}
if plan.linker_count > 0 {
out.push_str(&format!(" {}. linker ×{:2} — relational binding from episodes\n",
step, plan.linker_count));
step += 1;
}
if plan.separator_count > 0 {
out.push_str(&format!(" {}. separator ×{} — pattern separation\n",
step, plan.separator_count));
step += 1;
}
if plan.transfer_count > 0 {
out.push_str(&format!(" {}. transfer ×{:2} — episodic→semantic extraction\n",
step, plan.transfer_count));
}
let total = plan.replay_count + plan.linker_count
+ plan.separator_count + plan.transfer_count
+ if plan.run_health { 1 } else { 0 };
out.push_str(&format!("\nTotal agent runs: {}\n", total));
out.push_str(&format!("\nTotal agent runs: {}\n", plan.total()));
out
}

501
poc-memory/src/query.rs Normal file
View file

@ -0,0 +1,501 @@
// query.rs — peg-based query language for the memory graph
//
// Grammar-driven: the peg definition IS the language spec.
// Evaluates against node properties, graph metrics, and edge attributes.
// Designed for ad-hoc exploration without memorizing 35+ subcommands.
//
// Syntax:
// expr | stage | stage ...
//
// Stages (piped):
// sort FIELD sort descending (default for exploration)
// sort FIELD asc sort ascending
// limit N cap results
// select F,F,... output specific fields as TSV
// count just show count
//
// Examples:
// degree > 15 | sort degree | limit 10
// category = core | select degree,weight
// neighbors('identity') WHERE strength > 0.5 | sort strength
// key ~ 'journal.*' AND degree > 10 | count
// * | sort weight asc | limit 20
use crate::store::{NodeType, RelationType, Store};
use crate::graph::Graph;
use regex::Regex;
use std::collections::BTreeMap;
// -- AST types --
#[derive(Debug, Clone)]
pub enum Expr {
All,
Comparison { field: String, op: CmpOp, value: Value },
And(Box<Expr>, Box<Expr>),
Or(Box<Expr>, Box<Expr>),
Not(Box<Expr>),
Neighbors { key: String, filter: Option<Box<Expr>> },
}
#[derive(Debug, Clone)]
pub enum Value {
Num(f64),
Str(String),
Ident(String),
FnCall(FnCall),
}
#[derive(Debug, Clone)]
pub enum FnCall {
Community(String),
Degree(String),
}
#[derive(Debug, Clone, Copy)]
pub enum CmpOp {
Gt, Lt, Ge, Le, Eq, Ne, Match,
}
#[derive(Debug, Clone)]
pub enum Stage {
Sort { field: String, ascending: bool },
Limit(usize),
Select(Vec<String>),
Count,
}
#[derive(Debug, Clone)]
pub struct Query {
pub expr: Expr,
pub stages: Vec<Stage>,
}
// -- PEG grammar --
peg::parser! {
pub grammar query_parser() for str {
rule _() = [' ' | '\t']*
pub rule query() -> Query
= e:expr() s:stages() { Query { expr: e, stages: s } }
rule stages() -> Vec<Stage>
= s:(_ "|" _ s:stage() { s })* { s }
rule stage() -> Stage
= "sort" _ f:field() _ a:asc_desc() { Stage::Sort { field: f, ascending: a } }
/ "limit" _ n:integer() { Stage::Limit(n) }
/ "select" _ f:field_list() { Stage::Select(f) }
/ "count" { Stage::Count }
rule asc_desc() -> bool
= "asc" { true }
/ "desc" { false }
/ { false } // default: descending
rule field_list() -> Vec<String>
= f:field() fs:(_ "," _ f:field() { f })* {
let mut v = vec![f];
v.extend(fs);
v
}
rule integer() -> usize
= n:$(['0'..='9']+) { n.parse().unwrap() }
pub rule expr() -> Expr = precedence! {
a:(@) _ "OR" _ b:@ { Expr::Or(Box::new(a), Box::new(b)) }
--
a:(@) _ "AND" _ b:@ { Expr::And(Box::new(a), Box::new(b)) }
--
"NOT" _ e:@ { Expr::Not(Box::new(e)) }
--
"neighbors" _ "(" _ k:string() _ ")" _ w:where_clause()? {
Expr::Neighbors { key: k, filter: w.map(Box::new) }
}
f:field() _ op:cmp_op() _ v:value() {
Expr::Comparison { field: f, op, value: v }
}
"*" { Expr::All }
"(" _ e:expr() _ ")" { e }
}
rule where_clause() -> Expr
= "WHERE" _ e:expr() { e }
rule field() -> String
= s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*) {
s.to_string()
}
rule cmp_op() -> CmpOp
= ">=" { CmpOp::Ge }
/ "<=" { CmpOp::Le }
/ "!=" { CmpOp::Ne }
/ ">" { CmpOp::Gt }
/ "<" { CmpOp::Lt }
/ "=" { CmpOp::Eq }
/ "~" { CmpOp::Match }
rule value() -> Value
= f:fn_call() { Value::FnCall(f) }
/ n:number() { Value::Num(n) }
/ s:string() { Value::Str(s) }
/ i:ident() { Value::Ident(i) }
rule fn_call() -> FnCall
= "community" _ "(" _ k:string() _ ")" { FnCall::Community(k) }
/ "degree" _ "(" _ k:string() _ ")" { FnCall::Degree(k) }
rule number() -> f64
= n:$(['0'..='9']+ ("." ['0'..='9']+)?) {
n.parse().unwrap()
}
rule string() -> String
= "'" s:$([^ '\'']*) "'" { s.to_string() }
rule ident() -> String
= s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '.']*) {
s.to_string()
}
}
}
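// Hedged sketch: the grammar above can be exercised directly. This assumes one
// of the example queries from the header comment parses into a Query with an
// expression plus two pipeline stages.
fn parse_example() -> Result<Query, String> {
    query_parser::query("degree > 15 | sort degree | limit 10")
        .map_err(|e| format!("Parse error: {}", e))
}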
// -- Field resolution --
/// Resolve a field value from a node + graph context, returning a comparable Value.
fn resolve_field(field: &str, key: &str, store: &Store, graph: &Graph) -> Option<Value> {
let node = store.nodes.get(key)?;
match field {
"key" => Some(Value::Str(key.to_string())),
"weight" => Some(Value::Num(node.weight as f64)),
"category" => None, // vestigial, kept for query compat
"node_type" => Some(Value::Str(node_type_label(node.node_type).to_string())),
"provenance" => Some(Value::Str(node.provenance.clone())),
"emotion" => Some(Value::Num(node.emotion as f64)),
"retrievals" => Some(Value::Num(node.retrievals as f64)),
"uses" => Some(Value::Num(node.uses as f64)),
"wrongs" => Some(Value::Num(node.wrongs as f64)),
"created" => Some(Value::Str(node.created.clone())),
"content" => Some(Value::Str(node.content.clone())),
"degree" => Some(Value::Num(graph.degree(key) as f64)),
"community_id" => {
graph.communities().get(key).map(|&c| Value::Num(c as f64))
}
"clustering_coefficient" | "schema_fit" | "cc" => {
Some(Value::Num(graph.clustering_coefficient(key) as f64))
}
_ => None,
}
}
fn node_type_label(nt: NodeType) -> &'static str {
match nt {
NodeType::EpisodicSession => "episodic_session",
NodeType::EpisodicDaily => "episodic_daily",
NodeType::EpisodicWeekly => "episodic_weekly",
NodeType::EpisodicMonthly => "episodic_monthly",
NodeType::Semantic => "semantic",
}
}
fn rel_type_label(r: RelationType) -> &'static str {
match r {
RelationType::Link => "link",
RelationType::Causal => "causal",
RelationType::Auto => "auto",
}
}
// -- Comparison logic --
fn as_num(v: &Value) -> Option<f64> {
match v {
Value::Num(n) => Some(*n),
Value::Str(s) => s.parse().ok(),
Value::Ident(s) => s.parse().ok(),
Value::FnCall(_) => None,
}
}
fn as_str(v: &Value) -> String {
match v {
Value::Str(s) | Value::Ident(s) => s.clone(),
Value::Num(n) => format!("{}", n),
Value::FnCall(_) => String::new(),
}
}
fn compare(lhs: &Value, op: CmpOp, rhs: &Value) -> bool {
if let CmpOp::Match = op {
return Regex::new(&as_str(rhs))
.map(|re| re.is_match(&as_str(lhs)))
.unwrap_or(false);
}
// Numeric comparison if both parse, otherwise string
let ord = match (as_num(lhs), as_num(rhs)) {
(Some(a), Some(b)) => a.total_cmp(&b),
_ => as_str(lhs).cmp(&as_str(rhs)),
};
match op {
CmpOp::Eq => ord.is_eq(),
CmpOp::Ne => !ord.is_eq(),
CmpOp::Gt => ord.is_gt(),
CmpOp::Lt => ord.is_lt(),
CmpOp::Ge => !ord.is_lt(),
CmpOp::Le => !ord.is_gt(),
CmpOp::Match => unreachable!(),
}
}
// -- Evaluator --
fn resolve_fn(f: &FnCall, store: &Store, graph: &Graph) -> Value {
match f {
FnCall::Community(key) => {
let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone());
graph.communities().get(&resolved)
.map(|&c| Value::Num(c as f64))
.unwrap_or(Value::Num(f64::NAN))
}
FnCall::Degree(key) => {
let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone());
Value::Num(graph.degree(&resolved) as f64)
}
}
}
fn resolve_value(v: &Value, store: &Store, graph: &Graph) -> Value {
match v {
Value::FnCall(f) => resolve_fn(f, store, graph),
other => other.clone(),
}
}
/// Evaluate an expression against a field resolver.
/// The resolver returns field values — different for nodes vs edges.
fn eval(
expr: &Expr,
resolve: &dyn Fn(&str) -> Option<Value>,
store: &Store,
graph: &Graph,
) -> bool {
match expr {
Expr::All => true,
Expr::Comparison { field, op, value } => {
let lhs = match resolve(field) {
Some(v) => v,
None => return false,
};
let rhs = resolve_value(value, store, graph);
compare(&lhs, *op, &rhs)
}
Expr::And(a, b) => eval(a, resolve, store, graph) && eval(b, resolve, store, graph),
Expr::Or(a, b) => eval(a, resolve, store, graph) || eval(b, resolve, store, graph),
Expr::Not(e) => !eval(e, resolve, store, graph),
Expr::Neighbors { .. } => false,
}
}
// -- Query result --
pub struct QueryResult {
pub key: String,
pub fields: BTreeMap<String, Value>,
}
// -- Query executor --
pub fn execute_query(
store: &Store,
graph: &Graph,
query_str: &str,
) -> Result<Vec<QueryResult>, String> {
let q = query_parser::query(query_str)
.map_err(|e| format!("Parse error: {}", e))?;
execute_parsed(store, graph, &q)
}
fn execute_parsed(
store: &Store,
graph: &Graph,
q: &Query,
) -> Result<Vec<QueryResult>, String> {
let mut results = match &q.expr {
Expr::Neighbors { key, filter } => {
let resolved = store.resolve_key(key).unwrap_or_else(|_| key.clone());
let edges = graph.edges_of(&resolved);
let mut out = Vec::new();
for edge in edges {
let include = match filter {
Some(f) => {
let strength = edge.strength;
let rt = edge.rel_type;
let target = &edge.target;
eval(f, &|field| match field {
"strength" => Some(Value::Num(strength as f64)),
"rel_type" => Some(Value::Str(rel_type_label(rt).to_string())),
_ => resolve_field(field, target, store, graph),
}, store, graph)
}
None => true,
};
if include {
let mut fields = BTreeMap::new();
fields.insert("strength".into(), Value::Num(edge.strength as f64));
fields.insert("rel_type".into(),
Value::Str(rel_type_label(edge.rel_type).to_string()));
out.push(QueryResult { key: edge.target.clone(), fields });
}
}
out
}
_ => {
let mut out = Vec::new();
for key in store.nodes.keys() {
if store.nodes[key].deleted { continue; }
if eval(&q.expr, &|f| resolve_field(f, key, store, graph), store, graph) {
out.push(QueryResult { key: key.clone(), fields: BTreeMap::new() });
}
}
out
}
};
// Collect fields needed by select/sort stages and resolve them once
let needed: Vec<String> = {
let mut set = Vec::new();
for stage in &q.stages {
match stage {
Stage::Select(fields) => {
for f in fields {
if !set.contains(f) { set.push(f.clone()); }
}
}
Stage::Sort { field, .. } => {
if !set.contains(field) { set.push(field.clone()); }
}
_ => {}
}
}
set
};
for r in &mut results {
for f in &needed {
if !r.fields.contains_key(f) {
if let Some(v) = resolve_field(f, &r.key, store, graph) {
r.fields.insert(f.clone(), v);
}
}
}
}
// Apply pipeline stages
let mut has_sort = false;
for stage in &q.stages {
match stage {
Stage::Sort { field, ascending } => {
has_sort = true;
let asc = *ascending;
results.sort_by(|a, b| {
let va = a.fields.get(field).and_then(as_num);
let vb = b.fields.get(field).and_then(as_num);
let ord = match (va, vb) {
(Some(a), Some(b)) => a.total_cmp(&b),
_ => {
let sa = a.fields.get(field).map(as_str).unwrap_or_default();
let sb = b.fields.get(field).map(as_str).unwrap_or_default();
sa.cmp(&sb)
}
};
if asc { ord } else { ord.reverse() }
});
}
Stage::Limit(n) => {
results.truncate(*n);
}
Stage::Select(_) | Stage::Count => {} // handled in output
}
}
// Default sort by degree desc if no explicit sort
if !has_sort {
results.sort_by(|a, b| {
let da = graph.degree(&a.key);
let db = graph.degree(&b.key);
db.cmp(&da)
});
}
Ok(results)
}
/// Format a Value for display
pub fn format_value(v: &Value) -> String {
match v {
Value::Num(n) => {
if *n == n.floor() && n.abs() < 1e15 {
format!("{}", *n as i64)
} else {
format!("{:.3}", n)
}
}
Value::Str(s) => s.clone(),
Value::Ident(s) => s.clone(),
Value::FnCall(_) => "?".to_string(),
}
}
/// Execute query and print formatted output.
pub fn run_query(store: &Store, graph: &Graph, query_str: &str) -> Result<(), String> {
let q = query_parser::query(query_str)
.map_err(|e| format!("Parse error: {}", e))?;
let results = execute_parsed(store, graph, &q)?;
// Count stage
if q.stages.iter().any(|s| matches!(s, Stage::Count)) {
println!("{}", results.len());
return Ok(());
}
if results.is_empty() {
eprintln!("No results");
return Ok(());
}
// Select stage
let fields: Option<&Vec<String>> = q.stages.iter().find_map(|s| match s {
Stage::Select(f) => Some(f),
_ => None,
});
if let Some(fields) = fields {
let mut header = vec!["key".to_string()];
header.extend(fields.iter().cloned());
println!("{}", header.join("\t"));
for r in &results {
let mut row = vec![r.key.clone()];
for f in fields {
row.push(match r.fields.get(f) {
Some(v) => format_value(v),
None => "-".to_string(),
});
}
println!("{}", row.join("\t"));
}
} else {
for r in &results {
println!("{}", r.key);
}
}
Ok(())
}
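// Hedged sketch of calling the executor from other code; assumes the caller has
// a loaded Store and builds the graph once up front.
fn explore(store: &Store) -> Result<(), String> {
    let graph = store.build_graph();
    run_query(store, &graph, "key ~ 'journal.*' AND degree > 10 | count")
}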

View file

@ -148,6 +148,8 @@ pub enum Filter {
Age(Cmp), // vs now - timestamp (seconds)
ContentLen(Cmp),
Provenance(String),
NotVisited { agent: String, duration: i64 }, // seconds
Visited { agent: String },
Negated(Box<Filter>),
}
@ -155,10 +157,6 @@ pub enum Filter {
pub enum Transform {
Sort(SortField),
Limit(usize),
Select(Vec<String>),
Count,
Connectivity,
DominatingSet,
}
#[derive(Clone, Debug)]
@ -168,21 +166,6 @@ pub enum SortField {
ContentLen,
Degree,
Weight,
Isolation,
Key,
Named(String, bool), // (field_name, ascending)
Composite(Vec<(ScoreField, f64)>),
}
/// Individual scoring dimensions for composite sorts.
/// Each computes a 0.0-1.0 score per node.
#[derive(Clone, Debug)]
pub enum ScoreField {
Isolation,
Degree,
Weight,
ContentLen,
Priority,
}
/// Numeric comparison operator.
@ -207,61 +190,153 @@ impl Cmp {
}
}
/// Parse a comparison like ">0.5", ">=60", "<7d" (durations converted to seconds).
fn parse_cmp(s: &str) -> Result<Cmp, String> {
let (op_len, ctor): (usize, fn(f64) -> Cmp) = if s.starts_with(">=") {
(2, Cmp::Gte)
} else if s.starts_with("<=") {
(2, Cmp::Lte)
} else if s.starts_with('>') {
(1, Cmp::Gt)
} else if s.starts_with('<') {
(1, Cmp::Lt)
} else if s.starts_with('=') {
(1, Cmp::Eq)
} else {
return Err(format!("expected comparison operator in '{}'", s));
};
/// Compute a 0-1 score for a node on a single dimension.
fn score_field(
field: &ScoreField,
key: &str,
store: &Store,
graph: &Graph,
precomputed: &CompositeCache,
) -> f64 {
match field {
ScoreField::Isolation => {
let comm = graph.communities().get(key).copied().unwrap_or(0);
precomputed.isolation.get(&comm).copied().unwrap_or(1.0) as f64
}
ScoreField::Degree => {
let d = graph.degree(key) as f64;
let max = precomputed.max_degree.max(1.0);
(d / max).min(1.0)
}
ScoreField::Weight => {
store.get_node(key).ok().flatten().map(|n| n.weight as f64).unwrap_or(0.0)
}
ScoreField::ContentLen => {
let len = store.get_node(key).ok().flatten().map(|n| n.content.len()).unwrap_or(0) as f64;
let max = precomputed.max_content_len.max(1.0);
(len / max).min(1.0)
}
ScoreField::Priority => {
let p = crate::neuro::consolidation_priority(store, key, graph, None);
// Priority is already roughly 0-1 from the scoring function
p.min(1.0)
}
let val_str = &s[op_len..];
let val = parse_duration_or_number(val_str)?;
Ok(ctor(val))
}
/// Parse "7d", "24h", "30m" as seconds, or plain numbers.
fn parse_duration_or_number(s: &str) -> Result<f64, String> {
if let Some(n) = s.strip_suffix('d') {
let v: f64 = n.parse().map_err(|_| format!("bad number: {}", n))?;
Ok(v * 86400.0)
} else if let Some(n) = s.strip_suffix('h') {
let v: f64 = n.parse().map_err(|_| format!("bad number: {}", n))?;
Ok(v * 3600.0)
} else if let Some(n) = s.strip_suffix('m') {
let v: f64 = n.parse().map_err(|_| format!("bad number: {}", n))?;
Ok(v * 60.0)
} else {
s.parse().map_err(|_| format!("bad number: {}", s))
}
}
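// Hedged examples of the two parsers above: ">=0.5" yields Cmp::Gte(0.5), and
// "<7d" yields Cmp::Lt(604800.0) once the day suffix is expanded to seconds.
fn parse_cmp_examples() -> Result<(Cmp, Cmp), String> {
    Ok((parse_cmp(">=0.5")?, parse_cmp("<7d")?))
}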
/// Cached values for composite scoring (computed once per sort).
struct CompositeCache {
isolation: HashMap<u32, f32>,
max_degree: f64,
max_content_len: f64,
/// Parse a NodeType from a label.
fn parse_node_type(s: &str) -> Result<NodeType, String> {
match s {
"episodic" | "session" => Ok(NodeType::EpisodicSession),
"daily" => Ok(NodeType::EpisodicDaily),
"weekly" => Ok(NodeType::EpisodicWeekly),
"monthly" => Ok(NodeType::EpisodicMonthly),
"semantic" => Ok(NodeType::Semantic),
_ => Err(format!("unknown node type: {} (use: episodic, semantic, daily, weekly, monthly)", s)),
}
}
impl CompositeCache {
fn build(items: &[(String, f64)], store: &Store, graph: &Graph) -> Self {
let max_degree = items.iter()
.map(|(k, _)| graph.degree(k) as f64)
.fold(0.0f64, f64::max);
let max_content_len = items.iter()
.map(|(k, _)| store.get_node(k).ok().flatten().map(|n| n.content.len()).unwrap_or(0) as f64)
.fold(0.0f64, f64::max);
Self {
isolation: graph.community_isolation(),
max_degree,
max_content_len,
impl Stage {
/// Parse a single stage from a string.
///
/// Algorithm names are tried first (bare words), then predicate syntax
/// (contains ':'). No ambiguity since algorithms are bare words.
pub fn parse(s: &str) -> Result<Self, String> {
let s = s.trim();
let (negated, s) = if let Some(rest) = s.strip_prefix('!') {
(true, rest)
} else {
(false, s)
};
// Generator: "all"
if s == "all" {
return Ok(Stage::Generator(Generator::All));
}
// Try algorithm parse first (bare words, no colon)
if !s.contains(':') {
if let Ok(algo) = AlgoStage::parse(s) {
return Ok(Stage::Algorithm(algo));
}
}
// Algorithm with params: "spread,max_hops=4" (contains comma but no colon)
if s.contains(',') && !s.contains(':') {
return AlgoStage::parse(s).map(Stage::Algorithm);
}
// Predicate/transform syntax: "key:value"
let (prefix, value) = s.split_once(':')
.ok_or_else(|| format!("unknown stage: {}", s))?;
let filter_or_transform = match prefix {
"type" => Stage::Filter(Filter::Type(parse_node_type(value)?)),
"key" => Stage::Filter(Filter::KeyGlob(value.to_string())),
"weight" => Stage::Filter(Filter::Weight(parse_cmp(value)?)),
"age" => Stage::Filter(Filter::Age(parse_cmp(value)?)),
"content-len" => Stage::Filter(Filter::ContentLen(parse_cmp(value)?)),
"provenance" => {
Stage::Filter(Filter::Provenance(value.to_string()))
}
"not-visited" => {
let (agent, dur) = value.split_once(',')
.ok_or("not-visited:AGENT,DURATION")?;
let secs = parse_duration_or_number(dur)?;
Stage::Filter(Filter::NotVisited {
agent: agent.to_string(),
duration: secs as i64,
})
}
"visited" => Stage::Filter(Filter::Visited {
agent: value.to_string(),
}),
"sort" => {
let field = match value {
"priority" => SortField::Priority,
"timestamp" => SortField::Timestamp,
"content-len" => SortField::ContentLen,
"degree" => SortField::Degree,
"weight" => SortField::Weight,
_ => return Err(format!("unknown sort field: {}", value)),
};
Stage::Transform(Transform::Sort(field))
}
"limit" => {
let n: usize = value.parse()
.map_err(|_| format!("bad limit: {}", value))?;
Stage::Transform(Transform::Limit(n))
}
"match" => {
let terms: Vec<String> = value.split(',')
.map(|t| t.to_string())
.collect();
Stage::Generator(Generator::Match(terms))
}
// Algorithm with colon in params? Try fallback.
_ => return AlgoStage::parse(s).map(Stage::Algorithm)
.map_err(|_| format!("unknown stage: {}", s)),
};
// Apply negation to filters
if negated {
match filter_or_transform {
Stage::Filter(f) => Ok(Stage::Filter(Filter::Negated(Box::new(f)))),
_ => Err("! prefix only works on filter stages".to_string()),
}
} else {
Ok(filter_or_transform)
}
}
/// Parse a pipe-separated pipeline string.
pub fn parse_pipeline(s: &str) -> Result<Vec<Stage>, String> {
s.split('|')
.map(|part| Stage::parse(part.trim()))
.collect()
}
}
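// Hedged sketch of a pipeline string using the filters and transforms defined
// above; the agent name "linker" is illustrative.
fn pipeline_example() -> Result<Vec<Stage>, String> {
    Stage::parse_pipeline("type:episodic | !visited:linker | sort:priority | limit:10")
}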
@ -273,10 +348,6 @@ impl fmt::Display for Stage {
Stage::Filter(filt) => write!(f, "{}", filt),
Stage::Transform(Transform::Sort(field)) => write!(f, "sort:{:?}", field),
Stage::Transform(Transform::Limit(n)) => write!(f, "limit:{}", n),
Stage::Transform(Transform::Select(fields)) => write!(f, "select:{}", fields.join(",")),
Stage::Transform(Transform::Count) => write!(f, "count"),
Stage::Transform(Transform::Connectivity) => write!(f, "connectivity"),
Stage::Transform(Transform::DominatingSet) => write!(f, "dominating-set"),
Stage::Algorithm(a) => write!(f, "{}", a.algo),
}
}
@ -291,6 +362,8 @@ impl fmt::Display for Filter {
Filter::Age(c) => write!(f, "age:{}", c),
Filter::ContentLen(c) => write!(f, "content-len:{}", c),
Filter::Provenance(p) => write!(f, "provenance:{}", p),
Filter::NotVisited { agent, duration } => write!(f, "not-visited:{},{}s", agent, duration),
Filter::Visited { agent } => write!(f, "visited:{}", agent),
Filter::Negated(inner) => write!(f, "!{}", inner),
}
}
@ -354,7 +427,7 @@ pub fn run_query(
}
current = match stage {
Stage::Generator(g) => run_generator(g, store),
Stage::Generator(gen) => run_generator(gen, store),
Stage::Filter(filt) => {
current.into_iter()
@ -390,15 +463,12 @@ pub fn run_query(
current
}
fn run_generator(g: &Generator, store: &Store) -> Vec<(String, f64)> {
match g {
fn run_generator(gen: &Generator, store: &Store) -> Vec<(String, f64)> {
match gen {
Generator::All => {
store.all_keys().unwrap_or_default().into_iter()
.filter_map(|key| {
let n = store.get_node(&key).ok()??;
if n.deleted { return None; }
Some((key, n.weight as f64))
})
store.nodes.iter()
.filter(|(_, n)| !n.deleted)
.map(|(key, n)| (key.clone(), n.weight as f64))
.collect()
}
Generator::Match(terms) => {
@ -411,8 +481,8 @@ fn run_generator(g: &Generator, store: &Store) -> Vec<(String, f64)> {
}
}
pub fn eval_filter(filt: &Filter, key: &str, store: &Store, now: i64) -> bool {
let node = match store.get_node(key).ok().flatten() {
fn eval_filter(filt: &Filter, key: &str, store: &Store, now: i64) -> bool {
let node = match store.nodes.get(key) {
Some(n) => n,
None => return false,
};
@ -427,11 +497,18 @@ pub fn eval_filter(filt: &Filter, key: &str, store: &Store, now: i64) -> bool {
}
Filter::ContentLen(cmp) => cmp.matches(node.content.len() as f64),
Filter::Provenance(p) => node.provenance == *p,
Filter::NotVisited { agent, duration } => {
let last = store.last_visited(key, agent);
last == 0 || (now - last) > *duration
}
Filter::Visited { agent } => {
store.last_visited(key, agent) > 0
}
Filter::Negated(inner) => !eval_filter(inner, key, store, now),
}
}
pub fn run_transform(
fn run_transform(
xform: &Transform,
mut items: Vec<(String, f64)>,
store: &Store,
@ -445,15 +522,15 @@ pub fn run_transform(
}
SortField::Timestamp => {
items.sort_by(|a, b| {
let ta = store.get_node(&a.0).ok().flatten().map(|n| n.timestamp).unwrap_or(0);
let tb = store.get_node(&b.0).ok().flatten().map(|n| n.timestamp).unwrap_or(0);
let ta = store.nodes.get(&a.0).map(|n| n.timestamp).unwrap_or(0);
let tb = store.nodes.get(&b.0).map(|n| n.timestamp).unwrap_or(0);
tb.cmp(&ta) // desc
});
}
SortField::ContentLen => {
items.sort_by(|a, b| {
let la = store.get_node(&a.0).ok().flatten().map(|n| n.content.len()).unwrap_or(0);
let lb = store.get_node(&b.0).ok().flatten().map(|n| n.content.len()).unwrap_or(0);
let la = store.nodes.get(&a.0).map(|n| n.content.len()).unwrap_or(0);
let lb = store.nodes.get(&b.0).map(|n| n.content.len()).unwrap_or(0);
lb.cmp(&la) // desc
});
}
@ -464,52 +541,6 @@ pub fn run_transform(
db.cmp(&da) // desc
});
}
SortField::Isolation => {
// Score nodes by their community's isolation.
// Most isolated communities first (highest internal edge ratio).
let iso = graph.community_isolation();
let comms = graph.communities();
items.sort_by(|a, b| {
let ca = comms.get(&a.0).copied().unwrap_or(0);
let cb = comms.get(&b.0).copied().unwrap_or(0);
let sa = iso.get(&ca).copied().unwrap_or(1.0);
let sb = iso.get(&cb).copied().unwrap_or(1.0);
sb.total_cmp(&sa) // most isolated first
});
}
SortField::Key => {
items.sort_by(|a, b| a.0.cmp(&b.0));
}
SortField::Named(field, asc) => {
// Resolve field from node properties
let resolve = |key: &str| -> Option<f64> {
let node = store.get_node(key).ok()??;
match field.as_str() {
"weight" => Some(node.weight as f64),
"emotion" => Some(node.emotion as f64),
"retrievals" => Some(node.retrievals as f64),
"uses" => Some(node.uses as f64),
"wrongs" => Some(node.wrongs as f64),
"created" => Some(node.created_at as f64),
"timestamp" => Some(node.timestamp as f64),
"degree" => Some(graph.degree(key) as f64),
"content_len" => Some(node.content.len() as f64),
_ => None,
}
};
let asc = *asc;
items.sort_by(|a, b| {
let va = resolve(&a.0);
let vb = resolve(&b.0);
let ord = match (va, vb) {
(Some(a), Some(b)) => a.total_cmp(&b),
(Some(_), None) => std::cmp::Ordering::Less,
(None, Some(_)) => std::cmp::Ordering::Greater,
(None, None) => a.0.cmp(&b.0),
};
if asc { ord } else { ord.reverse() }
});
}
SortField::Priority => {
// Pre-compute priorities to avoid O(n log n) calls
// inside the sort comparator.
@ -526,22 +557,6 @@ pub fn run_transform(
pb.total_cmp(&pa) // desc
});
}
SortField::Composite(terms) => {
let cache = CompositeCache::build(&items, store, graph);
let scores: HashMap<String, f64> = items.iter()
.map(|(key, _)| {
let s: f64 = terms.iter()
.map(|(field, w)| score_field(field, key, store, graph, &cache) * w)
.sum();
(key.clone(), s)
})
.collect();
items.sort_by(|a, b| {
let sa = scores.get(&a.0).copied().unwrap_or(0.0);
let sb = scores.get(&b.0).copied().unwrap_or(0.0);
sb.total_cmp(&sa) // highest composite score first
});
}
}
items
}
@ -549,57 +564,6 @@ pub fn run_transform(
items.truncate(*n);
items
}
// Output mode directives - don't modify result set, handled at output layer
Transform::Select(_) | Transform::Count | Transform::Connectivity => items,
Transform::DominatingSet => {
// Greedy 3-covering dominating set: pick the node that covers
// the most under-covered neighbors, repeat until every node
// has been covered 3 times (by 3 different selected seeds).
use std::collections::HashMap as HMap;
let input_keys: std::collections::HashSet<String> = items.iter().map(|(k, _)| k.clone()).collect();
let mut cover_count: HMap<String, usize> = items.iter().map(|(k, _)| (k.clone(), 0)).collect();
let mut selected: Vec<(String, f64)> = Vec::new();
let mut selected_set: std::collections::HashSet<String> = std::collections::HashSet::new();
const REQUIRED_COVERAGE: usize = 3;
loop {
// Find the unselected node that covers the most under-covered nodes
let best = items.iter()
.filter(|(k, _)| !selected_set.contains(k.as_str()))
.map(|(k, _)| {
let mut value = 0usize;
// Count self if under-covered
if cover_count.get(k).copied().unwrap_or(0) < REQUIRED_COVERAGE {
value += 1;
}
for (nbr, _) in graph.neighbors(k) {
if input_keys.contains(nbr.as_str())
&& cover_count.get(nbr.as_str()).copied().unwrap_or(0) < REQUIRED_COVERAGE {
value += 1;
}
}
(k.clone(), value)
})
.max_by_key(|(_, v)| *v);
let Some((key, value)) = best else { break };
if value == 0 { break; } // everything covered 3x
// Mark coverage
*cover_count.entry(key.clone()).or_default() += 1;
for (nbr, _) in graph.neighbors(&key) {
if let Some(c) = cover_count.get_mut(nbr.as_str()) {
*c += 1;
}
}
let score = items.iter().find(|(k, _)| k == &key).map(|(_, s)| *s).unwrap_or(1.0);
selected.push((key.clone(), score));
selected_set.insert(key);
}
selected
}
}
}
@ -633,13 +597,12 @@ pub fn match_seeds_opts(
// Build component index: word → vec of (original key, weight)
let mut component_map: HashMap<String, Vec<(String, f64)>> = HashMap::new();
// Index-only pass: no capnp reads needed for key matching
store.for_each_key_weight(|key, weight| {
store.for_each_node(|key, _content, weight| {
let lkey = key.to_lowercase();
key_map.insert(lkey.clone(), (key.to_owned(), weight as f64));
// Split key on hyphens, underscores, dots, hashes for component matching
for component in lkey.split(['-', '_', '.', '#']) {
for component in lkey.split(|c: char| c == '-' || c == '_' || c == '.' || c == '#') {
if component.len() >= 3 {
component_map.entry(component.to_owned())
.or_default()
@ -658,8 +621,8 @@ pub fn match_seeds_opts(
}
// Strategy 2: key component match (0.5× weight) — only when explicitly requested
if component_match
&& let Some(matches) = component_map.get(term.as_str()) {
if component_match {
if let Some(matches) = component_map.get(term.as_str()) {
for (orig_key, node_weight) in matches {
let score = term_weight * node_weight * 0.5;
*seed_map.entry(orig_key.clone()).or_insert(0.0) += score;
@ -667,6 +630,7 @@ pub fn match_seeds_opts(
}
continue;
}
}
// Strategy 3: content match (0.2× weight) — only when explicitly requested
if content_fallback {
@ -740,10 +704,10 @@ fn run_spread(
stage: &AlgoStage,
_debug: bool,
) -> Vec<(String, f64)> {
let cfg = crate::config::get();
let max_hops = stage.param_u32("max_hops", cfg.max_hops);
let edge_decay = stage.param_f64("edge_decay", cfg.edge_decay);
let min_activation = stage.param_f64("min_activation", cfg.min_activation * 0.1);
let store_params = store.params();
let max_hops = stage.param_u32("max_hops", store_params.max_hops);
let edge_decay = stage.param_f64("edge_decay", store_params.edge_decay);
let min_activation = stage.param_f64("min_activation", store_params.min_activation * 0.1);
spreading_activation(seeds, graph, store, max_hops, edge_decay, min_activation)
}
@ -1204,7 +1168,7 @@ fn run_manifold(
/// sum at each node, and the combined activation map propagates on
/// the next hop. This creates interference patterns — nodes where
/// multiple wavefronts overlap get reinforced and radiate stronger.
pub fn spreading_activation(
fn spreading_activation(
seeds: &[(String, f64)],
graph: &Graph,
store: &impl StoreView,
@ -1268,6 +1232,15 @@ pub fn search_weighted(
search_weighted_inner(terms, store, false, 5)
}
/// Like search_weighted but with debug output and configurable result count.
pub fn search_weighted_debug(
terms: &BTreeMap<String, f64>,
store: &impl StoreView,
max_results: usize,
) -> Vec<SearchResult> {
search_weighted_inner(terms, store, true, max_results)
}
fn search_weighted_inner(
terms: &BTreeMap<String, f64>,
store: &impl StoreView,
@ -1314,3 +1287,41 @@ pub fn search(query: &str, store: &impl StoreView) -> Vec<SearchResult> {
search_weighted(&terms, store)
}
/// Extract meaningful search terms from natural language.
/// Strips common English stop words, returns up to max_terms words.
pub fn extract_query_terms(text: &str, max_terms: usize) -> String {
const STOP_WORDS: &[&str] = &[
"the", "a", "an", "is", "are", "was", "were", "do", "does", "did",
"have", "has", "had", "will", "would", "could", "should", "can",
"may", "might", "shall", "been", "being", "to", "of", "in", "for",
"on", "with", "at", "by", "from", "as", "but", "or", "and", "not",
"no", "if", "then", "than", "that", "this", "it", "its", "my",
"your", "our", "we", "you", "i", "me", "he", "she", "they", "them",
"what", "how", "why", "when", "where", "about", "just", "let",
"want", "tell", "show", "think", "know", "see", "look", "make",
"get", "go", "some", "any", "all", "very", "really", "also", "too",
"so", "up", "out", "here", "there",
];
text.to_lowercase()
.split(|c: char| !c.is_alphanumeric())
.filter(|w| !w.is_empty() && w.len() > 2 && !STOP_WORDS.contains(w))
.take(max_terms)
.collect::<Vec<_>>()
.join(" ")
}
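// For example, extract_query_terms("what do you know about bcachefs journal replay", 5)
// drops the stop words and returns "bcachefs journal replay".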
/// Format search results as text lines (for hook consumption).
pub fn format_results(results: &[SearchResult]) -> String {
let mut out = String::new();
for (i, r) in results.iter().enumerate().take(5) {
let marker = if r.is_direct { "" } else { " " };
out.push_str(&format!("{}{:2}. [{:.2}/{:.2}] {}",
marker, i + 1, r.activation, r.activation, r.key));
out.push('\n');
if let Some(ref snippet) = r.snippet {
out.push_str(&format!(" {}\n", snippet));
}
}
out
}
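// Output sketch (hypothetical key): a direct hit renders as
//   " 1. [0.83/0.83] identity#core"
// spread-only hits get one extra leading space, and a snippet, when present,
// follows on its own indented line.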

View file

@ -0,0 +1,135 @@
// Text similarity: Porter-style stemming + term-frequency cosine similarity
//
// Used for interference detection (similar content, different communities)
// and schema fit scoring. Intentionally simple — ~100 lines, no
// external dependencies.
use std::collections::HashMap;
/// Minimal Porter stemmer — handles the most common English suffixes.
/// Not linguistically complete but good enough for similarity matching.
pub fn stem(word: &str) -> String {
let w = word.to_lowercase();
if w.len() <= 3 { return w; }
let w = strip_suffix(&w, "ation", "ate");
let w = strip_suffix(&w, "ness", "");
let w = strip_suffix(&w, "ment", "");
let w = strip_suffix(&w, "ting", "t");
let w = strip_suffix(&w, "ling", "l");
let w = strip_suffix(&w, "ring", "r");
let w = strip_suffix(&w, "ning", "n");
let w = strip_suffix(&w, "ding", "d");
let w = strip_suffix(&w, "ping", "p");
let w = strip_suffix(&w, "ging", "g");
let w = strip_suffix(&w, "ying", "y");
let w = strip_suffix(&w, "ied", "y");
let w = strip_suffix(&w, "ies", "y");
let w = strip_suffix(&w, "ing", "");
let w = strip_suffix(&w, "ed", "");
let w = strip_suffix(&w, "ly", "");
let w = strip_suffix(&w, "er", "");
let w = strip_suffix(&w, "al", "");
strip_suffix(&w, "s", "")
}
fn strip_suffix(word: &str, suffix: &str, replacement: &str) -> String {
if word.len() > suffix.len() + 2 && word.ends_with(suffix) {
let base = &word[..word.len() - suffix.len()];
format!("{}{}", base, replacement)
} else {
word.to_string()
}
}
/// Tokenize and stem a text into a term frequency map
pub fn term_frequencies(text: &str) -> HashMap<String, u32> {
let mut tf = HashMap::new();
for word in text.split(|c: char| !c.is_alphanumeric()) {
if word.len() > 2 {
let stemmed = stem(word);
*tf.entry(stemmed).or_default() += 1;
}
}
tf
}
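// For instance, term_frequencies("linking linked links") returns {"link": 3}:
// all three forms stem to "link", so related phrasings land on the same term.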
/// Cosine similarity between two documents using stemmed term frequencies.
/// Returns 0.0 for disjoint vocabularies, 1.0 for identical content.
pub fn cosine_similarity(doc_a: &str, doc_b: &str) -> f32 {
let tf_a = term_frequencies(doc_a);
let tf_b = term_frequencies(doc_b);
if tf_a.is_empty() || tf_b.is_empty() {
return 0.0;
}
// Dot product
let mut dot = 0.0f64;
for (term, &freq_a) in &tf_a {
if let Some(&freq_b) = tf_b.get(term) {
dot += freq_a as f64 * freq_b as f64;
}
}
// Magnitudes
let mag_a: f64 = tf_a.values().map(|&f| (f as f64).powi(2)).sum::<f64>().sqrt();
let mag_b: f64 = tf_b.values().map(|&f| (f as f64).powi(2)).sum::<f64>().sqrt();
if mag_a < 1e-10 || mag_b < 1e-10 {
return 0.0;
}
(dot / (mag_a * mag_b)) as f32
}
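// Worked example: cosine_similarity("kernel panic", "kernel oops").
// tf_a = {kernel:1, panic:1}, tf_b = {kernel:1, oop:1} ("oops" stems to "oop"),
// dot = 1, both magnitudes are sqrt(2), so the similarity is 1/2 = 0.5.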
/// Compute pairwise similarity for a set of documents.
/// Returns pairs with similarity above threshold.
pub fn pairwise_similar(
docs: &[(String, String)], // (key, content)
threshold: f32,
) -> Vec<(String, String, f32)> {
let mut results = Vec::new();
for i in 0..docs.len() {
for j in (i + 1)..docs.len() {
let sim = cosine_similarity(&docs[i].1, &docs[j].1);
if sim >= threshold {
results.push((docs[i].0.clone(), docs[j].0.clone(), sim));
}
}
}
results.sort_by(|a, b| b.2.total_cmp(&a.2));
results
}
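// Typical use (keys hypothetical): pass every node's (key, content) pair with a
// threshold around 0.6; a pair like ("btree-locking", "btree-lock-ordering", 0.74)
// surfacing first flags near-duplicate notes for the interference check.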
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_stem() {
assert_eq!(stem("running"), "runn"); // -ning → n
assert_eq!(stem("talking"), "talk"); // not matched by specific consonant rules
assert_eq!(stem("slowly"), "slow"); // -ly
// The stemmer is minimal — it doesn't need to be perfect,
// just consistent enough that related words collide.
assert_eq!(stem("observations"), "observation"); // -s stripped, -ation stays (word too short after)
}
#[test]
fn test_cosine_identical() {
let text = "the quick brown fox jumps over the lazy dog";
let sim = cosine_similarity(text, text);
assert!((sim - 1.0).abs() < 0.01, "identical docs should have sim ~1.0, got {}", sim);
}
#[test]
fn test_cosine_different() {
let a = "kernel filesystem transaction restart handling";
let b = "cooking recipe chocolate cake baking temperature";
let sim = cosine_similarity(a, b);
assert!(sim < 0.1, "unrelated docs should have low sim, got {}", sim);
}
}

599
poc-memory/src/spectral.rs Normal file
View file

@ -0,0 +1,599 @@
// Spectral decomposition of the memory graph.
//
// Computes eigenvalues and eigenvectors of the normalized graph Laplacian.
// The eigenvectors provide natural coordinates for each node — connected
// nodes land nearby, communities form clusters, bridges sit between clusters.
//
// The eigenvalue spectrum reveals:
// - Number of connected components (count of zero eigenvalues)
// - Number of natural communities (eigenvalues near zero, before the gap)
// - How well-connected the graph is (Fiedler value = second eigenvalue)
//
// The eigenvectors provide:
// - Spectral coordinates for each node (the embedding)
// - Community membership (sign/magnitude of Fiedler vector)
// - Natural projections (select which eigenvectors to include)
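// Worked example (3-node path a - b - c, unit edge weights): degrees are
// [1, 2, 1], so L_sym = I - D^{-1/2} A D^{-1/2} has off-diagonals -1/sqrt(2)
// between linked nodes and eigenvalues {0, 1, 2}. The single zero eigenvalue
// says there is one connected component; the Fiedler value is the next one (1).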
use crate::graph::Graph;
use faer::Mat;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
pub struct SpectralResult {
/// Node keys in index order
pub keys: Vec<String>,
/// Eigenvalues in ascending order
pub eigenvalues: Vec<f64>,
/// Eigenvectors: eigvecs[k] is the k-th eigenvector (ascending eigenvalue order),
/// with eigvecs[k][i] being the value for node keys[i]
pub eigvecs: Vec<Vec<f64>>,
}
/// Per-node spectral embedding, serializable to disk.
#[derive(Serialize, Deserialize)]
pub struct SpectralEmbedding {
/// Number of dimensions (eigenvectors)
pub dims: usize,
/// Eigenvalues for each dimension
pub eigenvalues: Vec<f64>,
/// Node key → coordinate vector
pub coords: HashMap<String, Vec<f64>>,
}
fn embedding_path() -> PathBuf {
crate::store::memory_dir().join("spectral-embedding.json")
}
/// Compute spectral decomposition of the memory graph.
///
/// Returns the smallest `k` eigenvalues and their eigenvectors of the
/// normalized Laplacian L_sym = I - D^{-1/2} A D^{-1/2}.
///
/// We compute the full decomposition (it's only 2000×2000, takes <1s)
/// and return the bottom k.
pub fn decompose(graph: &Graph, k: usize) -> SpectralResult {
// Only include nodes with edges (filter isolates)
let mut keys: Vec<String> = graph.nodes().iter()
.filter(|k| graph.degree(k) > 0)
.cloned()
.collect();
keys.sort();
let n = keys.len();
let isolates = graph.nodes().len() - n;
if isolates > 0 {
eprintln!("note: filtered {} isolated nodes, decomposing {} connected nodes", isolates, n);
}
let key_to_idx: HashMap<&str, usize> = keys.iter()
.enumerate()
.map(|(i, k)| (k.as_str(), i))
.collect();
// Build weighted degree vector and adjacency
let mut degree = vec![0.0f64; n];
let mut adj_entries: Vec<(usize, usize, f64)> = Vec::new();
for (i, key) in keys.iter().enumerate() {
for (neighbor, strength) in graph.neighbors(key) {
if let Some(&j) = key_to_idx.get(neighbor.as_str()) {
if j > i { // each edge once
let w = strength as f64;
adj_entries.push((i, j, w));
degree[i] += w;
degree[j] += w;
}
}
}
}
// Build normalized Laplacian: L_sym = I - D^{-1/2} A D^{-1/2}
let mut laplacian = Mat::<f64>::zeros(n, n);
// Diagonal = 1 for nodes with edges, 0 for isolates
for i in 0..n {
if degree[i] > 0.0 {
laplacian[(i, i)] = 1.0;
}
}
// Off-diagonal: -w / sqrt(d_i * d_j)
for &(i, j, w) in &adj_entries {
if degree[i] > 0.0 && degree[j] > 0.0 {
let val = -w / (degree[i] * degree[j]).sqrt();
laplacian[(i, j)] = val;
laplacian[(j, i)] = val;
}
}
// Eigendecompose
let eig = laplacian.self_adjoint_eigen(faer::Side::Lower)
.expect("eigendecomposition failed");
let s = eig.S();
let u = eig.U();
let mut eigenvalues = Vec::with_capacity(k);
let mut eigvecs = Vec::with_capacity(k);
let s_col = s.column_vector();
// Skip trivial eigenvalues (near-zero = null space from disconnected components).
// The number of zero eigenvalues equals the number of connected components.
let mut start = 0;
while start < n && s_col[start].abs() < 1e-8 {
start += 1;
}
let k = k.min(n.saturating_sub(start));
for col in start..start + k {
eigenvalues.push(s_col[col]);
let mut vec = Vec::with_capacity(n);
for row in 0..n {
vec.push(u[(row, col)]);
}
eigvecs.push(vec);
}
SpectralResult { keys, eigenvalues, eigvecs }
}
/// Print the spectral summary: eigenvalue spectrum, then each axis with
/// its extreme nodes (what the axis "means").
pub fn print_summary(result: &SpectralResult, graph: &Graph) {
let n = result.keys.len();
let k = result.eigenvalues.len();
println!("Spectral Decomposition — {} nodes, {} eigenpairs", n, k);
println!("=========================================\n");
// Compact eigenvalue table
println!("Eigenvalue spectrum:");
for (i, &ev) in result.eigenvalues.iter().enumerate() {
let gap = if i > 0 {
ev - result.eigenvalues[i - 1]
} else {
0.0
};
let gap_bar = if i > 0 {
let bars = (gap * 500.0).min(40.0) as usize;
"#".repeat(bars)
} else {
String::new()
};
println!(" λ_{:<2} = {:.6} {}", i, ev, gap_bar);
}
// Connected components
let near_zero = result.eigenvalues.iter()
.filter(|&&v| v.abs() < 1e-6)
.count();
if near_zero > 1 {
println!("\n {} eigenvalues near 0 = {} disconnected components", near_zero, near_zero);
}
// Each axis: what are the extremes?
println!("\n\nNatural axes of the knowledge space");
println!("====================================");
for axis in 0..k {
let ev = result.eigenvalues[axis];
let vec = &result.eigvecs[axis];
// Sort nodes by their value on this axis
let mut indexed: Vec<(usize, f64)> = vec.iter()
.enumerate()
.map(|(i, &v)| (i, v))
.collect();
indexed.sort_by(|a, b| a.1.total_cmp(&b.1));
// Compute the "spread" — how much this axis differentiates
let min_val = indexed.first().map(|x| x.1).unwrap_or(0.0);
let max_val = indexed.last().map(|x| x.1).unwrap_or(0.0);
println!("\n--- Axis {} (λ={:.6}, range={:.4}) ---", axis, ev, max_val - min_val);
// Show extremes: 5 most negative, 5 most positive
let show = 5;
println!(" Negative pole:");
for &(idx, val) in indexed.iter().take(show) {
let key = &result.keys[idx];
// Shorten key for display: take last component
let short = shorten_key(key);
let deg = graph.degree(key);
let comm = graph.communities().get(key).copied().unwrap_or(999);
println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short);
}
println!(" Positive pole:");
for &(idx, val) in indexed.iter().rev().take(show) {
let key = &result.keys[idx];
let short = shorten_key(key);
let deg = graph.degree(key);
let comm = graph.communities().get(key).copied().unwrap_or(999);
println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short);
}
}
}
/// Shorten a node key for display, truncating at a char boundary.
fn shorten_key(key: &str) -> &str {
// Byte-slicing at 60 could panic on multi-byte keys; cut at a char boundary instead.
key.char_indices().nth(60).map_or(key, |(i, _)| &key[..i])
}
/// Convert SpectralResult to a per-node embedding (transposing the layout).
pub fn to_embedding(result: &SpectralResult) -> SpectralEmbedding {
let dims = result.eigvecs.len();
let mut coords = HashMap::new();
for (i, key) in result.keys.iter().enumerate() {
let mut vec = Vec::with_capacity(dims);
for d in 0..dims {
vec.push(result.eigvecs[d][i]);
}
coords.insert(key.clone(), vec);
}
SpectralEmbedding {
dims,
eigenvalues: result.eigenvalues.clone(),
coords,
}
}
/// Save embedding to disk.
pub fn save_embedding(emb: &SpectralEmbedding) -> Result<(), String> {
let path = embedding_path();
let json = serde_json::to_string(emb)
.map_err(|e| format!("serialize embedding: {}", e))?;
std::fs::write(&path, json)
.map_err(|e| format!("write {}: {}", path.display(), e))?;
eprintln!("Saved {}-dim embedding for {} nodes to {}",
emb.dims, emb.coords.len(), path.display());
Ok(())
}
/// Load embedding from disk.
pub fn load_embedding() -> Result<SpectralEmbedding, String> {
let path = embedding_path();
let data = std::fs::read_to_string(&path)
.map_err(|e| format!("read {}: {}", path.display(), e))?;
serde_json::from_str(&data)
.map_err(|e| format!("parse embedding: {}", e))
}
/// Find the k nearest neighbors to a node in spectral space.
///
/// Uses weighted euclidean distance where each dimension is weighted
/// by 1/eigenvalue — lower eigenvalues (coarser structure) matter more.
pub fn nearest_neighbors(
emb: &SpectralEmbedding,
key: &str,
k: usize,
) -> Vec<(String, f64)> {
let target = match emb.coords.get(key) {
Some(c) => c,
None => return vec![],
};
let weights = eigenvalue_weights(&emb.eigenvalues);
let mut distances: Vec<(String, f64)> = emb.coords.iter()
.filter(|(k, _)| k.as_str() != key)
.map(|(k, coords)| (k.clone(), weighted_distance(target, coords, &weights)))
.collect();
distances.sort_by(|a, b| a.1.total_cmp(&b.1));
distances.truncate(k);
distances
}
/// Find nearest neighbors to a set of seed nodes (multi-seed query).
/// Returns nodes ranked by minimum distance to any seed.
pub fn nearest_to_seeds(
emb: &SpectralEmbedding,
seeds: &[&str],
k: usize,
) -> Vec<(String, f64)> {
nearest_to_seeds_weighted(emb, &seeds.iter().map(|&s| (s, 1.0)).collect::<Vec<_>>(), None, k)
}
/// Find nearest neighbors to weighted seed nodes, using link weights.
///
/// Each seed has a weight (from query term weighting). For candidates
/// directly linked to a seed, the spectral distance is scaled by
/// 1/(1 + link_strength), so strong links make the effective distance shorter.
/// Seed weight scales the contribution: high-weight seeds pull harder.
///
/// Returns (key, effective_distance) sorted by distance ascending.
pub fn nearest_to_seeds_weighted(
emb: &SpectralEmbedding,
seeds: &[(&str, f64)], // (key, seed_weight)
graph: Option<&crate::graph::Graph>,
k: usize,
) -> Vec<(String, f64)> {
let seed_set: HashSet<&str> = seeds.iter().map(|(s, _)| *s).collect();
let seed_data: Vec<(&str, &Vec<f64>, f64)> = seeds.iter()
.filter_map(|(s, w)| {
emb.coords.get(*s)
.filter(|c| c.iter().any(|&v| v.abs() > 1e-12)) // skip degenerate seeds
.map(|c| (*s, c, *w))
})
.collect();
if seed_data.is_empty() {
return vec![];
}
// Build seed→neighbor link strength lookup
let link_strengths: HashMap<(&str, &str), f32> = if let Some(g) = graph {
let mut map = HashMap::new();
for &(seed_key, _) in seeds {
for (neighbor, strength) in g.neighbors(seed_key) {
map.insert((seed_key, neighbor.as_str()), strength);
}
}
map
} else {
HashMap::new()
};
let dim_weights = eigenvalue_weights(&emb.eigenvalues);
let mut distances: Vec<(String, f64)> = emb.coords.iter()
.filter(|(k, coords)| {
!seed_set.contains(k.as_str())
&& coords.iter().any(|&v| v.abs() > 1e-12) // skip degenerate zero-coord nodes
})
.map(|(candidate_key, coords)| {
let min_dist = seed_data.iter()
.map(|(seed_key, sc, seed_weight)| {
let raw_dist = weighted_distance(coords, sc, &dim_weights);
// Scale by link strength if directly connected
let link_scale = link_strengths
.get(&(*seed_key, candidate_key.as_str()))
.map(|&s| 1.0 / (1.0 + s as f64)) // strong link → smaller distance
.unwrap_or(1.0);
raw_dist * link_scale / seed_weight
})
.fold(f64::MAX, f64::min);
(candidate_key.clone(), min_dist)
})
.collect();
distances.sort_by(|a, b| a.1.total_cmp(&b.1));
distances.truncate(k);
distances
}
/// Weighted euclidean distance in spectral space.
/// Dimensions weighted by 1/eigenvalue — coarser structure matters more.
fn weighted_distance(a: &[f64], b: &[f64], weights: &[f64]) -> f64 {
a.iter()
.zip(b.iter())
.zip(weights.iter())
.map(|((&x, &y), &w)| w * (x - y) * (x - y))
.sum::<f64>()
.sqrt()
}
/// Compute eigenvalue-inverse weights for distance calculations.
fn eigenvalue_weights(eigenvalues: &[f64]) -> Vec<f64> {
eigenvalues.iter()
.map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 })
.collect()
}
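// For instance, eigenvalues [0.02, 0.20] give weights [50.0, 5.0], so
// a = [0.10, 0.30] and b = [0.20, 0.10] are sqrt(50*0.01 + 5*0.04) ~= 0.84
// apart: disagreement on the coarse first axis dominates the distance.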
/// Compute cluster centers (centroids) in spectral space.
pub fn cluster_centers(
emb: &SpectralEmbedding,
communities: &HashMap<String, u32>,
) -> HashMap<u32, Vec<f64>> {
let mut sums: HashMap<u32, (Vec<f64>, usize)> = HashMap::new();
for (key, coords) in &emb.coords {
if let Some(&comm) = communities.get(key) {
let entry = sums.entry(comm)
.or_insert_with(|| (vec![0.0; emb.dims], 0));
for (i, &c) in coords.iter().enumerate() {
entry.0[i] += c;
}
entry.1 += 1;
}
}
sums.into_iter()
.map(|(comm, (sum, count))| {
let center: Vec<f64> = sum.iter()
.map(|s| s / count as f64)
.collect();
(comm, center)
})
.collect()
}
/// Per-node analysis of spectral position relative to communities.
pub struct SpectralPosition {
pub key: String,
pub community: u32,
/// Distance to own community center
pub dist_to_center: f64,
/// Distance to nearest OTHER community center
pub dist_to_nearest: f64,
/// Which community is nearest (other than own)
pub nearest_community: u32,
/// dist_to_center / median_dist_in_community (>1 = outlier)
pub outlier_score: f64,
/// dist_to_center / dist_to_nearest (>1 = between clusters, potential bridge)
pub bridge_score: f64,
}
/// Analyze spectral positions for all nodes.
///
/// Returns positions sorted by outlier_score descending (most displaced first).
pub fn analyze_positions(
emb: &SpectralEmbedding,
communities: &HashMap<String, u32>,
) -> Vec<SpectralPosition> {
let centers = cluster_centers(emb, communities);
let weights = eigenvalue_weights(&emb.eigenvalues);
// Compute distances to own community center
let mut by_community: HashMap<u32, Vec<f64>> = HashMap::new();
let mut node_dists: Vec<(String, u32, f64)> = Vec::new();
for (key, coords) in &emb.coords {
if let Some(&comm) = communities.get(key) {
if let Some(center) = centers.get(&comm) {
let dist = weighted_distance(coords, center, &weights);
by_community.entry(comm).or_default().push(dist);
node_dists.push((key.clone(), comm, dist));
}
}
}
// Median distance per community for outlier scoring
let medians: HashMap<u32, f64> = by_community.into_iter()
.map(|(comm, mut dists)| {
dists.sort_by(|a, b| a.total_cmp(b));
let median = if dists.is_empty() {
1.0
} else if dists.len() % 2 == 0 {
(dists[dists.len() / 2 - 1] + dists[dists.len() / 2]) / 2.0
} else {
dists[dists.len() / 2]
};
(comm, median.max(1e-6))
})
.collect();
let mut positions: Vec<SpectralPosition> = node_dists.into_iter()
.map(|(key, comm, dist_to_center)| {
let coords = &emb.coords[&key];
let (nearest_community, dist_to_nearest) = centers.iter()
.filter(|(&c, _)| c != comm)
.map(|(&c, center)| (c, weighted_distance(coords, center, &weights)))
.min_by(|a, b| a.1.total_cmp(&b.1))
.unwrap_or((comm, f64::MAX));
let median = medians.get(&comm).copied().unwrap_or(1.0);
let outlier_score = dist_to_center / median;
let bridge_score = if dist_to_nearest > 1e-8 {
dist_to_center / dist_to_nearest
} else {
0.0
};
SpectralPosition {
key, community: comm,
dist_to_center, dist_to_nearest, nearest_community,
outlier_score, bridge_score,
}
})
.collect();
positions.sort_by(|a, b| b.outlier_score.total_cmp(&a.outlier_score));
positions
}
/// Find pairs of nodes that are spectrally close but not linked in the graph.
///
/// These are the most valuable candidates for extractor agents —
/// the spectral structure says they should be related, but nobody
/// has articulated why.
pub fn unlinked_neighbors(
emb: &SpectralEmbedding,
linked_pairs: &HashSet<(String, String)>,
max_pairs: usize,
) -> Vec<(String, String, f64)> {
let weights = eigenvalue_weights(&emb.eigenvalues);
let keys: Vec<&String> = emb.coords.keys().collect();
let mut pairs: Vec<(String, String, f64)> = Vec::new();
for (i, k1) in keys.iter().enumerate() {
let c1 = &emb.coords[*k1];
for k2 in keys.iter().skip(i + 1) {
// Skip if already linked
let pair_fwd = ((*k1).clone(), (*k2).clone());
let pair_rev = ((*k2).clone(), (*k1).clone());
if linked_pairs.contains(&pair_fwd) || linked_pairs.contains(&pair_rev) {
continue;
}
let dist = weighted_distance(c1, &emb.coords[*k2], &weights);
pairs.push(((*k1).clone(), (*k2).clone(), dist));
}
}
pairs.sort_by(|a, b| a.2.total_cmp(&b.2));
pairs.truncate(max_pairs);
pairs
}
/// Approximate spectral coordinates for a new node using Nyström extension.
///
/// Given a new node's edges to existing nodes, estimate where it would
/// land in spectral space without recomputing the full decomposition.
/// Uses weighted average of neighbors' coordinates, weighted by edge strength.
pub fn nystrom_project(
emb: &SpectralEmbedding,
neighbors: &[(&str, f32)], // (key, edge_strength)
) -> Option<Vec<f64>> {
let mut weighted_sum = vec![0.0f64; emb.dims];
let mut total_weight = 0.0f64;
for &(key, strength) in neighbors {
if let Some(coords) = emb.coords.get(key) {
let w = strength as f64;
for (i, &c) in coords.iter().enumerate() {
weighted_sum[i] += w * c;
}
total_weight += w;
}
}
if total_weight < 1e-8 {
return None;
}
Some(weighted_sum.iter().map(|s| s / total_weight).collect())
}
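// e.g. a new node with edges to A (strength 2.0, coords [0.1, 0.2]) and
// B (strength 1.0, coords [0.4, -0.1]) is placed at
// (2*[0.1, 0.2] + 1*[0.4, -0.1]) / 3 = [0.2, 0.1]. The keys are illustrative.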
/// Classify a spectral position: well-integrated, outlier, bridge, or orphan.
pub fn classify_position(pos: &SpectralPosition) -> &'static str {
if pos.bridge_score > 0.7 {
"bridge" // between two communities
} else if pos.outlier_score > 2.0 {
"outlier" // far from own community center
} else if pos.outlier_score < 0.5 {
"core" // close to community center
} else {
"peripheral" // normal community member
}
}
/// Identify which spectral dimensions a set of nodes load on most heavily.
/// Returns dimension indices sorted by total loading.
pub fn dominant_dimensions(emb: &SpectralEmbedding, keys: &[&str]) -> Vec<(usize, f64)> {
let coords: Vec<&Vec<f64>> = keys.iter()
.filter_map(|k| emb.coords.get(*k))
.collect();
if coords.is_empty() {
return vec![];
}
let mut dim_loading: Vec<(usize, f64)> = (0..emb.dims)
.map(|d| {
let loading: f64 = coords.iter()
.map(|c| c[d].abs())
.sum();
(d, loading)
})
.collect();
dim_loading.sort_by(|a, b| b.1.total_cmp(&a.1));
dim_loading
}

347
poc-memory/src/store/mod.rs Normal file
View file

@ -0,0 +1,347 @@
// Append-only Cap'n Proto storage + derived KV cache
//
// Two log files are source of truth:
// nodes.capnp - ContentNode messages
// relations.capnp - Relation messages
//
// The Store struct is the derived cache: latest version per UUID,
// rebuilt from logs when stale. Three-tier load strategy:
// 1. rkyv mmap snapshot (snapshot.rkyv) — ~4ms deserialize
// 2. bincode cache (state.bin) — ~10ms
// 3. capnp log replay — ~40ms
// Staleness: log file sizes embedded in cache headers.
//
// Module layout:
// types.rs — Node, Relation, enums, capnp macros, path helpers
// parse.rs — markdown → MemoryUnit parsing
// view.rs — zero-copy read-only access (StoreView, MmapView)
// persist.rs — load, save, replay, append, snapshot (all disk IO)
// ops.rs — mutations (upsert, delete, decay, cap_degree, etc.)
// mod.rs — re-exports, key resolution, ingestion, rendering
mod types;
mod parse;
mod view;
mod persist;
mod ops;
// Re-export everything callers need
pub use types::{
memory_dir, nodes_path,
now_epoch, epoch_to_local, format_date, format_datetime, format_datetime_space, compact_timestamp, today,
Node, Relation, NodeType, Provenance, RelationType,
RetrievalEvent, Params, GapRecord, Store,
new_node, new_relation,
};
pub use parse::{MemoryUnit, parse_units};
pub use view::{StoreView, AnyView};
pub use persist::fsck;
pub use persist::strip_md_keys;
use crate::graph::{self, Graph};
use std::fs;
use std::io::Write as IoWrite;
use std::path::Path;
use parse::classify_filename;
/// Strip .md suffix from a key, handling both bare keys and section keys.
/// "journal.md#j-2026" → "journal#j-2026", "identity.md" → "identity", "identity" → "identity"
pub fn strip_md_suffix(key: &str) -> String {
if let Some((file, section)) = key.split_once('#') {
let bare = file.strip_suffix(".md").unwrap_or(file);
format!("{}#{}", bare, section)
} else {
key.strip_suffix(".md").unwrap_or(key).to_string()
}
}
impl Store {
pub fn build_graph(&self) -> Graph {
graph::build_graph(self)
}
pub fn resolve_key(&self, target: &str) -> Result<String, String> {
// Strip .md suffix if present — keys no longer use it
let bare = strip_md_suffix(target);
if self.nodes.contains_key(&bare) {
return Ok(bare);
}
let matches: Vec<_> = self.nodes.keys()
.filter(|k| k.to_lowercase().contains(&target.to_lowercase()))
.cloned().collect();
match matches.len() {
0 => Err(format!("No entry for '{}'. Run 'init'?", target)),
1 => Ok(matches[0].clone()),
n if n <= 10 => {
let list = matches.join("\n ");
Err(format!("Ambiguous '{}'. Matches:\n {}", target, list))
}
n => Err(format!("Too many matches for '{}' ({}). Be more specific.", target, n)),
}
}
/// Resolve a link target to (key, uuid).
fn resolve_node_uuid(&self, target: &str) -> Option<(String, [u8; 16])> {
let bare = strip_md_suffix(target);
let n = self.nodes.get(&bare)?;
Some((bare, n.uuid))
}
/// Append retrieval event to retrieval.log without needing a Store instance.
pub fn log_retrieval_static(query: &str, results: &[String]) {
let path = memory_dir().join("retrieval.log");
let line = format!("[{}] q=\"{}\" hits={}\n", today(), query, results.len());
if let Ok(mut f) = fs::OpenOptions::new()
.create(true).append(true).open(&path) {
let _ = f.write_all(line.as_bytes());
}
}
/// Scan markdown files and index all memory units
pub fn init_from_markdown(&mut self) -> Result<usize, String> {
let dir = memory_dir();
let mut count = 0;
if dir.exists() {
// Build edge set for O(1) dedup during ingestion
let mut edge_set = self.build_edge_set();
count = self.scan_dir_for_init(&dir, &mut edge_set)?;
}
Ok(count)
}
/// Build a HashSet of existing (source, target) UUID pairs for O(1) dedup.
fn build_edge_set(&self) -> std::collections::HashSet<([u8; 16], [u8; 16])> {
let mut set = std::collections::HashSet::with_capacity(self.relations.len() * 2);
for r in &self.relations {
set.insert((r.source, r.target));
set.insert((r.target, r.source));
}
set
}
fn scan_dir_for_init(
&mut self,
dir: &Path,
edge_set: &mut std::collections::HashSet<([u8; 16], [u8; 16])>,
) -> Result<usize, String> {
let mut count = 0;
let entries = fs::read_dir(dir)
.map_err(|e| format!("read dir {}: {}", dir.display(), e))?;
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
count += self.scan_dir_for_init(&path, edge_set)?;
continue;
}
let Some(ext) = path.extension() else { continue };
if ext != "md" { continue }
let filename = path.file_name().unwrap().to_string_lossy().to_string();
let content = fs::read_to_string(&path)
.map_err(|e| format!("read {}: {}", path.display(), e))?;
let units = parse_units(&filename, &content);
let (new_count, _) = self.ingest_units(&units, &filename)?;
count += new_count;
// Create relations from links
let mut new_relations = Vec::new();
for unit in &units {
let source_uuid = match self.nodes.get(&unit.key) {
Some(n) => n.uuid,
None => continue,
};
for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
let Some((key, uuid)) = self.resolve_node_uuid(link) else { continue };
if !edge_set.contains(&(source_uuid, uuid)) {
edge_set.insert((source_uuid, uuid));
edge_set.insert((uuid, source_uuid));
new_relations.push(new_relation(
source_uuid, uuid, RelationType::Link, 1.0,
&unit.key, &key,
));
}
}
for cause in &unit.causes {
let Some((key, uuid)) = self.resolve_node_uuid(cause) else { continue };
if !edge_set.contains(&(uuid, source_uuid)) {
edge_set.insert((uuid, source_uuid));
new_relations.push(new_relation(
uuid, source_uuid, RelationType::Causal, 1.0,
&key, &unit.key,
));
}
}
}
if !new_relations.is_empty() {
self.append_relations(&new_relations)?;
self.relations.extend(new_relations);
}
}
Ok(count)
}
/// Process parsed memory units: diff against existing nodes, persist changes.
/// Holds StoreLock across refresh + check + write to prevent duplicate UUIDs.
fn ingest_units(&mut self, units: &[MemoryUnit], filename: &str) -> Result<(usize, usize), String> {
let _lock = types::StoreLock::acquire()?;
self.refresh_nodes()?;
let node_type = classify_filename(filename);
let mut new_nodes = Vec::new();
let mut updated_nodes = Vec::new();
for (pos, unit) in units.iter().enumerate() {
if let Some(existing) = self.nodes.get(&unit.key) {
if existing.content != unit.content || existing.position != pos as u32 {
let mut node = existing.clone();
node.content = unit.content.clone();
node.position = pos as u32;
node.version += 1;
if let Some(ref s) = unit.state { node.state_tag = s.clone(); }
if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); }
updated_nodes.push(node);
}
} else {
let mut node = new_node(&unit.key, &unit.content);
node.node_type = node_type;
node.position = pos as u32;
if let Some(ref s) = unit.state { node.state_tag = s.clone(); }
if let Some(ref s) = unit.source_ref { node.source_ref = s.clone(); }
new_nodes.push(node);
}
}
if !new_nodes.is_empty() {
self.append_nodes_unlocked(&new_nodes)?;
for node in &new_nodes {
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(node.key.clone(), node.clone());
}
}
if !updated_nodes.is_empty() {
self.append_nodes_unlocked(&updated_nodes)?;
for node in &updated_nodes {
self.nodes.insert(node.key.clone(), node.clone());
}
}
Ok((new_nodes.len(), updated_nodes.len()))
}
/// Import a markdown file into the store, parsing it into nodes.
pub fn import_file(&mut self, path: &Path) -> Result<(usize, usize), String> {
let filename = path.file_name().unwrap().to_string_lossy().to_string();
let content = fs::read_to_string(path)
.map_err(|e| format!("read {}: {}", path.display(), e))?;
let units = parse_units(&filename, &content);
self.ingest_units(&units, &filename)
}
/// Gather all sections for a file key, sorted by position.
pub fn file_sections(&self, file_key: &str) -> Option<Vec<&Node>> {
let prefix = format!("{}#", file_key);
let mut sections: Vec<_> = self.nodes.values()
.filter(|n| n.key == file_key || n.key.starts_with(&prefix))
.collect();
if sections.is_empty() {
return None;
}
sections.sort_by_key(|n| n.position);
Some(sections)
}
/// Render a file key as plain content (no mem markers).
pub fn render_file(&self, file_key: &str) -> Option<String> {
let sections = self.file_sections(file_key)?;
let mut output = String::new();
for node in &sections {
output.push_str(&node.content);
if !node.content.ends_with('\n') {
output.push('\n');
}
output.push('\n');
}
Some(output.trim_end().to_string())
}
/// Render a file key back to markdown with reconstituted mem markers.
pub fn export_to_markdown(&self, file_key: &str) -> Option<String> {
let sections = self.file_sections(file_key)?;
let mut output = String::new();
for node in &sections {
if node.key.contains('#') {
let section_id = node.key.rsplit_once('#').map_or("", |(_, s)| s);
let links: Vec<_> = self.relations.iter()
.filter(|r| r.source_key == node.key && !r.deleted
&& r.rel_type != RelationType::Causal)
.map(|r| r.target_key.clone())
.collect();
let causes: Vec<_> = self.relations.iter()
.filter(|r| r.target_key == node.key && !r.deleted
&& r.rel_type == RelationType::Causal)
.map(|r| r.source_key.clone())
.collect();
let mut marker_parts = vec![format!("id={}", section_id)];
if !links.is_empty() {
marker_parts.push(format!("links={}", links.join(",")));
}
if !causes.is_empty() {
marker_parts.push(format!("causes={}", causes.join(",")));
}
output.push_str(&format!("<!-- mem: {} -->\n", marker_parts.join(" ")));
}
output.push_str(&node.content);
if !node.content.ends_with('\n') {
output.push('\n');
}
output.push('\n');
}
Some(output.trim_end().to_string())
}
/// Find the episodic node that best matches the given entry text.
pub fn find_journal_node(&self, entry_text: &str) -> Option<String> {
if entry_text.is_empty() {
return None;
}
let words: Vec<&str> = entry_text.split_whitespace()
.filter(|w| w.len() > 5)
.take(5)
.collect();
let mut best_key = None;
let mut best_score = 0;
for (key, node) in &self.nodes {
if node.node_type != NodeType::EpisodicSession {
continue;
}
let content_lower = node.content.to_lowercase();
let score: usize = words.iter()
.filter(|w| content_lower.contains(&w.to_lowercase()))
.count();
if score > best_score {
best_score = score;
best_key = Some(key.clone());
}
}
best_key
}
}

284
poc-memory/src/store/ops.rs Normal file
View file

@ -0,0 +1,284 @@
// Mutation operations on the store
//
// CRUD (upsert, delete, modify), feedback tracking (mark_used, mark_wrong),
// maintenance (decay, fix_categories, cap_degree), and graph metrics.
use super::types::*;
use std::collections::{HashMap, HashSet};
impl Store {
/// Add or update a node (appends to log + updates cache).
/// Holds StoreLock across refresh + check + write to prevent duplicate UUIDs.
pub fn upsert_node(&mut self, mut node: Node) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
self.refresh_nodes()?;
if let Some(existing) = self.nodes.get(&node.key) {
node.uuid = existing.uuid;
node.version = existing.version + 1;
}
self.append_nodes_unlocked(&[node.clone()])?;
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(node.key.clone(), node);
Ok(())
}
/// Add a relation (appends to log + updates cache)
pub fn add_relation(&mut self, rel: Relation) -> Result<(), String> {
self.append_relations(std::slice::from_ref(&rel))?;
self.relations.push(rel);
Ok(())
}
/// Upsert a node: update if exists (and content changed), create if not.
/// Returns: "created", "updated", or "unchanged".
///
/// Provenance is determined by the POC_PROVENANCE env var if set,
/// otherwise defaults to Manual.
pub fn upsert(&mut self, key: &str, content: &str) -> Result<&'static str, String> {
let prov = Provenance::from_env()
.map(|p| p.label().to_string())
.unwrap_or_else(|| "manual".to_string());
self.upsert_provenance(key, content, &prov)
}
/// Upsert with explicit provenance (for agent-created nodes).
/// Holds StoreLock across refresh + check + write to prevent duplicate UUIDs.
pub fn upsert_provenance(&mut self, key: &str, content: &str, provenance: &str) -> Result<&'static str, String> {
let _lock = StoreLock::acquire()?;
self.refresh_nodes()?;
if let Some(existing) = self.nodes.get(key) {
if existing.content == content {
return Ok("unchanged");
}
let mut node = existing.clone();
node.content = content.to_string();
node.provenance = provenance.to_string();
node.version += 1;
self.append_nodes_unlocked(std::slice::from_ref(&node))?;
self.nodes.insert(key.to_string(), node);
Ok("updated")
} else {
let mut node = new_node(key, content);
node.provenance = provenance.to_string();
self.append_nodes_unlocked(std::slice::from_ref(&node))?;
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(key.to_string(), node);
Ok("created")
}
}
/// Soft-delete a node (appends deleted version, removes from cache).
/// Holds StoreLock across refresh + write to see concurrent creates.
pub fn delete_node(&mut self, key: &str) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
self.refresh_nodes()?;
let node = self.nodes.get(key)
.ok_or_else(|| format!("No node '{}'", key))?;
let mut deleted = node.clone();
deleted.deleted = true;
deleted.version += 1;
self.append_nodes_unlocked(std::slice::from_ref(&deleted))?;
self.nodes.remove(key);
Ok(())
}
/// Rename a node: change its key, update debug strings on all edges.
///
/// Graph edges (source/target UUIDs) are unaffected — they're already
/// UUID-based. We update the human-readable source_key/target_key strings
/// on relations, and created_at is preserved untouched.
///
/// Appends: (new_key, v+1) + (old_key, deleted, v+1) + updated relations.
/// Holds StoreLock across refresh + write to prevent races.
pub fn rename_node(&mut self, old_key: &str, new_key: &str) -> Result<(), String> {
if old_key == new_key {
return Ok(());
}
let _lock = StoreLock::acquire()?;
self.refresh_nodes()?;
if self.nodes.contains_key(new_key) {
return Err(format!("Key '{}' already exists", new_key));
}
let node = self.nodes.get(old_key)
.ok_or_else(|| format!("No node '{}'", old_key))?
.clone();
// New version under the new key
let mut renamed = node.clone();
renamed.key = new_key.to_string();
renamed.version += 1;
// Deletion record for the old key (same UUID, independent version counter)
let mut tombstone = node.clone();
tombstone.deleted = true;
tombstone.version += 1;
// Collect affected relations and update their debug key strings
let updated_rels: Vec<_> = self.relations.iter()
.filter(|r| r.source_key == old_key || r.target_key == old_key)
.map(|r| {
let mut r = r.clone();
r.version += 1;
if r.source_key == old_key { r.source_key = new_key.to_string(); }
if r.target_key == old_key { r.target_key = new_key.to_string(); }
r
})
.collect();
// Persist under single lock
self.append_nodes_unlocked(&[renamed.clone(), tombstone])?;
if !updated_rels.is_empty() {
self.append_relations_unlocked(&updated_rels)?;
}
// Update in-memory cache
self.nodes.remove(old_key);
self.uuid_to_key.insert(renamed.uuid, new_key.to_string());
self.nodes.insert(new_key.to_string(), renamed);
for updated in &updated_rels {
if let Some(r) = self.relations.iter_mut().find(|r| r.uuid == updated.uuid) {
r.source_key = updated.source_key.clone();
r.target_key = updated.target_key.clone();
r.version = updated.version;
}
}
Ok(())
}
/// Modify a node in-place, bump version, and persist to capnp log.
fn modify_node(&mut self, key: &str, f: impl FnOnce(&mut Node)) -> Result<(), String> {
let node = self.nodes.get_mut(key)
.ok_or_else(|| format!("No node '{}'", key))?;
f(node);
node.version += 1;
let node = node.clone();
self.append_nodes(&[node])
}
pub fn mark_used(&mut self, key: &str) {
let boost = self.params.use_boost as f32;
let _ = self.modify_node(key, |n| {
n.uses += 1;
n.weight = (n.weight + boost).min(1.0);
if n.spaced_repetition_interval < 30 {
n.spaced_repetition_interval = match n.spaced_repetition_interval {
1 => 3, 3 => 7, 7 => 14, 14 => 30, _ => 30,
};
}
n.last_replayed = now_epoch();
});
}
pub fn mark_wrong(&mut self, key: &str, _ctx: Option<&str>) {
let _ = self.modify_node(key, |n| {
n.wrongs += 1;
n.weight = (n.weight - 0.1).max(0.0);
n.spaced_repetition_interval = 1;
});
}
pub fn record_gap(&mut self, desc: &str) {
self.gaps.push(GapRecord {
description: desc.to_string(),
timestamp: today(),
});
}
/// Cap node degree by soft-deleting edges from mega-hubs.
pub fn cap_degree(&mut self, max_degree: usize) -> Result<(usize, usize), String> {
let mut node_degree: HashMap<String, usize> = HashMap::new();
for rel in &self.relations {
if rel.deleted { continue; }
*node_degree.entry(rel.source_key.clone()).or_default() += 1;
*node_degree.entry(rel.target_key.clone()).or_default() += 1;
}
let mut node_edges: HashMap<String, Vec<usize>> = HashMap::new();
for (i, rel) in self.relations.iter().enumerate() {
if rel.deleted { continue; }
node_edges.entry(rel.source_key.clone()).or_default().push(i);
node_edges.entry(rel.target_key.clone()).or_default().push(i);
}
let mut to_delete: HashSet<usize> = HashSet::new();
let mut hubs_capped = 0;
for (_key, edge_indices) in &node_edges {
let active: Vec<usize> = edge_indices.iter()
.filter(|&&i| !to_delete.contains(&i))
.copied()
.collect();
if active.len() <= max_degree { continue; }
let mut auto_indices: Vec<(usize, f32)> = Vec::new();
let mut link_indices: Vec<(usize, usize)> = Vec::new();
for &i in &active {
let rel = &self.relations[i];
if rel.rel_type == RelationType::Auto {
auto_indices.push((i, rel.strength));
} else {
let other = if &rel.source_key == _key {
&rel.target_key
} else {
&rel.source_key
};
let other_deg = node_degree.get(other).copied().unwrap_or(0);
link_indices.push((i, other_deg));
}
}
let excess = active.len() - max_degree;
auto_indices.sort_by(|a, b| a.1.total_cmp(&b.1));
let auto_prune = excess.min(auto_indices.len());
for &(i, _) in auto_indices.iter().take(auto_prune) {
to_delete.insert(i);
}
let remaining_excess = excess.saturating_sub(auto_prune);
if remaining_excess > 0 {
link_indices.sort_by(|a, b| b.1.cmp(&a.1));
let link_prune = remaining_excess.min(link_indices.len());
for &(i, _) in link_indices.iter().take(link_prune) {
to_delete.insert(i);
}
}
hubs_capped += 1;
}
let mut pruned_rels = Vec::new();
for &i in &to_delete {
self.relations[i].deleted = true;
self.relations[i].version += 1;
pruned_rels.push(self.relations[i].clone());
}
if !pruned_rels.is_empty() {
self.append_relations(&pruned_rels)?;
}
self.relations.retain(|r| !r.deleted);
Ok((hubs_capped, to_delete.len()))
}
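// For example, with max_degree = 64 a hub with 80 live edges has an excess of
// 16: the 16 weakest Auto edges are tombstoned first; if only 10 are Auto, those
// 10 go plus the 6 Link edges whose far endpoint has the highest degree.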
/// Update graph-derived fields on all nodes
pub fn update_graph_metrics(&mut self) {
let g = self.build_graph();
let communities = g.communities();
for (key, node) in &mut self.nodes {
node.community_id = communities.get(key).copied();
node.clustering_coefficient = Some(g.clustering_coefficient(key));
node.degree = Some(g.degree(key) as u32);
}
}
}

173
poc-memory/src/store/parse.rs Normal file
View file

@ -0,0 +1,173 @@
// Markdown parsing for memory files
//
// Splits markdown files into MemoryUnit structs based on `<!-- mem: ... -->`
// markers. Each marker starts a new section; content before the first marker
// becomes the file-level unit. Links and causal edges are extracted from
// both marker attributes and inline markdown links.
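// Example input (hypothetical file "notes.md"):
//
//   Intro paragraph.
//   <!-- mem: id=alpha links=identity#core state=open -->
//   Body of the alpha section.
//
// parses into a file-level unit keyed "notes" (the intro) and a unit keyed
// "notes#alpha" with marker_links = ["identity#core"] and state = Some("open").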
use super::NodeType;
use regex::Regex;
use std::collections::HashMap;
use std::path::Path;
use std::sync::OnceLock;
pub struct MemoryUnit {
pub key: String,
pub content: String,
pub marker_links: Vec<String>,
pub md_links: Vec<String>,
pub causes: Vec<String>,
pub state: Option<String>,
pub source_ref: Option<String>,
}
pub fn classify_filename(filename: &str) -> NodeType {
let bare = filename.strip_suffix(".md").unwrap_or(filename);
if bare.starts_with("daily-") { NodeType::EpisodicDaily }
else if bare.starts_with("weekly-") { NodeType::EpisodicWeekly }
else if bare.starts_with("monthly-") { NodeType::EpisodicMonthly }
else if bare == "journal" { NodeType::EpisodicSession }
else { NodeType::Semantic }
}
pub fn parse_units(raw_filename: &str, content: &str) -> Vec<MemoryUnit> {
let filename = raw_filename.strip_suffix(".md").unwrap_or(raw_filename);
static MARKER_RE: OnceLock<Regex> = OnceLock::new();
static SOURCE_RE: OnceLock<Regex> = OnceLock::new();
static MD_LINK_RE: OnceLock<Regex> = OnceLock::new();
let marker_re = MARKER_RE.get_or_init(||
Regex::new(r"<!--\s*mem:\s*((?:id|links|tags|causes|state)\s*=\s*[^\s].*?)-->").unwrap());
let source_re = SOURCE_RE.get_or_init(||
Regex::new(r"<!--\s*source:\s*(.+?)\s*-->").unwrap());
let md_link_re = MD_LINK_RE.get_or_init(||
Regex::new(r"\[[^\]]*\]\(([^):]+(?:#[^)]*)?)\)").unwrap());
let markers: Vec<_> = marker_re.captures_iter(content)
.map(|cap| {
let full_match = cap.get(0).unwrap();
let attrs_str = &cap[1];
(full_match.start(), full_match.end(), parse_marker_attrs(attrs_str))
})
.collect();
let find_source = |text: &str| -> Option<String> {
source_re.captures(text).map(|c| c[1].trim().to_string())
};
if markers.is_empty() {
let source_ref = find_source(content);
let md_links = extract_md_links(content, md_link_re, filename);
return vec![MemoryUnit {
key: filename.to_string(),
content: content.to_string(),
marker_links: Vec::new(),
md_links,
causes: Vec::new(),
state: None,
source_ref,
}];
}
let mut units = Vec::new();
let first_start = markers[0].0;
let pre_content = content[..first_start].trim();
if !pre_content.is_empty() {
let source_ref = find_source(pre_content);
let md_links = extract_md_links(pre_content, md_link_re, filename);
units.push(MemoryUnit {
key: filename.to_string(),
content: pre_content.to_string(),
marker_links: Vec::new(),
md_links,
causes: Vec::new(),
state: None,
source_ref,
});
}
for (i, (_, end, attrs)) in markers.iter().enumerate() {
let unit_end = if i + 1 < markers.len() {
markers[i + 1].0
} else {
content.len()
};
let unit_content = content[*end..unit_end].trim();
let id = attrs.get("id").cloned().unwrap_or_default();
let key = if id.is_empty() {
format!("{}#unnamed-{}", filename, i)
} else {
format!("{}#{}", filename, id)
};
let marker_links = attrs.get("links")
.map(|l| l.split(',').map(|s| normalize_link(s.trim(), filename)).collect())
.unwrap_or_default();
let causes = attrs.get("causes")
.map(|l| l.split(',').map(|s| normalize_link(s.trim(), filename)).collect())
.unwrap_or_default();
let state = attrs.get("state").cloned();
let source_ref = find_source(unit_content);
let md_links = extract_md_links(unit_content, md_link_re, filename);
units.push(MemoryUnit {
key,
content: unit_content.to_string(),
marker_links,
md_links,
causes,
state,
source_ref,
});
}
units
}
fn parse_marker_attrs(attrs_str: &str) -> HashMap<String, String> {
static ATTR_RE: OnceLock<Regex> = OnceLock::new();
let attr_re = ATTR_RE.get_or_init(|| Regex::new(r"(\w+)\s*=\s*(\S+)").unwrap());
let mut attrs = HashMap::new();
for cap in attr_re.captures_iter(attrs_str) {
attrs.insert(cap[1].to_string(), cap[2].to_string());
}
attrs
}
fn extract_md_links(content: &str, re: &Regex, source_file: &str) -> Vec<String> {
re.captures_iter(content)
.map(|cap| normalize_link(&cap[1], source_file))
.filter(|link| !link.starts_with(source_file) || link.contains('#'))
.collect()
}
pub fn normalize_link(target: &str, source_file: &str) -> String {
let source_bare = source_file.strip_suffix(".md").unwrap_or(source_file);
if target.starts_with('#') {
return format!("{}{}", source_bare, target);
}
let (path_part, fragment) = if let Some(hash_pos) = target.find('#') {
(&target[..hash_pos], Some(&target[hash_pos..]))
} else {
(target, None)
};
let basename = Path::new(path_part)
.file_name()
.map(|f| f.to_string_lossy().to_string())
.unwrap_or_else(|| path_part.to_string());
let bare = basename.strip_suffix(".md").unwrap_or(&basename);
match fragment {
Some(frag) => format!("{}{}", bare, frag),
None => bare.to_string(),
}
}
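// e.g. normalize_link("#core", "identity.md") -> "identity#core" and
// normalize_link("../memory/journal.md#j-1", "notes") -> "journal#j-1":
// paths collapse to the bare basename, fragments are preserved.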

806
poc-memory/src/store/persist.rs Normal file
View file

@ -0,0 +1,806 @@
// Persistence layer: load, save, replay, append, snapshot
//
// Three-tier loading strategy:
// 1. rkyv mmap snapshot (snapshot.rkyv) — ~4ms deserialize
// 2. bincode cache (state.bin) — ~10ms
// 3. capnp log replay — ~40ms
//
// Logs are append-only; cache staleness uses log file sizes, not mtimes.
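// state.bin layout (as read by load() below): a 4-byte magic, two little-endian
// u64s recording the nodes.capnp and relations.capnp sizes the cache reflects,
// then the bincode-encoded Store. If either log has grown, the cache is stale.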
use super::types::*;
use crate::memory_capnp;
use capnp::message;
use capnp::serialize;
use std::collections::HashMap;
use std::fs;
use std::io::{BufReader, BufWriter, Seek};
use std::path::Path;
impl Store {
/// Load store from state.bin cache if fresh, otherwise rebuild from capnp logs.
///
/// Staleness check uses log file sizes (not mtimes). Since logs are
/// append-only, any write grows the file, invalidating the cache.
/// This avoids the mtime race that caused data loss with concurrent
/// writers (dream loop, link audit, journal enrichment).
pub fn load() -> Result<Store, String> {
// 1. Try rkyv mmap snapshot (~4ms with deserialize, <1ms zero-copy)
match Self::load_snapshot_mmap() {
Ok(Some(mut store)) => {
// rkyv snapshot doesn't include visits — replay from log
let visits_p = visits_path();
if visits_p.exists() {
store.replay_visits(&visits_p).ok();
}
return Ok(store);
},
Ok(None) => {},
Err(e) => eprintln!("rkyv snapshot: {}", e),
}
// 2. Try bincode state.bin cache (~10ms)
let nodes_p = nodes_path();
let rels_p = relations_path();
let state_p = state_path();
let nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
let rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0);
if let Ok(data) = fs::read(&state_p) {
if data.len() >= CACHE_HEADER_LEN && data[..4] == CACHE_MAGIC {
let cached_nodes = u64::from_le_bytes(data[4..12].try_into().unwrap());
let cached_rels = u64::from_le_bytes(data[12..20].try_into().unwrap());
if cached_nodes == nodes_size && cached_rels == rels_size {
if let Ok(mut store) = bincode::deserialize::<Store>(&data[CACHE_HEADER_LEN..]) {
// Rebuild uuid_to_key (skipped by serde)
for (key, node) in &store.nodes {
store.uuid_to_key.insert(node.uuid, key.clone());
}
store.loaded_nodes_size = nodes_size;
store.loaded_rels_size = rels_size;
// Bootstrap: write rkyv snapshot if missing
if !snapshot_path().exists() {
if let Err(e) = store.save_snapshot(cached_nodes, cached_rels) {
eprintln!("rkyv bootstrap: {}", e);
}
}
return Ok(store);
}
}
}
}
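// Worked example of the staleness check (sketch): the cache header says the
// node log was 4096 bytes when state.bin was written; a concurrent writer
// has since appended, so the log is now 4160 bytes. 4096 != 4160, so the
// cache is treated as stale and we fall through to a full log replay below.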
// Stale or no cache — rebuild from capnp logs
let mut store = Store::default();
if nodes_p.exists() {
store.replay_nodes(&nodes_p)?;
}
if rels_p.exists() {
store.replay_relations(&rels_p)?;
}
let visits_p = visits_path();
if visits_p.exists() {
store.replay_visits(&visits_p)?;
}
// Record log sizes after replay — this is the state we reflect
store.loaded_nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
store.loaded_rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0);
// Drop edges referencing deleted/missing nodes
store.relations.retain(|r|
store.nodes.contains_key(&r.source_key) &&
store.nodes.contains_key(&r.target_key)
);
store.save()?;
Ok(store)
}
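// Illustrative writer call sequence (a sketch, not copied from real callers;
// `some_node` / `some_rel` are hypothetical values built elsewhere):
//
//     let mut store = Store::load()?;
//     store.append_nodes(&[some_node])?;      // durably appended to the log
//     store.append_relations(&[some_rel])?;
//     store.save()?;                          // refresh derived caches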
/// Load store directly from capnp logs, bypassing all caches.
/// Used by fsck to verify cache consistency.
pub fn load_from_logs() -> Result<Store, String> {
let nodes_p = nodes_path();
let rels_p = relations_path();
let mut store = Store::default();
if nodes_p.exists() {
store.replay_nodes(&nodes_p)?;
}
if rels_p.exists() {
store.replay_relations(&rels_p)?;
}
let visits_p = visits_path();
if visits_p.exists() {
store.replay_visits(&visits_p)?;
}
Ok(store)
}
/// Replay node log, keeping latest version per UUID.
/// Tracks all UUIDs seen per key to detect duplicates.
fn replay_nodes(&mut self, path: &Path) -> Result<(), String> {
let file = fs::File::open(path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mut reader = BufReader::new(file);
// Track all non-deleted UUIDs per key to detect duplicates
let mut key_uuids: HashMap<String, Vec<[u8; 16]>> = HashMap::new();
while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
let log = msg.get_root::<memory_capnp::node_log::Reader>()
.map_err(|e| format!("read node log: {}", e))?;
for node_reader in log.get_nodes()
.map_err(|e| format!("get nodes: {}", e))? {
let node = Node::from_capnp_migrate(node_reader)?;
let existing_version = self.nodes.get(&node.key)
.map(|n| n.version)
.unwrap_or(0);
if node.version >= existing_version {
if node.deleted {
self.nodes.remove(&node.key);
self.uuid_to_key.remove(&node.uuid);
if let Some(uuids) = key_uuids.get_mut(&node.key) {
uuids.retain(|u| *u != node.uuid);
}
} else {
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(node.key.clone(), node.clone());
let uuids = key_uuids.entry(node.key).or_default();
if !uuids.contains(&node.uuid) {
uuids.push(node.uuid);
}
}
}
}
}
// Report duplicate keys
for (key, uuids) in &key_uuids {
if uuids.len() > 1 {
eprintln!("WARNING: key '{}' has {} UUIDs (duplicate nodes)", key, uuids.len());
}
}
Ok(())
}
/// Replay relation log, keeping latest version per UUID
fn replay_relations(&mut self, path: &Path) -> Result<(), String> {
let file = fs::File::open(path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mut reader = BufReader::new(file);
// Collect all, then deduplicate by UUID keeping latest version
let mut by_uuid: HashMap<[u8; 16], Relation> = HashMap::new();
while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
let log = msg.get_root::<memory_capnp::relation_log::Reader>()
.map_err(|e| format!("read relation log: {}", e))?;
for rel_reader in log.get_relations()
.map_err(|e| format!("get relations: {}", e))? {
let rel = Relation::from_capnp_migrate(rel_reader)?;
let existing_version = by_uuid.get(&rel.uuid)
.map(|r| r.version)
.unwrap_or(0);
if rel.version >= existing_version {
by_uuid.insert(rel.uuid, rel);
}
}
}
self.relations = by_uuid.into_values()
.filter(|r| !r.deleted)
.collect();
Ok(())
}
/// Find all duplicate keys: keys with multiple live UUIDs in the log.
/// Returns a map from key → vec of all live Node versions (one per UUID).
/// The "winner" in self.nodes is always one of them.
pub fn find_duplicates(&self) -> Result<HashMap<String, Vec<Node>>, String> {
let path = nodes_path();
if !path.exists() { return Ok(HashMap::new()); }
let file = fs::File::open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mut reader = BufReader::new(file);
// Track latest version of each UUID
let mut by_uuid: HashMap<[u8; 16], Node> = HashMap::new();
while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
let log = msg.get_root::<memory_capnp::node_log::Reader>()
.map_err(|e| format!("read node log: {}", e))?;
for node_reader in log.get_nodes()
.map_err(|e| format!("get nodes: {}", e))? {
let node = Node::from_capnp_migrate(node_reader)?;
let supersedes = by_uuid.get(&node.uuid)
.map(|n| node.version >= n.version)
.unwrap_or(true);
if supersedes {
by_uuid.insert(node.uuid, node);
}
}
}
// Group live (non-deleted) nodes by key
let mut by_key: HashMap<String, Vec<Node>> = HashMap::new();
for node in by_uuid.into_values() {
if !node.deleted {
by_key.entry(node.key.clone()).or_default().push(node);
}
}
// Keep only duplicates
by_key.retain(|_, nodes| nodes.len() > 1);
Ok(by_key)
}
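// Illustrative consumer (sketch): report duplicate keys found in the log.
//
//     for (key, versions) in store.find_duplicates()? {
//         eprintln!("{}: {} live copies", key, versions.len());
//     }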
/// Append nodes to the log file.
/// Serializes to a Vec first, then does a single write() syscall
/// so the append is atomic with O_APPEND even without flock.
pub fn append_nodes(&mut self, nodes: &[Node]) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
self.append_nodes_unlocked(nodes)
}
/// Append nodes without acquiring the lock. Caller must hold StoreLock.
pub(crate) fn append_nodes_unlocked(&mut self, nodes: &[Node]) -> Result<(), String> {
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::node_log::Builder>();
let mut list = log.init_nodes(nodes.len() as u32);
for (i, node) in nodes.iter().enumerate() {
node.to_capnp(list.reborrow().get(i as u32));
}
}
let mut buf = Vec::new();
serialize::write_message(&mut buf, &msg)
.map_err(|e| format!("serialize nodes: {}", e))?;
let path = nodes_path();
let file = fs::OpenOptions::new()
.create(true).append(true).open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
use std::io::Write;
(&file).write_all(&buf)
.map_err(|e| format!("write nodes: {}", e))?;
self.loaded_nodes_size = file.metadata().map(|m| m.len()).unwrap_or(0);
Ok(())
}
/// Replay only new entries appended to the node log since we last loaded.
/// Call under StoreLock to catch writes from concurrent processes.
pub(crate) fn refresh_nodes(&mut self) -> Result<(), String> {
let path = nodes_path();
let current_size = fs::metadata(&path).map(|m| m.len()).unwrap_or(0);
if current_size <= self.loaded_nodes_size {
return Ok(()); // no new data
}
let file = fs::File::open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mut reader = BufReader::new(file);
reader.seek(std::io::SeekFrom::Start(self.loaded_nodes_size))
.map_err(|e| format!("seek nodes log: {}", e))?;
while let Ok(msg) = serialize::read_message(&mut reader, message::ReaderOptions::new()) {
let log = msg.get_root::<memory_capnp::node_log::Reader>()
.map_err(|e| format!("read node log delta: {}", e))?;
for node_reader in log.get_nodes()
.map_err(|e| format!("get nodes delta: {}", e))? {
let node = Node::from_capnp_migrate(node_reader)?;
let supersedes = self.nodes.get(&node.key)
.map(|n| node.version >= n.version)
.unwrap_or(true);
if supersedes {
if node.deleted {
self.nodes.remove(&node.key);
self.uuid_to_key.remove(&node.uuid);
} else {
self.uuid_to_key.insert(node.uuid, node.key.clone());
self.nodes.insert(node.key.clone(), node);
}
}
}
}
self.loaded_nodes_size = current_size;
Ok(())
}
/// Append relations to the log file.
/// Single write() syscall for atomic O_APPEND.
pub fn append_relations(&mut self, relations: &[Relation]) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
self.append_relations_unlocked(relations)
}
/// Append relations without acquiring the lock. Caller must hold StoreLock.
pub(crate) fn append_relations_unlocked(&mut self, relations: &[Relation]) -> Result<(), String> {
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::relation_log::Builder>();
let mut list = log.init_relations(relations.len() as u32);
for (i, rel) in relations.iter().enumerate() {
rel.to_capnp(list.reborrow().get(i as u32));
}
}
let mut buf = Vec::new();
serialize::write_message(&mut buf, &msg)
.map_err(|e| format!("serialize relations: {}", e))?;
let path = relations_path();
let file = fs::OpenOptions::new()
.create(true).append(true).open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
use std::io::Write;
(&file).write_all(&buf)
.map_err(|e| format!("write relations: {}", e))?;
self.loaded_rels_size = file.metadata().map(|m| m.len()).unwrap_or(0);
Ok(())
}
/// Append agent visit records to the visits log.
pub fn append_visits(&mut self, visits: &[AgentVisit]) -> Result<(), String> {
if visits.is_empty() { return Ok(()); }
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::agent_visit_log::Builder>();
let mut list = log.init_visits(visits.len() as u32);
for (i, visit) in visits.iter().enumerate() {
visit.to_capnp(list.reborrow().get(i as u32));
}
}
let mut buf = Vec::new();
serialize::write_message(&mut buf, &msg)
.map_err(|e| format!("serialize visits: {}", e))?;
let path = visits_path();
let file = fs::OpenOptions::new()
.create(true).append(true).open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
use std::io::Write;
(&file).write_all(&buf)
.map_err(|e| format!("write visits: {}", e))?;
// Update in-memory index
for v in visits {
self.visits
.entry(v.node_key.clone())
.or_default()
.insert(v.agent.clone(), v.timestamp);
}
Ok(())
}
/// Replay visits log to rebuild in-memory index.
fn replay_visits(&mut self, path: &Path) -> Result<(), String> {
let file = fs::File::open(path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mut reader = BufReader::new(file);
while reader.stream_position().map_err(|e| e.to_string())?
< fs::metadata(path).map_err(|e| e.to_string())?.len()
{
let msg = match serialize::read_message(&mut reader, Default::default()) {
Ok(m) => m,
Err(_) => break,
};
let log = msg.get_root::<memory_capnp::agent_visit_log::Reader>()
.map_err(|e| format!("read visit log: {}", e))?;
for visit in log.get_visits().map_err(|e| e.to_string())? {
let key = visit.get_node_key().ok()
.and_then(|t| t.to_str().ok())
.unwrap_or("")
.to_string();
let agent = visit.get_agent().ok()
.and_then(|t| t.to_str().ok())
.unwrap_or("")
.to_string();
let ts = visit.get_timestamp();
if !key.is_empty() && !agent.is_empty() {
let entry = self.visits.entry(key).or_default();
// Keep latest timestamp per agent
let existing = entry.entry(agent).or_insert(0);
if ts > *existing {
*existing = ts;
}
}
}
}
Ok(())
}
/// Record visits for a batch of node keys from a successful agent run.
pub fn record_agent_visits(&mut self, node_keys: &[String], agent: &str) -> Result<(), String> {
let visits: Vec<AgentVisit> = node_keys.iter()
.filter_map(|key| {
let node = self.nodes.get(key)?;
Some(new_visit(node.uuid, key, agent, "processed"))
})
.collect();
self.append_visits(&visits)
}
/// Get the last time an agent visited a node. Returns 0 if never visited.
pub fn last_visited(&self, node_key: &str, agent: &str) -> i64 {
self.visits.get(node_key)
.and_then(|agents| agents.get(agent))
.copied()
.unwrap_or(0)
}
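// Illustrative agent-side usage (sketch; agent name and cutoff are hypothetical):
//
//     let now = std::time::SystemTime::now()
//         .duration_since(std::time::UNIX_EPOCH).unwrap().as_secs() as i64;
//     let unvisited: Vec<String> = store.nodes.keys()
//         .filter(|k| store.last_visited(k.as_str(), "example-agent") < now - 86_400)
//         .cloned()
//         .collect();
//     // ... process `unvisited`, then record the visits ...
//     store.record_agent_visits(&unvisited, "example-agent")?;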
/// Save the derived cache with log size header for staleness detection.
/// Uses atomic write (tmp + rename) to prevent partial reads.
pub fn save(&self) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
let path = state_path();
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).ok();
}
// Use log sizes from load time, not current filesystem sizes.
// If another writer appended since we loaded, our recorded size
// will be smaller than the actual log → next reader detects stale
// cache and replays the (correct, append-only) log.
let nodes_size = self.loaded_nodes_size;
let rels_size = self.loaded_rels_size;
let bincode_data = bincode::serialize(self)
.map_err(|e| format!("bincode serialize: {}", e))?;
let mut data = Vec::with_capacity(CACHE_HEADER_LEN + bincode_data.len());
data.extend_from_slice(&CACHE_MAGIC);
data.extend_from_slice(&nodes_size.to_le_bytes());
data.extend_from_slice(&rels_size.to_le_bytes());
data.extend_from_slice(&bincode_data);
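// Resulting state.bin layout (sketch; offsets assume CACHE_HEADER_LEN == 20,
// matching the reads in load() above):
//   [0..4)   CACHE_MAGIC
//   [4..12)  nodes log size at save time (u64 LE)
//   [12..20) relations log size at save time (u64 LE)
//   [20..)   bincode-serialized Store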
// Atomic write: tmp file + rename
let tmp_path = path.with_extension("bin.tmp");
fs::write(&tmp_path, &data)
.map_err(|e| format!("write {}: {}", tmp_path.display(), e))?;
fs::rename(&tmp_path, &path)
.map_err(|e| format!("rename {}{}: {}", tmp_path.display(), path.display(), e))?;
// Also write rkyv snapshot (mmap-friendly)
if let Err(e) = self.save_snapshot(nodes_size, rels_size) {
eprintln!("rkyv snapshot save: {}", e);
}
Ok(())
}
/// Serialize store as rkyv snapshot with staleness header.
/// Assumes StoreLock is already held by caller.
fn save_snapshot(&self, nodes_size: u64, rels_size: u64) -> Result<(), String> {
let snap = Snapshot {
nodes: self.nodes.clone(),
relations: self.relations.iter().filter(|r| !r.deleted).cloned().collect(),
gaps: self.gaps.clone(),
params: self.params,
};
let rkyv_data = rkyv::to_bytes::<_, 256>(&snap)
.map_err(|e| format!("rkyv serialize: {}", e))?;
let mut data = Vec::with_capacity(RKYV_HEADER_LEN + rkyv_data.len());
data.extend_from_slice(&RKYV_MAGIC);
data.extend_from_slice(&1u32.to_le_bytes()); // format version
data.extend_from_slice(&nodes_size.to_le_bytes());
data.extend_from_slice(&rels_size.to_le_bytes());
data.extend_from_slice(&(rkyv_data.len() as u64).to_le_bytes());
data.extend_from_slice(&rkyv_data);
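// Resulting snapshot.rkyv layout (sketch; offsets assume RKYV_HEADER_LEN == 32,
// matching load_snapshot_mmap() below):
//   [0..4)   RKYV_MAGIC
//   [4..8)   format version (u32 LE)
//   [8..16)  nodes log size (u64 LE)
//   [16..24) relations log size (u64 LE)
//   [24..32) rkyv payload length (u64 LE)
//   [32..)   rkyv-archived Snapshot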
let path = snapshot_path();
let tmp_path = path.with_extension("rkyv.tmp");
fs::write(&tmp_path, &data)
.map_err(|e| format!("write {}: {}", tmp_path.display(), e))?;
fs::rename(&tmp_path, &path)
.map_err(|e| format!("rename: {}", e))?;
Ok(())
}
/// Try loading store from mmap'd rkyv snapshot.
/// Returns None if snapshot is missing or stale (log sizes don't match).
fn load_snapshot_mmap() -> Result<Option<Store>, String> {
let path = snapshot_path();
if !path.exists() { return Ok(None); }
let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0);
let file = fs::File::open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mmap = unsafe { memmap2::Mmap::map(&file) }
.map_err(|e| format!("mmap {}: {}", path.display(), e))?;
if mmap.len() < RKYV_HEADER_LEN { return Ok(None); }
if mmap[..4] != RKYV_MAGIC { return Ok(None); }
// [4..8] = version, skip for now
let cached_nodes = u64::from_le_bytes(mmap[8..16].try_into().unwrap());
let cached_rels = u64::from_le_bytes(mmap[16..24].try_into().unwrap());
let data_len = u64::from_le_bytes(mmap[24..32].try_into().unwrap()) as usize;
if cached_nodes != nodes_size || cached_rels != rels_size {
return Ok(None); // stale
}
if mmap.len() < RKYV_HEADER_LEN + data_len {
return Ok(None); // truncated
}
let rkyv_data = &mmap[RKYV_HEADER_LEN..RKYV_HEADER_LEN + data_len];
// SAFETY: we wrote this file ourselves via save_snapshot().
// Skip full validation (check_archived_root) — the staleness header
// already confirms this snapshot matches the current log state.
let archived = unsafe { rkyv::archived_root::<Snapshot>(rkyv_data) };
let snap: Snapshot = <ArchivedSnapshot as rkyv::Deserialize<Snapshot, rkyv::Infallible>>
::deserialize(archived, &mut rkyv::Infallible).unwrap();
let mut store = Store {
nodes: snap.nodes,
relations: snap.relations,
gaps: snap.gaps,
params: snap.params,
..Default::default()
};
// Rebuild uuid_to_key (not serialized)
for (key, node) in &store.nodes {
store.uuid_to_key.insert(node.uuid, key.clone());
}
store.loaded_nodes_size = nodes_size;
store.loaded_rels_size = rels_size;
Ok(Some(store))
}
}
/// Strip .md suffix from all node keys and relation key strings.
/// Merges duplicates (bare key + .md key) by keeping the latest version.
pub fn strip_md_keys() -> Result<(), String> {
use super::strip_md_suffix;
let mut store = Store::load()?;
let mut renamed_nodes = 0usize;
let mut renamed_rels = 0usize;
let mut merged = 0usize;
// Collect keys that need renaming
let old_keys: Vec<String> = store.nodes.keys()
.filter(|k| k.ends_with(".md") || k.contains(".md#"))
.cloned()
.collect();
for old_key in &old_keys {
let new_key = strip_md_suffix(old_key);
if new_key == *old_key { continue; }
let mut node = store.nodes.remove(old_key).unwrap();
store.uuid_to_key.remove(&node.uuid);
if let Some(existing) = store.nodes.get(&new_key) {
// Merge: keep whichever has the higher version
if existing.version >= node.version {
eprintln!(" merge {}{} (keeping existing v{})",
old_key, new_key, existing.version);
merged += 1;
continue;
}
eprintln!(" merge {}{} (replacing v{} with v{})",
old_key, new_key, existing.version, node.version);
merged += 1;
}
node.key = new_key.clone();
node.version += 1;
store.uuid_to_key.insert(node.uuid, new_key.clone());
store.nodes.insert(new_key, node);
renamed_nodes += 1;
}
// Fix relation key strings
for rel in &mut store.relations {
let new_source = strip_md_suffix(&rel.source_key);
let new_target = strip_md_suffix(&rel.target_key);
if new_source != rel.source_key || new_target != rel.target_key {
rel.source_key = new_source;
rel.target_key = new_target;
rel.version += 1;
renamed_rels += 1;
}
}
if renamed_nodes == 0 && renamed_rels == 0 && merged == 0 {
eprintln!("No .md suffixes found — store is clean");
return Ok(());
}
eprintln!("Renamed {} nodes, {} relations, merged {} duplicates",
renamed_nodes, renamed_rels, merged);
// Write fresh logs from the migrated state
rewrite_store(&store)?;
eprintln!("Store rewritten successfully");
Ok(())
}
/// Rewrite the entire store from scratch (fresh logs + caches).
/// Used after migrations that change keys across all nodes/relations.
fn rewrite_store(store: &Store) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
// Write fresh node log
let nodes: Vec<_> = store.nodes.values().cloned().collect();
let nodes_path = nodes_path();
{
let file = fs::File::create(&nodes_path)
.map_err(|e| format!("create {}: {}", nodes_path.display(), e))?;
let mut writer = BufWriter::new(file);
// Write in chunks to keep message sizes reasonable
for chunk in nodes.chunks(100) {
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::node_log::Builder>();
let mut list = log.init_nodes(chunk.len() as u32);
for (i, node) in chunk.iter().enumerate() {
node.to_capnp(list.reborrow().get(i as u32));
}
}
serialize::write_message(&mut writer, &msg)
.map_err(|e| format!("write nodes: {}", e))?;
}
}
// Write fresh relation log
let rels_path = relations_path();
{
let file = fs::File::create(&rels_path)
.map_err(|e| format!("create {}: {}", rels_path.display(), e))?;
let mut writer = BufWriter::new(file);
let rels: Vec<_> = store.relations.iter().filter(|r| !r.deleted).cloned().collect();
if !rels.is_empty() {
for chunk in rels.chunks(100) {
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::relation_log::Builder>();
let mut list = log.init_relations(chunk.len() as u32);
for (i, rel) in chunk.iter().enumerate() {
rel.to_capnp(list.reborrow().get(i as u32));
}
}
serialize::write_message(&mut writer, &msg)
.map_err(|e| format!("write relations: {}", e))?;
}
}
}
// Nuke caches so next load rebuilds from fresh logs
for p in [state_path(), snapshot_path()] {
if p.exists() {
fs::remove_file(&p).ok();
}
}
Ok(())
}
/// Check and repair corrupt capnp log files.
///
/// Reads each message sequentially, tracking file position. On the first
/// corrupt message, truncates the file to the last good position. Also
/// removes stale caches so the next load replays from the repaired log.
pub fn fsck() -> Result<(), String> {
let mut any_corrupt = false;
for (path, kind) in [
(nodes_path(), "node"),
(relations_path(), "relation"),
] {
if !path.exists() { continue; }
let file = fs::File::open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let file_len = file.metadata()
.map_err(|e| format!("stat {}: {}", path.display(), e))?.len();
let mut reader = BufReader::new(file);
let mut good_messages = 0u64;
let mut last_good_pos = 0u64;
loop {
let pos = reader.stream_position()
.map_err(|e| format!("tell {}: {}", path.display(), e))?;
let msg = match serialize::read_message(&mut reader, message::ReaderOptions::new()) {
Ok(m) => m,
Err(_) => {
// read_message fails at EOF (normal) or on corrupt framing
if pos < file_len {
// Not at EOF — corrupt framing
eprintln!("{}: corrupt message at offset {}, truncating", kind, pos);
any_corrupt = true;
drop(reader);
let file = fs::OpenOptions::new().write(true).open(&path)
.map_err(|e| format!("open for truncate: {}", e))?;
file.set_len(pos)
.map_err(|e| format!("truncate {}: {}", path.display(), e))?;
eprintln!("{}: truncated from {} to {} bytes ({} good messages)",
kind, file_len, pos, good_messages);
}
break;
}
};
// Validate the message content too
let valid = if kind == "node" {
msg.get_root::<memory_capnp::node_log::Reader>()
.and_then(|l| l.get_nodes().map(|_| ()))
.is_ok()
} else {
msg.get_root::<memory_capnp::relation_log::Reader>()
.and_then(|l| l.get_relations().map(|_| ()))
.is_ok()
};
if valid {
good_messages += 1;
last_good_pos = reader.stream_position()
.map_err(|e| format!("tell {}: {}", path.display(), e))?;
} else {
eprintln!("{}: corrupt message content at offset {}, truncating to {}",
kind, pos, last_good_pos);
any_corrupt = true;
drop(reader);
let file = fs::OpenOptions::new().write(true).open(&path)
.map_err(|e| format!("open for truncate: {}", e))?;
file.set_len(last_good_pos)
.map_err(|e| format!("truncate {}: {}", path.display(), e))?;
eprintln!("{}: truncated from {} to {} bytes ({} good messages)",
kind, file_len, last_good_pos, good_messages);
break;
}
}
if !any_corrupt {
eprintln!("{}: {} messages, all clean", kind, good_messages);
}
}
if any_corrupt {
// Nuke caches so next load replays from the repaired logs
for p in [state_path(), snapshot_path()] {
if p.exists() {
fs::remove_file(&p)
.map_err(|e| format!("remove {}: {}", p.display(), e))?;
eprintln!("removed stale cache: {}", p.display());
}
}
eprintln!("repair complete — run `poc-memory status` to verify");
} else {
eprintln!("store is clean");
}
Ok(())
}
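// Illustrative CLI wiring (hypothetical subcommand names; the real dispatch
// lives in the binary, not in this file):
//
//     match command {
//         Command::Fsck => fsck()?,
//         Command::StripMdKeys => strip_md_keys()?,
//         _ => { /* ... */ }
//     }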

Some files were not shown because too many files have changed in this diff.