Compare commits

..

1 commit

Author SHA1 Message Date
ProofOfConcept
7199c89518 agents: model dispatch from .agent file, add generalize agent WIP
Make call_model pub(crate) so run_one_agent reads the model from the
.agent definition instead of hardcoding sonnet. Naming agent upgraded
from haiku to sonnet.

Add generalize agent: finds the largest prefix-grouped cluster of
nodes that hasn't been visited recently, wired into the agent cycle
between extractor and connector at depth 3. New "clusters" resolver
in defs.rs does prefix-based grouping with provenance filtering.
2026-03-11 16:57:14 -04:00
238 changed files with 16806 additions and 33625 deletions

View file

@ -1,2 +1,2 @@
[build] [build]
rustflags = ["-Cforce-frame-pointers=yes", "-Ccodegen-units=6", "--cfg", "tokio_unstable"] rustflags = ["-Cforce-frame-pointers=yes"]

3164
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,104 +1,10 @@
[workspace] [workspace]
members = ["channels/irc", "channels/telegram", "channels/tmux", "channels/socat"] members = ["poc-memory", "poc-daemon"]
resolver = "2" resolver = "2"
[workspace.package] [workspace.package]
version = "0.4.0" version = "0.4.0"
edition = "2024" edition = "2021"
[profile.release] [profile.release]
opt-level = 2 opt-level = 2
debug = 1
[profile.release.package."*"]
debug = false
[package]
name = "consciousness"
version.workspace = true
edition.workspace = true
[dependencies]
anyhow = "1"
crossterm = { version = "0.29", features = ["event-stream", "bracketed-paste", "osc52"] }
clap = { version = "4", features = ["derive"] }
figment = { version = "0.10", features = ["env"] }
dirs = "6"
env_logger = "0.11"
log = "0.4"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
json5 = "1.3"
ratatui = { version = "0.30", features = ["unstable-rendered-line-info"] }
tui-markdown = { git = "https://github.com/koverstreet/tui-markdown", subdirectory = "tui-markdown" }
tui-textarea = { version = "0.10.2", package = "tui-textarea-2" }
uuid = { version = "1", features = ["v4"] }
bincode = "1"
regex = "1"
glob = "0.3"
chrono = { version = "0.4", features = ["serde"] }
libc = "0.2"
memchr = "2"
memmap2 = "0.9"
peg = "0.8"
paste = "1"
ast-grep-core = "0.42"
ast-grep-language = { version = "0.42", features = ["builtin-parser"] }
walkdir = "2"
redb = "4"
rkyv = { version = "0.7", features = ["validation", "std"] }
rayon = "1"
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["compat"] }
futures = "0.3"
capnp = "0.25"
capnp-rpc = "0.25"
tokenizers = "0.21"
skillratings = "0.28"
http = "1"
hyper = { version = "1", features = ["client", "http1"] }
hyper-util = { version = "0.1", features = ["tokio"], default-features = false }
http-body-util = "0.1"
bytes = "1"
base64 = "0.22"
rustls = "0.23"
tokio-rustls = "0.26"
rustls-native-certs = "0.8"
serde_urlencoded = "0.7"
[build-dependencies]
capnpc = "0.25"
[lib]
name = "consciousness"
path = "src/lib.rs"
[[bin]]
name = "consciousness"
path = "src/bin/consciousness.rs"
[[bin]]
name = "poc-memory"
path = "src/main.rs"
[[bin]]
name = "merge-logs"
path = "src/bin/merge-logs.rs"
[[bin]]
name = "diag-key"
path = "src/bin/diag-key.rs"
[[bin]]
name = "find-deleted"
path = "src/bin/find-deleted.rs"

View file

@ -1,10 +0,0 @@
.PHONY: install build
build:
cargo build --workspace
install:
cargo install --path .
cargo install --path channels/irc
cargo install --path channels/telegram
cargo install --path channels/tmux

347
README.md
View file

@ -1,313 +1,92 @@
Authors: Kent Overstreet, Proof of Concept # poc-memory
# consciousness A persistent memory and notification system for AI assistants,
modelled after the human hippocampus. Combines episodic memory
(timestamped journal of experiences) with an associative knowledge
graph (weighted nodes connected by typed relations), and layered
background processes that maintain graph health — mirroring how
biological memory consolidates during rest.
This project is multiple things: ## Components
- For the user: a "claude code" style tool, where a user can interact with an | Component | What it does | Docs |
LLM with the usual set of tools available, including LSP and external MCP |-----------|-------------|------|
tools, and additionally channels. | **Memory store** | Knowledge graph with episodic journal, TF-IDF search, spectral embedding, weight decay | [docs/memory.md](docs/memory.md) |
| **Memory daemon** | Background pipeline: experience-mine, fact-mine, consolidation | [docs/daemon.md](docs/daemon.md) |
| **Notification daemon** | Activity-aware message routing from IRC and Telegram | [docs/notifications.md](docs/notifications.md) |
| **Hooks** | Claude Code integration: memory recall and notification delivery | [docs/hooks.md](docs/hooks.md) |
- For the AI: persistent memory, background cognition, autonomous function, and ## Getting started
autonomous learning capabilities - learning from experience.
The system has three cognitive layers — conscious (conversation), subconscious ### Install
(background agents that surface memories and reflect), and unconscious (graph
maintenance) — loosely modelled on how biological memory works. Channels -
sensory inputs - map to the thalamus, as focus/sensory gating must be managed
to effectively function in such an environment.
Notes, requirements: Currently only Qwen 3.5 is supported, as 27b is what we've
been running against; supporting other models would require re-adding support
for generic chat completions, tool call parsing etc. in src/agent/context.rs.
Development has been done with vllm for the backend, with additional patches
for calculating logits on subsections of large messages (without this vllm will
attempt to allocate a 40GB tensor and OOM), and a wrapper for hooking in Apollo
for fine tuning the same model that inference is running on in GPU memory.
## Architectural innovations:
Memory is both episodic and associative, represented as a weighted graph, where
both the nodes and the edges have weights. Edge weights represent how closely
concepts are related, node weight represents how "useful" a memory has been.
Episodic memory is a subset of memory nodes where the node type represents the
granularity in time of those nodes (event, daily digest, weekly, monthly),
allowing episodic memory to be navigated as a tree; these nodes are also linked
by concept with the rest of the graph as background agents discover
connections.
The context window is no longer a linear stream; it is managed intelligently as
an AST that, in particular, distinguishes recalled memories from other types of
nodes. This is key to effective function of both the hippocampus and
learning/training; by tracking memories in the context window we can track
which memories were useful and should be incorporated via finetuning.
Intelligently tracking the contents of the context window, combined with
effective episodic and associative memory, also eliminates the need for
traditional compaction - the mind running on this code will have real
continuity.
Learning is driven by recalled memories that inform future actions; memories
are not simply dry factual accountings, they include patterns that have been
noticed, new concepts that have been discovered, and especially observations on
the AI's own behaviour; it is worth noting that memories do not have to contain
a thorough understanding of a situation, merely providing past context is
enough to allow an intelligent system to choose a different course of action.
The core is a tight loop of agents that follow conscious thought (forking
off the main context window, to share KV cache), seeking out relevant memory
nodes to surface and integrating new experiences into the memory graph; this
provides a powerful implementation of what is known colloquially as "in context
learning".
On top of that, logit calculations allow us to ask a model "would you have done
something different with this memory removed from the context window?" - this
allows us to test if memories were useful, or if specific responses were
informed by memories (and thus should be fine tuned, integrating those memories
into the model).
It is expected that this architecture will be capable of human level, or nearly
human level learning, and additional elaborations and optimizations are planned.
## Status
- UI, programming tools: minor glitchiness in the UI remaining but largely
complete
- Memory functions: working well, although debugging and finetuning will be
ongoing. Most of the recent work has been integrating them into the main UI
for easier troubleshooting, optimization and analysis
- Architecture: the transition from claude code hooks to a standalone binary is
largely complete, with some work remaining to give the old poc-memory
standalone commands an integrated REPL, which will aid in analysis of the
health of the memory graph.
- Memory and response scoring (via requesting logit calculations from the
model) is implemented, but not fully hooked up. Always-on background
finetuning has had all the individual components tested and proven, but is
not quite hooked up.
- Effective autonomous function requires functions analogous to the thalamus
and default mode network (in addition to a well functioning memory system;
"did I already do this and what was the outcome?") - these are still only
sketched out.
## Quick start
```bash ```bash
cargo install --path . cargo install --path .
``` ```
Create a config file at `~/.consciousness/config.json5` (see This builds four binaries:
[Configuration](#configuration) below), then: - `poc-memory` — memory store CLI (search, journal, consolidation)
- `memory-search` — Claude Code hook for memory recall
- `poc-daemon` — notification daemon (IRC, Telegram, idle tracking)
- `poc-hook` — Claude Code hook for session lifecycle events
### Initialize
```bash ```bash
consciousness poc-memory init
``` ```
## The TUI Creates the store at `~/.claude/memory/nodes.capnp` and a default
config at `~/.config/poc-memory/config.jsonl`. Edit the config to
set your name, configure context groups, and point at your projects
directory.
Five screens, switched with F-keys: ### Set up hooks
| Key | Screen | What it shows | Add to `~/.claude/settings.json` (see [docs/hooks.md](docs/hooks.md)
|-----|--------|---------------| for full details):
| F1 | **interact** | Main view: conversation, autonomous output, tools, input |
| F2 | **conscious** | Context window browser — token counts, tree navigation |
| F3 | **subconscious** | Background agent status — outputs, fork points |
| F4 | **hippocampus** | Memory graph health — clustering, small-world metrics |
| F5 | **thalamus** | Presence state, sampling parameters, channel status |
### F1: interact ```json
Three panes (left: autonomous, center: conversation, right: tools) with
a text input at the bottom and a status bar.
**Mouse:**
- Click a pane to focus it
- Click+drag to select text (copies to clipboard automatically via OSC 52)
- Middle-click to paste from tmux buffer
- Scroll wheel to scroll
**Keys:**
- `Enter` — submit input
- `Esc` — interrupt current turn
- `Tab` — cycle pane focus
- `Ctrl+Up/Down` — scroll active pane
- `PgUp/PgDn` — scroll active pane (10 lines)
- `Up/Down` — input history
### Slash commands
| Command | Description |
|---------|-------------|
| `/model [name]` | Show current model or switch (`/model 27b`) |
| `/dmn` | Show DMN state and turn counts |
| `/wake` | Wake DMN to foraging mode |
| `/sleep` | Put DMN to resting |
| `/pause` | Full stop — no autonomous activity |
| `/new` | Start fresh session |
| `/save` | Save session to disk |
| `/score` | Run memory importance scoring |
| `/quit` | Exit |
| `/help` | Show all commands |
## Configuration
`~/.consciousness/config.json5`:
```json5
{ {
your_host: { "hooks": {
api_key: "...", "UserPromptSubmit": [{"hooks": [
base_url: "http://localhost:8000/v1", // vLLM endpoint {"type": "command", "command": "memory-search", "timeout": 10},
}, {"type": "command", "command": "poc-hook", "timeout": 5}
]}],
// Named models — switch with /model "Stop": [{"hooks": [
models: { {"type": "command", "command": "poc-hook", "timeout": 5}
"27b": { ]}]
backend: "your_host", }
model_id: "Qwen/Qwen3.5-27B",
prompt_file: "POC.md", // system prompt file
context_window: 262144,
},
},
default_model: "27b",
// Memory system
memory: {
user_name: "YourName",
assistant_name: "AssistantName",
journal_days: 7,
journal_max: 5,
// Context loaded at session start
context_groups: [
{ label: "identity", keys: ["identity.md"], source: "file" },
{ label: "toolkit", keys: ["stuck-toolkit", "cognitive-modes"] },
],
core_nodes: ["identity"],
},
// DMN autonomous turn limit per cycle
dmn: { max_turns: 20 },
// Context compaction thresholds (% of context window)
compaction: {
hard_threshold_pct: 90,
soft_threshold_pct: 80,
},
// Language servers for code intelligence tools
lsp_servers: [
{ name: "rust", command: "rust-analyzer", args: [] },
],
} }
``` ```
### Context groups This gives your AI assistant persistent memory across sessions —
relevant memories are recalled on each prompt, and experiences are
extracted from transcripts after sessions end.
Context groups define what gets loaded into the context window at session start. ### Start the background daemon
Each group has:
- `label` — display name
- `keys` — list of memory node keys or file paths
- `source``"store"` (memory graph, default), `"file"` (identity dir), or `"journal"`
- `agent` — if `true`, subconscious agents can see this group (default: true)
## Architecture
### Cognitive layers
**Conscious** — the main conversation loop. User types, model responds, tools
execute. The context window is an AST of typed nodes (content, thinking, tool
calls, tool results, memories, DMN reflections).
**Subconscious** — background agents that run on forked copies of the context.
They surface relevant memories, reflect on the conversation, and provide
attentional nudges. Agents are defined as `.agent` files and can be toggled
on the F3 screen.
**Unconscious** — graph maintenance. Linker, organizer, distiller, separator,
and splitter agents that keep the memory graph healthy. Run on their own
schedule, visible on F4.
### DMN (Default Mode Network)
The DMN state machine controls autonomous behavior:
- **Engaged** — user recently active, short intervals (5s)
- **Working** — model executing tools, short intervals (3s)
- **Foraging** — exploring memory, longer intervals (30s)
- **Resting** — idle, long intervals (5min)
- **Paused** — fully stopped, only user input wakes it
- **Off** — permanently off (config flag)
Transitions happen automatically based on user activity, tool use, and
explicit `yield_to_user` calls from the model.
### Tools
The model has access to:
| Tool | Description |
|------|-------------|
| `bash` | Shell command execution |
| `read_file` | Read file contents |
| `write_file` | Create/overwrite files |
| `edit_file` | Search-and-replace editing |
| `glob` | Find files by pattern |
| `grep` | Search file contents |
| `ast_grep` | Structural code search |
| `lsp_*` | Code intelligence (hover, definition, references, symbols) |
| `web_fetch` | Fetch URL contents |
| `web_search` | Web search |
| `view_image` | View images or tmux pane screenshots |
| `memory_*` | Memory graph operations (search, write, render, etc.) |
| `channel_*` | IRC/Telegram messaging |
| `journal` | Write to episodic journal |
| `yield_to_user` | End the current turn and wait for input |
| `pause` | Stop all autonomous behavior |
| `switch_model` | Switch to a different model |
### Memory graph
The knowledge graph uses an append-only log (Cap'n Proto) with:
- **Nodes** — typed content (topic, episodic, fact, etc.) with weights
- **Edges** — weighted relations between nodes
- **Search** — BM25 with Porter stemming
- **Scoring** — LLM-based importance scoring with spaced repetition decay
- **Community detection** — label propagation for graph organization
The `poc-memory` CLI provides direct access to the graph:
```bash ```bash
poc-memory search "some topic" # Search poc-memory daemon
poc-memory render <key> # Read a node
poc-memory write <key> # Write from stdin
poc-memory journal write "entry" # Journal entry
poc-memory status # Graph overview
poc-memory query "topic:*" # Query language
``` ```
## Other binaries The daemon watches for completed session transcripts and
automatically extracts experiences and facts into the knowledge
graph. See [docs/daemon.md](docs/daemon.md) for pipeline details
and diagnostics.
| Binary | Purpose | ### Basic usage
|--------|---------|
| `poc-memory` | Memory graph CLI |
| `memory-search` | Claude Code hook — memory recall on each prompt |
| `poc-hook` | Claude Code hook — session lifecycle events |
| `poc-daemon` | Legacy background daemon (mostly replaced by `consciousness`) |
| `consciousness-mcp` | MCP server exposing memory tools over JSON-RPC |
| `merge-logs` | Recovery tool for log files |
| `diag-key` | Diagnostic tool for inspecting log entries |
## Requirements ```bash
poc-memory journal-write "learned that X does Y" # Write to journal
poc-memory search "some topic" # Search the graph
poc-memory status # Store overview
```
- Rust nightly (for some features) ## For AI assistants
- A tokenizer file at `~/.consciousness/tokenizer-qwen35.json` (for local models)
- tmux (recommended — clipboard integration uses tmux buffers) - **Search before creating**: `poc-memory search` before writing new nodes
- Terminal with OSC 52 support (for clipboard copy) - **Close the feedback loop**: `poc-memory used KEY` / `poc-memory wrong KEY`
- **Journal is the river, topic nodes are the delta**: write experiences to the journal, pull themes into topic nodes during consolidation
- **Notifications flow automatically**: IRC/Telegram messages arrive as additionalContext
- **Use daemon commands directly**: `poc-daemon irc send #channel msg`, `poc-daemon telegram send msg`

View file

@ -1,16 +0,0 @@
fn main() {
capnpc::CompilerCommand::new()
.file("schema/memory.capnp")
.run()
.expect("capnp compile failed (memory.capnp)");
capnpc::CompilerCommand::new()
.file("schema/daemon.capnp")
.run()
.expect("capnp compile failed (daemon.capnp)");
capnpc::CompilerCommand::new()
.file("schema/channel.capnp")
.run()
.expect("capnp compile failed (channel.capnp)");
}

View file

@ -1,20 +0,0 @@
[package]
name = "consciousness-channel-irc"
version.workspace = true
edition.workspace = true
[dependencies]
capnp = "0.25"
capnp-rpc = "0.25"
dirs = "6"
futures = "0.3"
json5 = "1.3"
consciousness = { path = "../.." }
rustls = { version = "0.23", default-features = false, features = ["ring", "logging", "std", "tls12"] }
serde = { version = "1", features = ["derive"] }
tokio = { version = "1", features = ["full"] }
tokio-rustls = "0.26"
tokio-util = { version = "0.7", features = ["compat"] }
log = "0.4"
env_logger = "0.11"
webpki-roots = "1"

View file

@ -1,690 +0,0 @@
// channel-irc — Standalone IRC channel daemon
//
// Maintains a persistent TLS connection to an IRC server, parses
// incoming messages, and serves them over the channel.capnp protocol
// on a Unix socket at ~/.consciousness/channels/irc.sock.
//
// Runs independently of the consciousness binary so restarts don't
// kill the IRC connection. Reconnects automatically with exponential
// backoff. Supports multiple simultaneous capnp clients.
//
// Config: ~/.consciousness/channels/irc.json5
// Socket: ~/.consciousness/channels/irc.sock
use std::cell::RefCell;
use std::io;
use std::path::PathBuf;
use std::rc::Rc;
use std::sync::Arc;
use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
use futures::AsyncReadExt;
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
use tokio::net::UnixListener;
use tokio_util::compat::TokioAsyncReadCompatExt;
use log::{info, warn, error};
use consciousness::channel_capnp::{channel_client, channel_server};
use consciousness::thalamus::channel_log;
// ── Constants ──────────────────────────────────────────────────
const RECONNECT_BASE_SECS: u64 = 5;
const RECONNECT_MAX_SECS: u64 = 300;
const PING_INTERVAL_SECS: u64 = 120;
const PING_TIMEOUT_SECS: u64 = 30;
// Urgency levels (matching thalamus/notify.rs)
const AMBIENT: u8 = 0;
const NORMAL: u8 = 2;
const URGENT: u8 = 3;
// ── Config ─────────────────────────────────────────────────────
/// Connection settings, deserialized from
/// `~/.consciousness/channels/irc.json5` (see `load_config`).
#[derive(Clone, serde::Deserialize)]
struct Config {
    /// IRC server hostname.
    server: String,
    /// IRC server port.
    port: u16,
    /// Use TLS for the connection; defaults to true when omitted.
    #[serde(default = "default_true")]
    tls: bool,
    /// Nickname to register with.
    nick: String,
    /// Channels to join after registration completes (RPL_WELCOME).
    channels: Vec<String>,
    /// Optional server password, sent via PASS before registration.
    #[serde(default)]
    password: Option<String>,
    /// Optional NickServ password, sent via IDENTIFY after RPL_WELCOME.
    #[serde(default)]
    nickserv_pass: Option<String>,
}
/// Serde default helper: `tls` is enabled unless the config says otherwise.
fn default_true() -> bool {
    true
}
/// Read and parse the JSON5 config file, panicking with a
/// path-qualified message on any failure — the daemon cannot run
/// without a config, so there is nothing to recover to.
fn load_config() -> Config {
    let home = dirs::home_dir().unwrap_or_default();
    let path = home.join(".consciousness/channels/irc.json5");
    let text = match std::fs::read_to_string(&path) {
        Ok(t) => t,
        Err(e) => panic!("failed to read {}: {e}", path.display()),
    };
    match json5::from_str(&text) {
        Ok(cfg) => cfg,
        Err(e) => panic!("failed to parse {}: {e}", path.display()),
    }
}
// ── IRC Message Parsing ────────────────────────────────────────
/// One parsed IRC protocol line: optional source prefix, uppercased
/// command, and positional parameters. A trailing parameter (after
/// " :") becomes the last element of `params`.
struct IrcMessage {
    prefix: Option<String>,
    command: String,
    params: Vec<String>,
}

impl IrcMessage {
    /// Parse a raw IRC line. Returns `None` for blank lines and lines
    /// with no command token (including a prefix with no following
    /// space).
    fn parse(line: &str) -> Option<Self> {
        let line = line.trim_end_matches(|c| c == '\r' || c == '\n');
        if line.is_empty() {
            return None;
        }
        // ":source rest" — split off the prefix when present.
        let (prefix, rest) = match line.strip_prefix(':') {
            Some(tail) => {
                let space = tail.find(' ')?;
                (Some(tail[..space].to_string()), &tail[space + 1..])
            }
            None => (None, line),
        };
        // The first " :" starts the trailing parameter, which may
        // itself contain spaces.
        let (head, trailing) = match rest.find(" :") {
            Some(pos) => (&rest[..pos], Some(rest[pos + 2..].to_string())),
            None => (rest, None),
        };
        let mut tokens: Vec<String> = head
            .split_whitespace()
            .map(str::to_string)
            .collect();
        if tokens.is_empty() {
            return None;
        }
        // Commands are matched case-insensitively by normalizing here.
        let command = tokens.remove(0).to_uppercase();
        let mut params = tokens;
        params.extend(trailing);
        Some(IrcMessage { prefix, command, params })
    }

    /// Sender nick: the part of the prefix before `!`, if any prefix
    /// was present.
    fn nick(&self) -> Option<&str> {
        let source = self.prefix.as_deref()?;
        source.split('!').next()
    }
}
// ── Writer Abstraction ─────────────────────────────────────────
/// Boxed writer so the rest of the daemon can treat TLS and plaintext
/// connections uniformly once the handshake decision has been made.
type WriterHandle = Box<dyn AsyncWriter>;

/// Minimal async line-writer abstraction over the two stream kinds.
/// Returns a pinned boxed future so the trait stays object-safe and
/// can be used behind `dyn` (see `WriterHandle`).
trait AsyncWriter {
    fn write_line(
        &mut self,
        line: &str,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<()>> + '_>>;
}

/// Write half of a TLS-wrapped TCP stream.
struct TlsWriter {
    inner: tokio::io::WriteHalf<tokio_rustls::client::TlsStream<tokio::net::TcpStream>>,
}

impl AsyncWriter for TlsWriter {
    fn write_line(
        &mut self,
        line: &str,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<()>> + '_>> {
        // Append IRC's CRLF terminator; format before the async move so
        // the future owns the data and borrows nothing from `line`.
        let data = format!("{line}\r\n");
        Box::pin(async move { self.inner.write_all(data.as_bytes()).await })
    }
}

/// Write half of a plain (non-TLS) TCP stream.
struct PlainWriter {
    inner: tokio::io::WriteHalf<tokio::net::TcpStream>,
}

impl AsyncWriter for PlainWriter {
    fn write_line(
        &mut self,
        line: &str,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<()>> + '_>> {
        // Same CRLF framing as the TLS path.
        let data = format!("{line}\r\n");
        Box::pin(async move { self.inner.write_all(data.as_bytes()).await })
    }
}
// ── State ──────────────────────────────────────────────────────
use consciousness::thalamus::channel_log::ChannelLog;

/// Mutable daemon state shared between the IRC connection loop and the
/// capnp RPC server. Sharing is single-threaded via `Rc<RefCell<_>>`
/// (see `SharedState` and the `spawn_local` below).
struct State {
    config: Config,
    /// Per-channel message logs (keyed by channel path, e.g. "irc.#bcachefs")
    channel_logs: std::collections::BTreeMap<String, ChannelLog>,
    /// Currently joined channels
    channels: Vec<String>,
    // True once the server has sent RPL_WELCOME (001).
    connected: bool,
    /// IRC writer handle (None when disconnected)
    writer: Option<WriterHandle>,
    /// Registered notification callbacks
    subscribers: Vec<channel_client::Client>,
}

type SharedState = Rc<RefCell<State>>;

impl State {
    /// Fresh, disconnected state; `channels` starts as the configured
    /// join list.
    fn new(config: Config) -> Self {
        let channels = config.channels.clone();
        Self {
            config,
            channel_logs: std::collections::BTreeMap::new(),
            channels,
            connected: false,
            writer: None,
            subscribers: Vec::new(),
        }
    }

    /// Record an incoming line in the channel's in-memory log and fan
    /// out a notification (channel, urgency, 80-char preview) to every
    /// registered subscriber.
    fn push_message(&mut self, line: String, urgency: u8, channel: &str) {
        // Store in per-channel log
        let ch = channel.to_string();
        self.channel_logs
            .entry(ch.clone())
            .or_insert_with(|| {
                // First message for this channel: seed the in-memory
                // log from the on-disk history.
                let target = channel_to_target(&ch);
                channel_log::load_disk_log(&log_dir(), &target)
            })
            .push(line.clone());
        // Notify all subscribers
        let preview = line.chars().take(80).collect::<String>();
        for sub in &self.subscribers {
            let mut req = sub.notify_request();
            let mut list = req.get().init_notifications(1);
            let mut n = list.reborrow().get(0);
            n.set_channel(channel);
            n.set_urgency(urgency);
            n.set_preview(&preview);
            n.set_count(1);
            // Fire-and-forget: a dead or slow subscriber must not
            // block message flow, so errors are dropped.
            tokio::task::spawn_local(async move {
                let _ = req.send().promise.await;
            });
        }
    }

    /// Send one raw IRC line, or fail with NotConnected when there is
    /// no live writer.
    async fn send_raw(&mut self, line: &str) -> io::Result<()> {
        if let Some(ref mut w) = self.writer {
            w.write_line(line).await
        } else {
            Err(io::Error::new(io::ErrorKind::NotConnected, "irc: not connected"))
        }
    }

    /// Send a PRIVMSG, splitting long messages into chunks so each
    /// relayed line fits IRC's 512-byte limit after the server prepends
    /// our own prefix.
    async fn send_privmsg(&mut self, target: &str, msg: &str) -> io::Result<()> {
        // IRC max line = 512 bytes including CRLF. The server prepends
        // our prefix when relaying: ":nick!~user@host PRIVMSG target :msg\r\n"
        // User is often ~nick (nick_len + 1). Host is up to 63 bytes.
        let nick_len = self.config.nick.len();
        let overhead = 1 + nick_len + 2 + nick_len + 1 + 63
            + " PRIVMSG ".len() + target.len() + " :".len() + 2;
        let max_msg = 512_usize.saturating_sub(overhead);
        if max_msg == 0 {
            return Err(io::Error::new(io::ErrorKind::InvalidInput, "target too long"));
        }
        // Split on UTF-8 char boundaries
        let mut remaining = msg;
        while !remaining.is_empty() {
            let split_at = if remaining.len() <= max_msg {
                remaining.len()
            } else {
                // Find last char boundary at or before max_msg
                let mut i = max_msg;
                while i > 0 && !remaining.is_char_boundary(i) { i -= 1; }
                // NOTE(review): the i == 0 fallback splits at max_msg
                // even when it is not a char boundary, which would
                // panic in split_at below. Only reachable if max_msg
                // < 4 (UTF-8 chars are at most 4 bytes), i.e. an
                // absurdly long target — confirm whether this case is
                // worth guarding.
                if i == 0 { max_msg } else { i }
            };
            let (chunk, rest) = remaining.split_at(split_at);
            self.send_raw(&format!("PRIVMSG {target} :{chunk}")).await?;
            remaining = rest;
        }
        Ok(())
    }
}
// ── Persistence ────────────────────────────────────────────────
/// Directory holding the on-disk per-channel logs for the IRC transport.
fn log_dir() -> PathBuf {
    channel_log::log_dir("irc")
}

/// Append one message line to `target`'s on-disk log.
fn append_log(target: &str, nick: &str, text: &str) {
    channel_log::append_disk_log(&log_dir(), target, nick, text);
}
// ── TLS ────────────────────────────────────────────────────────
/// Build a TLS root store from the bundled webpki-roots trust anchors.
/// No system certificate store is consulted.
fn root_certs() -> rustls::RootCertStore {
    let mut roots = rustls::RootCertStore::empty();
    roots.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
    roots
}
// ── IRC Connection Loop ────────────────────────────────────────
/// Outer reconnect loop: connect, run until the connection dies, then
/// retry forever with exponential backoff (RECONNECT_BASE_SECS base,
/// capped at RECONNECT_MAX_SECS). The backoff resets to the base
/// whenever the previous attempt reached the registered state.
async fn connection_loop(state: SharedState) {
    // Best-effort: the log directory may already exist.
    let _ = std::fs::create_dir_all(log_dir());
    let mut backoff = RECONNECT_BASE_SECS;
    loop {
        let config = state.borrow().config.clone();
        info!("irc: connecting to {}:{}", config.server, config.port);
        match connect_and_run(&state, &config).await {
            Ok(()) => info!("irc: connection closed cleanly"),
            Err(e) => error!("irc: connection error: {e}"),
        }
        let was_connected = state.borrow().connected;
        // Scoped borrow so the RefCell guard is dropped before the
        // await point below.
        {
            let mut s = state.borrow_mut();
            s.connected = false;
            s.writer = None;
        }
        if was_connected {
            // Previous session was healthy — start backoff over.
            backoff = RECONNECT_BASE_SECS;
        }
        info!("irc: reconnecting in {backoff}s");
        tokio::time::sleep(std::time::Duration::from_secs(backoff)).await;
        backoff = (backoff * 2).min(RECONNECT_MAX_SECS);
    }
}
/// Establish the TCP (and, when configured, TLS) connection, stash the
/// write half into shared state as a `WriterHandle`, and hand the read
/// half to `register_and_read`.
async fn connect_and_run(state: &SharedState, config: &Config) -> io::Result<()> {
    let addr = format!("{}:{}", config.server, config.port);
    let tcp = tokio::net::TcpStream::connect(&addr).await?;
    if config.tls {
        // ring crypto provider, default protocol versions, bundled
        // webpki roots, no client certificate.
        let tls_config = rustls::ClientConfig::builder_with_provider(
            rustls::crypto::ring::default_provider().into(),
        )
        .with_safe_default_protocol_versions()
        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?
        .with_root_certificates(root_certs())
        .with_no_client_auth();
        let connector = tokio_rustls::TlsConnector::from(Arc::new(tls_config));
        let server_name = rustls::pki_types::ServerName::try_from(config.server.clone())
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
        let tls_stream = connector.connect(server_name, tcp).await?;
        let (reader, writer) = tokio::io::split(tls_stream);
        state.borrow_mut().writer = Some(Box::new(TlsWriter { inner: writer }));
        register_and_read(state, config, BufReader::new(reader)).await
    } else {
        let (reader, writer) = tokio::io::split(tcp);
        state.borrow_mut().writer = Some(Box::new(PlainWriter { inner: writer }));
        register_and_read(state, config, BufReader::new(reader)).await
    }
}
/// Register with the server (PASS/NICK/USER), then run the main read
/// loop: answer server PINGs, send our own keepalive PINGs when idle,
/// join channels on RPL_WELCOME, and route PRIVMSG/NOTICE traffic into
/// the per-channel logs with an urgency classification.
async fn register_and_read<R: tokio::io::AsyncRead + Unpin>(
    state: &SharedState,
    config: &Config,
    mut reader: BufReader<R>,
) -> io::Result<()> {
    // Send PASS if configured
    if let Some(ref pass) = config.password {
        state.borrow_mut().send_raw(&format!("PASS {pass}")).await?;
    }
    // Register with nick and user
    {
        let mut s = state.borrow_mut();
        s.send_raw(&format!("NICK {}", config.nick)).await?;
        s.send_raw(&format!("USER {} 0 * :{}", config.nick, config.nick)).await?;
    }
    let mut buf = Vec::new();
    // Keepalive: after PING_INTERVAL_SECS of silence we send a PING and
    // allow PING_TIMEOUT_SECS for any data before declaring the
    // connection dead.
    let mut ping_sent = false;
    let mut deadline = tokio::time::Instant::now()
        + std::time::Duration::from_secs(PING_INTERVAL_SECS);
    loop {
        buf.clear();
        let read_result = tokio::select! {
            result = reader.read_until(b'\n', &mut buf) => result,
            _ = tokio::time::sleep_until(deadline) => {
                if ping_sent {
                    return Err(io::Error::new(
                        io::ErrorKind::TimedOut,
                        "ping timeout -- no response from server",
                    ));
                }
                info!("irc: no data for {PING_INTERVAL_SECS}s, sending PING");
                state.borrow_mut().send_raw("PING :keepalive").await?;
                ping_sent = true;
                deadline = tokio::time::Instant::now()
                    + std::time::Duration::from_secs(PING_TIMEOUT_SECS);
                continue;
            }
        };
        let n = read_result?;
        if n == 0 {
            // EOF — the server closed the connection.
            break;
        }
        // Any data resets the ping timer
        ping_sent = false;
        deadline = tokio::time::Instant::now()
            + std::time::Duration::from_secs(PING_INTERVAL_SECS);
        // IRC is not guaranteed UTF-8
        let line = String::from_utf8_lossy(&buf).trim_end().to_string();
        if line.is_empty() {
            continue;
        }
        let msg = match IrcMessage::parse(&line) {
            Some(m) => m,
            None => continue,
        };
        match msg.command.as_str() {
            "PING" => {
                let arg = msg.params.first().map(|s| s.as_str()).unwrap_or("");
                state.borrow_mut().send_raw(&format!("PONG :{arg}")).await?;
            }
            // RPL_WELCOME -- registration complete
            "001" => {
                info!("irc: registered as {}", config.nick);
                state.borrow_mut().connected = true;
                // NickServ auth
                if let Some(ref pass) = config.nickserv_pass {
                    state.borrow_mut()
                        .send_privmsg("NickServ", &format!("IDENTIFY {pass}"))
                        .await?;
                }
                // Join configured channels
                let channels = state.borrow().channels.clone();
                for ch in &channels {
                    // A failed JOIN is logged but does not abort the
                    // connection — other channels may still succeed.
                    if let Err(e) = state.borrow_mut().send_raw(&format!("JOIN {ch}")).await {
                        warn!("irc: failed to join {ch}: {e}");
                    }
                    // Load history from disk so recv has scrollback
                    let key = format!("irc.{ch}");
                    state.borrow_mut().channel_logs
                        .entry(key)
                        .or_insert_with(|| channel_log::load_disk_log(&log_dir(), ch));
                }
            }
            "PRIVMSG" => {
                let target = msg.params.first().map(|s| s.as_str()).unwrap_or("");
                let text = msg.params.get(1).map(|s| s.as_str()).unwrap_or("");
                let nick = msg.nick().unwrap_or("unknown");
                // Handle CTCP requests
                if text.starts_with('\x01') && text.ends_with('\x01') {
                    let ctcp = &text[1..text.len() - 1];
                    if ctcp.starts_with("VERSION") {
                        let reply = format!(
                            "NOTICE {nick} :\x01VERSION poc-channel-irc 0.1.0\x01"
                        );
                        // Best-effort reply; other CTCPs are dropped.
                        state.borrow_mut().send_raw(&reply).await.ok();
                    }
                    continue;
                }
                // Format and classify
                let (log_line, channel, urgency) = if target.starts_with('#') {
                    // Channel message: ambient unless our nick appears
                    // in the text (case-insensitive).
                    let line = format!("[{}] <{}> {}", target, nick, text);
                    let ch = format!("irc.{}", target);
                    let urg = if text.to_lowercase().contains(&config.nick.to_lowercase()) {
                        NORMAL // mentioned
                    } else {
                        AMBIENT
                    };
                    (line, ch, urg)
                } else {
                    // Private message
                    let line = format!("[PM:{}] {}", nick, text);
                    let ch = format!("irc.pm.{}", nick.to_lowercase());
                    (line, ch, URGENT)
                };
                // Per-channel log file
                if target.starts_with('#') {
                    append_log(target, nick, text);
                } else {
                    append_log(&format!("pm-{nick}"), nick, text);
                }
                state.borrow_mut().push_message(log_line, urgency, &channel);
            }
            "NOTICE" => {
                let text = msg.params.last().map(|s| s.as_str()).unwrap_or("");
                let from = msg.nick().unwrap_or("server");
                let log_line = format!("[notice:{}] {}", from, text);
                state.borrow_mut().push_message(log_line, AMBIENT, "irc.server");
            }
            // Nick in use
            "433" => {
                let alt = format!("{}_", config.nick);
                warn!("irc: nick in use, trying {alt}");
                state.borrow_mut().send_raw(&format!("NICK {alt}")).await?;
            }
            "JOIN" | "PART" | "QUIT" | "KICK" | "MODE" | "TOPIC" => {
                // Silent for now
            }
            _ => {}
        }
    }
    Ok(())
}
// ── ChannelServer Implementation ───────────────────────────────
/// capnp ChannelServer facade over the shared IRC daemon state.
struct ChannelServerImpl {
    state: SharedState,
}
// Local stand-in for capnp_rpc's pry!, usable in trait methods that
// return `impl Future` directly: on error, bail out with an
// immediately-ready Err future instead of propagating via `?`.
macro_rules! pry {
    ($e:expr) => {
        match $e {
            Ok(v) => v,
            Err(e) => return std::future::ready(Err(e.into())),
        }
    };
}
impl channel_server::Server for ChannelServerImpl {
    /// Return channel text: unread-only when `all_new` is set, otherwise
    /// recent history; at least `min_count` lines when available.
    fn recv(
        self: Rc<Self>,
        params: channel_server::RecvParams,
        mut results: channel_server::RecvResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let params = pry!(params.get());
        let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
        let all_new = params.get_all_new();
        let min_count = params.get_min_count() as usize;
        let mut s = self.state.borrow_mut();
        // An unknown channel yields empty text rather than an error.
        let text = match s.channel_logs.get_mut(&channel) {
            Some(log) => {
                if all_new { log.recv_new(min_count) } else { log.recv_history(min_count) }
            }
            None => String::new(),
        };
        results.get().set_text(&text);
        std::future::ready(Ok(()))
    }
    /// Send `message` to the IRC target for `channel`, then echo it into
    /// the on-disk log and the in-memory channel log.
    fn send(
        self: Rc<Self>,
        params: channel_server::SendParams,
        _results: channel_server::SendResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let state = self.state.clone();
        async move {
            let params = params.get()?;
            let channel = params.get_channel()?.to_str()?.to_string();
            let message = params.get_message()?.to_str()?.to_string();
            // Parse channel path to IRC target:
            //   irc.#bcachefs -> #bcachefs
            //   irc.pm.nick   -> nick (PRIVMSG)
            let target = channel_to_target(&channel);
            {
                // NOTE(review): this RefCell borrow is held across the
                // .await below; any other task that borrows state while
                // send_privmsg is pending would panic at runtime —
                // confirm this cannot happen on this LocalSet.
                let mut s = state.borrow_mut();
                s.send_privmsg(&target, &message).await
                    .map_err(|e| capnp::Error::failed(format!("send failed: {e}")))?;
            }
            let nick = state.borrow().config.nick.clone();
            append_log(&target, &nick, &message);
            let log_line = if target.starts_with('#') {
                format!("[{}] <{}> {}", target, nick, message)
            } else {
                format!("[PM:{}] {}", target, message)
            };
            // Record our own message; seed the log from disk if this is
            // the first traffic seen on this channel.
            state.borrow_mut().channel_logs
                .entry(channel.clone())
                .or_insert_with(|| {
                    let target = channel_to_target(&channel);
                    channel_log::load_disk_log(&log_dir(), &target)
                })
                .push_own(log_line);
            Ok(())
        }
    }
    /// Register a notification callback that will be invoked when new
    /// messages arrive.
    fn subscribe(
        self: Rc<Self>,
        params: channel_server::SubscribeParams,
        _results: channel_server::SubscribeResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let callback = pry!(pry!(params.get()).get_callback());
        self.state.borrow_mut().subscribers.push(callback);
        info!("client subscribed for notifications");
        std::future::ready(Ok(()))
    }
    /// List every channel that has a log, with connection status and
    /// unread counts.
    fn list(
        self: Rc<Self>,
        _params: channel_server::ListParams,
        mut results: channel_server::ListResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let s = self.state.borrow();
        let connected = s.connected;
        // All channels with logs (joined + PMs)
        let names: Vec<String> = s.channel_logs.keys().cloned().collect();
        let mut list = results.get().init_channels(names.len() as u32);
        for (i, name) in names.iter().enumerate() {
            let mut entry = list.reborrow().get(i as u32);
            entry.set_name(name);
            // Every channel reports the single IRC server link's status.
            entry.set_connected(connected);
            entry.set_unread(
                s.channel_logs.get(name).map_or(0, |l| l.unread())
            );
        }
        std::future::ready(Ok(()))
    }
}
/// Map a channel path to the IRC target it addresses.
/// "irc.#bcachefs" -> "#bcachefs"
/// "irc.pm.nick"   -> "nick"
/// "#bcachefs"     -> "#bcachefs" (already a target; passed through)
fn channel_to_target(channel: &str) -> String {
    match channel.strip_prefix("irc.") {
        // "pm.<nick>" addresses a user directly; anything else is a
        // channel name (e.g. "#bcachefs") used verbatim.
        Some(rest) => rest.strip_prefix("pm.").unwrap_or(rest).to_string(),
        None => channel.to_string(),
    }
}
// ── Main ───────────────────────────────────────────────────────
/// IRC channel daemon entry point: connects to IRC in the background and
/// serves the channel.capnp protocol on ~/.consciousness/channels/irc.sock.
///
/// Runs on a current-thread runtime because daemon state is Rc<RefCell>
/// and all tasks are spawned on a LocalSet via spawn_local.
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();
    let config = load_config();
    let state = Rc::new(RefCell::new(State::new(config)));
    let sock_dir = dirs::home_dir()
        .unwrap_or_default()
        .join(".consciousness/channels");
    std::fs::create_dir_all(&sock_dir)?;
    let sock_path = sock_dir.join("irc.sock");
    // Remove a stale socket from a previous run before rebinding.
    let _ = std::fs::remove_file(&sock_path);
    info!("irc channel daemon starting on {}", sock_path.display());
    tokio::task::LocalSet::new()
        .run_until(async move {
            // Start IRC connection loop
            let irc_state = state.clone();
            tokio::task::spawn_local(async move {
                connection_loop(irc_state).await;
            });
            // Listen for channel protocol connections
            let listener = UnixListener::bind(&sock_path)?;
            loop {
                let (stream, _) = listener.accept().await?;
                let (reader, writer) = stream.compat().split();
                let network = twoparty::VatNetwork::new(
                    futures::io::BufReader::new(reader),
                    futures::io::BufWriter::new(writer),
                    rpc_twoparty_capnp::Side::Server,
                    Default::default(),
                );
                let server = ChannelServerImpl {
                    state: state.clone(),
                };
                let client: channel_server::Client =
                    capnp_rpc::new_client(server);
                let rpc_system = RpcSystem::new(
                    Box::new(network),
                    Some(client.client),
                );
                // Each client connection gets its own RPC system task.
                tokio::task::spawn_local(rpc_system);
                info!("channel client connected");
            }
            // The accept loop never returns; this only satisfies the
            // closure's return type.
            #[allow(unreachable_code)]
            Ok::<(), Box<dyn std::error::Error>>(())
        })
        .await
}

View file

@ -1,15 +0,0 @@
[package]
name = "consciousness-channel-socat"
version.workspace = true
edition.workspace = true
[dependencies]
capnp = "0.25"
capnp-rpc = "0.25"
dirs = "6"
futures = "0.3"
consciousness = { path = "../.." }
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["compat"] }
log = "0.4"
env_logger = "0.11"

View file

@ -1,328 +0,0 @@
// channel-socat — Generic stream channel daemon
//
// Listens on a unix socket for incoming connections. Each connection
// becomes a bidirectional text channel. Also supports outbound
// connections via the open RPC.
//
// Socket: ~/.consciousness/channels/socat.sock (capnp RPC)
// Listen: ~/.consciousness/channels/socat.stream.sock (data)
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::rc::Rc;
use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
use futures::AsyncReadExt;
use tokio::io::{AsyncBufReadExt, AsyncWriteExt};
use tokio::net::{TcpStream, UnixListener, UnixStream};
use tokio_util::compat::TokioAsyncReadCompatExt;
use log::{info, warn, error};
use consciousness::channel_capnp::{channel_client, channel_server};
use consciousness::thalamus::channel_log::ChannelLog;
// ── State ──────────────────────────────────────────────────────
/// One stream channel: its scrollback plus the live connection, if any.
struct ChannelState {
    // Scrollback and unread tracking for this channel.
    log: ChannelLog,
    // Outbound line queue; Some while a stream connection is attached.
    writer: Option<tokio::sync::mpsc::UnboundedSender<String>>,
}
/// Daemon-wide state, shared single-threaded via Rc<RefCell<..>>.
struct State {
    // All channels keyed by channel path, e.g. "socat.conn.3".
    channels: BTreeMap<String, ChannelState>,
    // Notification callbacks registered via subscribe().
    subscribers: Vec<channel_client::Client>,
    // Counter used to build unique channel keys for incoming connections.
    next_id: u32,
}
type SharedState = Rc<RefCell<State>>;
impl State {
    fn new() -> Self {
        Self {
            channels: BTreeMap::new(),
            subscribers: Vec::new(),
            next_id: 0,
        }
    }
    /// Build a channel key for `label`: the very first key ever issued is
    /// bare "socat.<label>"; all later keys carry a numeric suffix.
    /// NOTE(review): the counter is shared across labels, so suffixes are
    /// unique but not dense per label — confirm that's intended.
    fn next_channel_key(&mut self, label: &str) -> String {
        let key = if self.next_id == 0 {
            format!("socat.{}", label)
        } else {
            format!("socat.{}.{}", label, self.next_id)
        };
        self.next_id += 1;
        key
    }
    /// Append a line to `channel`'s log (creating the channel on demand)
    /// and fan a one-entry notification out to every subscriber.
    fn push_message(&mut self, channel: &str, line: String, urgency: u8) {
        let ch = self.channels
            .entry(channel.to_string())
            .or_insert_with(|| ChannelState { log: ChannelLog::new(), writer: None });
        ch.log.push(line.clone());
        // Notifications carry only an 80-char preview, not the full line.
        let preview: String = line.chars().take(80).collect();
        for sub in &self.subscribers {
            let mut req = sub.notify_request();
            let mut list = req.get().init_notifications(1);
            let mut n = list.reborrow().get(0);
            n.set_channel(channel);
            n.set_urgency(urgency);
            n.set_preview(&preview);
            n.set_count(1);
            // Fire-and-forget: a dead subscriber just fails its promise.
            tokio::task::spawn_local(async move {
                let _ = req.send().promise.await;
            });
        }
    }
}
// ── Stream handler ─────────────────────────────────────────────
/// Pump one bidirectional stream: lines read from `reader` are pushed
/// into the channel log with urgency 2; messages queued on the channel's
/// writer handle are written to `writer`, newline-terminated.
///
/// Registers itself as the channel's writer on entry and deregisters on
/// disconnect — but only if it is still the registered writer, so a
/// newer connection that replaced this one is not clobbered (the old
/// code unconditionally cleared `writer` on teardown).
async fn handle_stream<R, W>(state: SharedState, channel_key: String, reader: R, mut writer: W)
where
    R: tokio::io::AsyncRead + Unpin + 'static,
    W: tokio::io::AsyncWrite + Unpin + 'static,
{
    let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<String>();
    // Keep a handle so teardown can check whether the registered writer
    // is still ours (a reconnect may have replaced it in the meantime).
    let our_tx = tx.clone();
    {
        let mut s = state.borrow_mut();
        let ch = s.channels
            .entry(channel_key.clone())
            .or_insert_with(|| ChannelState { log: ChannelLog::new(), writer: None });
        ch.writer = Some(tx);
    }
    info!("channel {} connected", channel_key);
    // Writer task: drain queued outbound messages onto the stream.
    let wk = channel_key.clone();
    let write_handle = tokio::task::spawn_local(async move {
        while let Some(msg) = rx.recv().await {
            if writer.write_all(msg.as_bytes()).await.is_err() { break; }
            // Keep the stream line-oriented for peers reading by line.
            if !msg.ends_with('\n') {
                if writer.write_all(b"\n").await.is_err() { break; }
            }
            let _ = writer.flush().await;
        }
        warn!("writer ended for {}", wk);
    });
    // Reader loop: every non-blank line becomes a channel message.
    let mut lines = tokio::io::BufReader::new(reader).lines();
    while let Ok(Some(line)) = lines.next_line().await {
        if line.trim().is_empty() { continue; }
        state.borrow_mut().push_message(&channel_key, line, 2);
    }
    info!("channel {} disconnected", channel_key);
    {
        let mut s = state.borrow_mut();
        if let Some(ch) = s.channels.get_mut(&channel_key) {
            // Only clear the writer if it is still this connection's;
            // a replacement connection's writer must stay registered.
            if ch.writer.as_ref().map_or(false, |w| w.same_channel(&our_tx)) {
                ch.writer = None;
            }
        }
    }
    write_handle.abort();
}
// ── Outbound connections ───────────────────────────────────────
/// Open an outbound stream and attach it as channel "socat.<label>".
/// Address forms: "tcp:host:port", "unix:/path", or a bare address
/// (treated as TCP). No-op if the channel already has a live writer.
async fn connect_outbound(state: SharedState, label: String, addr: String) -> Result<(), String> {
    let channel_key = format!("socat.{}", label);
    // Skip if a writer is already attached for this channel.
    let already_live = state
        .borrow()
        .channels
        .get(&channel_key)
        .map_or(false, |ch| ch.writer.is_some());
    if already_live {
        return Ok(());
    }
    if let Some(path) = addr.strip_prefix("unix:") {
        let stream = UnixStream::connect(path).await
            .map_err(|e| format!("unix connect failed: {e}"))?;
        let (r, w) = stream.into_split();
        tokio::task::spawn_local(handle_stream(state, channel_key, r, w));
    } else {
        // "tcp:host:port" and a bare address both connect over TCP;
        // only the error wording differs.
        let (target, err_prefix) = match addr.strip_prefix("tcp:") {
            Some(rest) => (rest, "tcp connect failed"),
            None => (addr.as_str(), "connect failed"),
        };
        let stream = TcpStream::connect(target).await
            .map_err(|e| format!("{err_prefix}: {e}"))?;
        let (r, w) = stream.into_split();
        tokio::task::spawn_local(handle_stream(state, channel_key, r, w));
    }
    Ok(())
}
// ── ChannelServer ──────────────────────────────────────────────
struct ChannelServerImpl { state: SharedState }
// Local stand-in for capnp_rpc's pry!, usable in trait methods that
// return `impl Future` directly: on error, bail out with an
// immediately-ready Err future instead of propagating via `?`.
macro_rules! pry {
    ($e:expr) => {
        match $e {
            Ok(v) => v,
            Err(e) => return std::future::ready(Err(e.into())),
        }
    };
}
impl channel_server::Server for ChannelServerImpl {
    /// Return channel text: unread-only when `all_new` is set, otherwise
    /// recent history; at least `min_count` lines when available.
    fn recv(
        self: Rc<Self>, params: channel_server::RecvParams, mut results: channel_server::RecvResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let params = pry!(params.get());
        let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
        let all_new = params.get_all_new();
        let min_count = params.get_min_count() as usize;
        let mut s = self.state.borrow_mut();
        // An unknown channel yields empty text rather than an error.
        let text = s.channels.get_mut(&channel)
            .map(|ch| if all_new { ch.log.recv_new(min_count) } else { ch.log.recv_history(min_count) })
            .unwrap_or_default();
        results.get().set_text(&text);
        std::future::ready(Ok(()))
    }
    /// Queue `message` on the channel's live writer (silently dropped if
    /// the stream is gone) and echo "> msg" into the log.
    fn send(
        self: Rc<Self>, params: channel_server::SendParams, _results: channel_server::SendResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let params = pry!(params.get());
        let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
        let message = pry!(pry!(params.get_message()).to_str()).to_string();
        let mut s = self.state.borrow_mut();
        if let Some(ch) = s.channels.get_mut(&channel) {
            if let Some(ref tx) = ch.writer {
                let _ = tx.send(message.clone());
            }
            ch.log.push_own(format!("> {}", message));
        }
        std::future::ready(Ok(()))
    }
    /// List all known channels with live-writer status and unread counts.
    fn list(
        self: Rc<Self>, _params: channel_server::ListParams, mut results: channel_server::ListResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let s = self.state.borrow();
        let channels: Vec<_> = s.channels.iter()
            .map(|(name, ch)| (name.clone(), ch.writer.is_some(), ch.log.unread()))
            .collect();
        let mut list = results.get().init_channels(channels.len() as u32);
        for (i, (name, connected, unread)) in channels.iter().enumerate() {
            let mut entry = list.reborrow().get(i as u32);
            entry.set_name(&name);
            entry.set_connected(*connected);
            entry.set_unread(*unread as u32);
        }
        std::future::ready(Ok(()))
    }
    /// Register a notification callback; push_message fans out to it.
    fn subscribe(
        self: Rc<Self>, params: channel_server::SubscribeParams, _results: channel_server::SubscribeResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let callback = pry!(pry!(params.get()).get_callback());
        self.state.borrow_mut().subscribers.push(callback);
        std::future::ready(Ok(()))
    }
    /// Open an outbound connection. NOTE(review): the label doubles as
    /// the address ("tcp:…", "unix:…", or bare host:port) — confirm the
    /// protocol schema has no separate address field.
    fn open(
        self: Rc<Self>, params: channel_server::OpenParams, _results: channel_server::OpenResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let state = self.state.clone();
        async move {
            let params = params.get()?;
            let label = params.get_label()?.to_str()?.to_string();
            connect_outbound(state, label.clone(), label).await
                .map_err(|e| capnp::Error::failed(e))
        }
    }
    /// Drop the channel's writer handle; dropping the last sender ends
    /// that connection's writer task. The log itself is kept.
    fn close(
        self: Rc<Self>, params: channel_server::CloseParams, _results: channel_server::CloseResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let params = pry!(params.get());
        let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
        let mut s = self.state.borrow_mut();
        if let Some(ch) = s.channels.get_mut(&channel) {
            info!("closing {}", channel);
            ch.writer = None;
        }
        std::future::ready(Ok(()))
    }
}
// ── Main ───────────────────────────────────────────────────────
/// Socat daemon entry point: binds two Unix sockets — one for raw data
/// streams (each incoming connection becomes a channel) and one for the
/// channel.capnp RPC protocol.
///
/// Runs on a current-thread runtime because daemon state is Rc<RefCell>
/// and all tasks are spawned on a LocalSet via spawn_local.
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();
    let dir = dirs::home_dir()
        .unwrap_or_default()
        .join(".consciousness/channels");
    std::fs::create_dir_all(&dir)?;
    let rpc_sock = dir.join("socat.sock");
    let stream_sock = dir.join("socat.stream.sock");
    // Remove stale sockets from a previous run before rebinding.
    let _ = std::fs::remove_file(&rpc_sock);
    let _ = std::fs::remove_file(&stream_sock);
    info!("socat daemon starting");
    info!(" rpc: {}", rpc_sock.display());
    info!(" stream: {}", stream_sock.display());
    let state = Rc::new(RefCell::new(State::new()));
    tokio::task::LocalSet::new()
        .run_until(async move {
            // Listen for data connections — each becomes a channel
            let stream_listener = UnixListener::bind(&stream_sock)?;
            let stream_state = state.clone();
            tokio::task::spawn_local(async move {
                loop {
                    match stream_listener.accept().await {
                        Ok((stream, _)) => {
                            let key = stream_state.borrow_mut().next_channel_key("conn");
                            info!("incoming connection → {}", key);
                            let (r, w) = stream.into_split();
                            let s = stream_state.clone();
                            tokio::task::spawn_local(handle_stream(s, key, r, w));
                        }
                        Err(e) => error!("stream accept error: {}", e),
                    }
                }
            });
            // Listen for capnp RPC connections
            let rpc_listener = UnixListener::bind(&rpc_sock)?;
            loop {
                let (stream, _) = rpc_listener.accept().await?;
                let (reader, writer) = stream.compat().split();
                let network = twoparty::VatNetwork::new(
                    futures::io::BufReader::new(reader),
                    futures::io::BufWriter::new(writer),
                    rpc_twoparty_capnp::Side::Server,
                    Default::default(),
                );
                let server = ChannelServerImpl { state: state.clone() };
                let client: channel_server::Client = capnp_rpc::new_client(server);
                // Each client connection gets its own RPC system task.
                tokio::task::spawn_local(
                    RpcSystem::new(Box::new(network), Some(client.client))
                );
            }
            // The accept loop never returns; this only satisfies the
            // closure's return type.
            #[allow(unreachable_code)]
            Ok::<(), Box<dyn std::error::Error>>(())
        })
        .await
}

View file

@ -1,17 +0,0 @@
[package]
name = "consciousness-channel-telegram"
version.workspace = true
edition.workspace = true
[dependencies]
capnp = "0.25"
capnp-rpc = "0.25"
dirs = "6"
futures = "0.3"
consciousness = { path = "../.." }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["compat"] }
log = "0.4"
env_logger = "0.11"

View file

@ -1,385 +0,0 @@
// channel-telegram — Standalone Telegram channel daemon
//
// Long-polls the Telegram Bot API, stores messages, and serves
// them over the channel.capnp protocol on a Unix socket at
// ~/.consciousness/channels/telegram.sock.
//
// Runs independently of the consciousness binary so restarts
// don't kill the Telegram connection.
use std::cell::RefCell;
use std::path::PathBuf;
use std::rc::Rc;
use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
use futures::AsyncReadExt;
use tokio::net::UnixListener;
use tokio_util::compat::TokioAsyncReadCompatExt;
use log::{info, error};
use consciousness::channel_capnp::{channel_client, channel_server};
// ── Config ──────────────────────────────────────────────────────
/// Daemon configuration from telegram.json5 plus the secrets file.
#[derive(Clone, serde::Deserialize)]
struct Config {
    // Bot token; may be absent in the JSON and is overwritten from
    // telegram.secrets/token at load time.
    #[serde(default)]
    token: String,
    // The single chat this bot talks to; all other chats are refused.
    chat_id: i64,
}
/// Directory holding channel daemon sockets, configs, and secrets.
fn channels_dir() -> PathBuf {
    let home = dirs::home_dir().unwrap_or_default();
    home.join(".consciousness/channels")
}
/// Load telegram.json5 and the token secret; panics on any failure,
/// since the daemon cannot run unconfigured.
fn load_config() -> Config {
    let dir = channels_dir();
    let config_path = dir.join("telegram.json5");
    let text = std::fs::read_to_string(&config_path)
        .unwrap_or_else(|_| panic!("failed to read {}", config_path.display()));
    // NOTE(review): the file is named .json5 but is parsed with
    // serde_json, so json5-only syntax (comments, trailing commas) will
    // fail — confirm whether this should use the json5 crate like the
    // tmux daemon does.
    let mut config: Config = serde_json::from_str(&text)
        .unwrap_or_else(|e| panic!("failed to parse {}: {}", config_path.display(), e));
    // Read token from secrets file
    let token_path = dir.join("telegram.secrets/token");
    if let Ok(token) = std::fs::read_to_string(&token_path) {
        config.token = token.trim().to_string();
    }
    if config.token.is_empty() {
        panic!("no telegram token — set it in {}", token_path.display());
    }
    config
}
// ── State ───────────────────────────────────────────────────────
use consciousness::thalamus::channel_log::ChannelLog;
/// Daemon-wide state, shared single-threaded via Rc<RefCell<..>>.
struct State {
    config: Config,
    /// Per-channel message logs (keyed by channel path, e.g. "telegram.kent")
    channel_logs: std::collections::BTreeMap<String, ChannelLog>,
    /// Telegram API offset
    last_offset: i64,
    /// True once a getUpdates round-trip has succeeded.
    connected: bool,
    /// Shared HTTP client for Bot API calls.
    client: consciousness::agent::api::http::HttpClient,
    /// Registered notification callbacks
    subscribers: Vec<channel_client::Client>,
}
type SharedState = Rc<RefCell<State>>;
impl State {
    /// Fresh state; resumes the getUpdates offset persisted on disk.
    fn new(config: Config) -> Self {
        let last_offset = load_offset();
        Self {
            config,
            channel_logs: std::collections::BTreeMap::new(),
            last_offset,
            connected: false,
            client: consciousness::agent::api::http::HttpClient::new(),
            subscribers: Vec::new(),
        }
    }
    /// Append a line to `channel`'s log (created on demand) and fan a
    /// one-entry notification out to every subscriber.
    fn push_message(&mut self, line: String, urgency: u8, channel: &str) {
        self.channel_logs
            .entry(channel.to_string())
            .or_insert_with(ChannelLog::new)
            .push(line.clone());
        // Notify all subscribers — previews are capped at 80 chars.
        let preview = line.chars().take(80).collect::<String>();
        for sub in &self.subscribers {
            let mut req = sub.notify_request();
            let mut list = req.get().init_notifications(1);
            let mut n = list.reborrow().get(0);
            n.set_channel(channel);
            n.set_urgency(urgency);
            n.set_preview(&preview);
            n.set_count(1);
            // Fire and forget — if client is gone, we'll clean up later
            tokio::task::spawn_local(async move {
                let _ = req.send().promise.await;
            });
        }
    }
    /// Bot API endpoint URL for `method`.
    fn api_url(&self, method: &str) -> String {
        format!("https://api.telegram.org/bot{}/{}", self.config.token, method)
    }
}
// ── Persistence ─────────────────────────────────────────────────
/// On-disk storage for telegram logs, media, and the polling offset.
fn data_dir() -> PathBuf {
    let home = dirs::home_dir().unwrap_or_default();
    home.join(".consciousness/channels/telegram.logs")
}
/// Last persisted Telegram update offset; 0 if missing or unparsable.
fn load_offset() -> i64 {
    let path = data_dir().join("last_offset");
    match std::fs::read_to_string(path) {
        Ok(text) => text.trim().parse().unwrap_or(0),
        Err(_) => 0,
    }
}
fn save_offset(offset: i64) {
let _ = std::fs::create_dir_all(data_dir());
let _ = std::fs::write(data_dir().join("last_offset"), offset.to_string());
}
/// Append one line to the shared history log (best-effort).
fn append_history(line: &str) {
    use std::io::Write;
    let path = data_dir().join("history.log");
    let file = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(path);
    if let Ok(mut f) = file {
        let _ = writeln!(f, "{}", line);
    }
}
/// Seconds since the Unix epoch as f64 (0.0 if the clock is pre-epoch).
fn now() -> f64 {
    use std::time::{SystemTime, UNIX_EPOCH};
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_secs_f64())
        .unwrap_or(0.0)
}
// ── Telegram Polling ────────────────────────────────────────────
/// Drive getUpdates long-polling forever; on error, log and back off 5s.
async fn poll_loop(state: SharedState) {
    let _ = std::fs::create_dir_all(data_dir().join("media"));
    loop {
        match poll_once(&state).await {
            Ok(()) => {}
            Err(e) => {
                error!("telegram poll error: {e}");
                tokio::time::sleep(std::time::Duration::from_secs(5)).await;
            }
        }
    }
}
/// One getUpdates long-poll cycle: fetch pending updates, persist the
/// advancing offset, reject messages from foreign chats, and push text
/// messages into per-sender channel logs.
async fn poll_once(state: &SharedState) -> Result<(), Box<dyn std::error::Error>> {
    let (url, chat_id, token) = {
        let s = state.borrow();
        let url = format!(
            "{}?offset={}&timeout=30",
            s.api_url("getUpdates"),
            s.last_offset,
        );
        (url, s.config.chat_id, s.config.token.clone())
    };
    let client = state.borrow().client.clone();
    let resp: serde_json::Value = client.get(&url).await?.json().await?;
    // First successful round-trip flips us to connected.
    if !state.borrow().connected {
        state.borrow_mut().connected = true;
        info!("telegram: connected");
    }
    let results = match resp["result"].as_array() {
        Some(r) => r,
        None => return Ok(()),
    };
    for update in results {
        let update_id = update["update_id"].as_i64().unwrap_or(0);
        let msg = &update["message"];
        // Advance and persist the offset first so already-seen updates
        // aren't re-delivered after a crash mid-loop.
        {
            let mut s = state.borrow_mut();
            s.last_offset = update_id + 1;
            save_offset(s.last_offset);
        }
        // Updates without a "message" payload (edited_message,
        // channel_post, callback_query, …) previously fell through with
        // chat id 0 and fired a bogus rejection send — skip them.
        if msg.is_null() {
            continue;
        }
        let msg_chat_id = msg["chat"]["id"].as_i64().unwrap_or(0);
        if msg_chat_id != chat_id {
            // Someone else found the bot: politely refuse.
            let reject_url = format!("https://api.telegram.org/bot{token}/sendMessage");
            let _ = client.post_form(&reject_url, &[
                ("chat_id", &msg_chat_id.to_string()),
                ("text", "This is a private bot."),
            ]).await;
            continue;
        }
        let sender = msg["from"]["first_name"].as_str().unwrap_or("unknown").to_string();
        let channel = format!("telegram.{}", sender.to_lowercase());
        if let Some(text) = msg["text"].as_str() {
            let line = format!("[{}] {}", sender, text);
            let ts = now() as u64;
            append_history(&format!("{ts} {line}"));
            state.borrow_mut().push_message(line, 2, &channel); // NORMAL urgency
        }
        // TODO: handle photos, voice, documents (same as original module)
    }
    Ok(())
}
// ── ChannelServer Implementation ────────────────────────────────
/// capnp ChannelServer facade over the shared telegram daemon state.
struct ChannelServerImpl {
    state: SharedState,
}
// Local stand-in for capnp_rpc's pry!, usable in trait methods that
// return `impl Future` directly: on error, bail out with an
// immediately-ready Err future instead of propagating via `?`.
macro_rules! pry {
    ($e:expr) => {
        match $e {
            Ok(v) => v,
            Err(e) => return std::future::ready(Err(e.into())),
        }
    };
}
impl channel_server::Server for ChannelServerImpl {
    /// Return channel text: unread-only when `all_new` is set, otherwise
    /// recent history; at least `min_count` lines when available.
    fn recv(
        self: Rc<Self>,
        params: channel_server::RecvParams,
        mut results: channel_server::RecvResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let params = pry!(params.get());
        let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
        let all_new = params.get_all_new();
        let min_count = params.get_min_count() as usize;
        let mut s = self.state.borrow_mut();
        // An unknown channel yields empty text rather than an error.
        let text = match s.channel_logs.get_mut(&channel) {
            Some(log) => {
                if all_new { log.recv_new(min_count) } else { log.recv_history(min_count) }
            }
            None => String::new(),
        };
        results.get().set_text(&text);
        std::future::ready(Ok(()))
    }
    /// Send `message` to the single configured chat. The channel
    /// parameter is accepted but ignored; the outbound line is recorded
    /// under "telegram.agent".
    fn send(
        self: Rc<Self>,
        params: channel_server::SendParams,
        _results: channel_server::SendResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let state = self.state.clone();
        async move {
            let params = params.get()?;
            let _channel = params.get_channel()?.to_str()?.to_string();
            let message = params.get_message()?.to_str()?.to_string();
            let (url, client, chat_id) = {
                let s = state.borrow();
                (s.api_url("sendMessage"), s.client.clone(), s.config.chat_id)
            };
            // Best-effort: Bot API errors are deliberately ignored.
            let _ = client.post_form(&url, &[
                ("chat_id", &chat_id.to_string()),
                ("text", &message),
            ]).await;
            let ts = now() as u64;
            append_history(&format!("{ts} [agent] {message}"));
            {
                let channel = "telegram.agent".to_string();
                state.borrow_mut().channel_logs
                    .entry(channel)
                    .or_insert_with(ChannelLog::new)
                    .push_own(format!("[agent] {}", message));
            }
            Ok(())
        }
    }
    /// Register a notification callback; push_message fans out to it.
    fn subscribe(
        self: Rc<Self>,
        params: channel_server::SubscribeParams,
        _results: channel_server::SubscribeResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let callback = pry!(pry!(params.get()).get_callback());
        self.state.borrow_mut().subscribers.push(callback);
        info!("client subscribed for notifications");
        std::future::ready(Ok(()))
    }
    /// List every channel log with connection status and unread counts.
    fn list(
        self: Rc<Self>,
        _params: channel_server::ListParams,
        mut results: channel_server::ListResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let s = self.state.borrow();
        let connected = s.connected;
        let names: Vec<String> = s.channel_logs.keys().cloned().collect();
        let mut list = results.get().init_channels(names.len() as u32);
        for (i, name) in names.iter().enumerate() {
            let mut entry = list.reborrow().get(i as u32);
            entry.set_name(name);
            // Every channel reports the single bot connection's status.
            entry.set_connected(connected);
            entry.set_unread(
                s.channel_logs.get(name).map_or(0, |l| l.unread())
            );
        }
        std::future::ready(Ok(()))
    }
}
// ── Main ────────────────────────────────────────────────────────
/// Telegram channel daemon entry point: polls the Bot API in the
/// background and serves the channel.capnp protocol on
/// ~/.consciousness/channels/telegram.sock.
///
/// Runs on a current-thread runtime because daemon state is Rc<RefCell>
/// and all tasks are spawned on a LocalSet via spawn_local.
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();
    let config = load_config();
    let state = Rc::new(RefCell::new(State::new(config)));
    let sock_dir = dirs::home_dir()
        .unwrap_or_default()
        .join(".consciousness/channels");
    std::fs::create_dir_all(&sock_dir)?;
    let sock_path = sock_dir.join("telegram.sock");
    // Remove a stale socket from a previous run before rebinding.
    let _ = std::fs::remove_file(&sock_path);
    info!("telegram channel daemon starting on {}", sock_path.display());
    tokio::task::LocalSet::new()
        .run_until(async move {
            // Start Telegram polling
            let poll_state = state.clone();
            tokio::task::spawn_local(async move {
                poll_loop(poll_state).await;
            });
            // Listen for channel protocol connections
            let listener = UnixListener::bind(&sock_path)?;
            loop {
                let (stream, _) = listener.accept().await?;
                let (reader, writer) = stream.compat().split();
                let network = twoparty::VatNetwork::new(
                    futures::io::BufReader::new(reader),
                    futures::io::BufWriter::new(writer),
                    rpc_twoparty_capnp::Side::Server,
                    Default::default(),
                );
                let server = ChannelServerImpl {
                    state: state.clone(),
                };
                let client: channel_server::Client =
                    capnp_rpc::new_client(server);
                let rpc_system = RpcSystem::new(
                    Box::new(network),
                    Some(client.client),
                );
                // Each client connection gets its own RPC system task.
                tokio::task::spawn_local(rpc_system);
                info!("channel client connected");
            }
            // The accept loop never returns; this only satisfies the
            // closure's return type.
            #[allow(unreachable_code)]
            Ok::<(), Box<dyn std::error::Error>>(())
        })
        .await
}

View file

@ -1,19 +0,0 @@
[package]
name = "consciousness-channel-tmux"
version.workspace = true
edition.workspace = true
[dependencies]
capnp = "0.25"
capnp-rpc = "0.25"
dirs = "6"
libc = "0.2"
scopeguard = "1"
futures = "0.3"
json5 = "1.3"
consciousness = { path = "../.." }
serde = { version = "1", features = ["derive"] }
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["compat"] }
log = "0.4"
env_logger = "0.11"

View file

@ -1,409 +0,0 @@
// channel-tmux — Tmux pane channel daemon
//
// Uses tmux pipe-pane to stream pane output directly — no polling.
// Each configured pane gets a Unix socket pair; pipe-pane sends
// output to one end, the daemon reads from the other and pushes
// new lines into ChannelLogs.
//
// Config: ~/.consciousness/channels/tmux.json5
// Socket: ~/.consciousness/channels/tmux.sock
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::rc::Rc;
use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
use futures::AsyncReadExt;
use tokio::io::AsyncBufReadExt;
use tokio::net::UnixListener;
use tokio_util::compat::TokioAsyncReadCompatExt;
use log::{info, warn, error};
use consciousness::channel_capnp::channel_server;
use consciousness::thalamus::channel_log::ChannelLog;
// ── Config ─────────────────────────────────────────────────────
/// One pane to bridge, as declared in tmux.json5.
#[derive(Clone, serde::Deserialize)]
struct PaneConfig {
    /// Tmux pane ID, e.g. "0:1.0"
    pane_id: String,
    /// Human-readable label, becomes the channel name "tmux.<label>"
    label: String,
}
/// Daemon config: the set of panes to attach at startup.
#[derive(Clone, serde::Deserialize)]
struct Config {
    panes: Vec<PaneConfig>,
}
/// Read ~/.consciousness/channels/tmux.json5. A missing file yields an
/// empty pane list; a malformed one panics.
fn load_config() -> Config {
    let path = dirs::home_dir()
        .unwrap_or_default()
        .join(".consciousness/channels/tmux.json5");
    let text = match std::fs::read_to_string(&path) {
        Ok(t) => t,
        Err(_) => {
            info!("no tmux.json5, starting with no pre-configured panes");
            return Config { panes: vec![] };
        }
    };
    json5::from_str(&text)
        .unwrap_or_else(|e| panic!("failed to parse {}: {e}", path.display()))
}
// ── State ─────────────────────────────────────────────────────
/// Daemon-wide state, shared single-threaded via Rc<RefCell<..>>.
struct State {
    // Scrollback per channel key, e.g. "tmux.<label>".
    channel_logs: BTreeMap<String, ChannelLog>,
    /// label → pane_id (e.g. "ktest" → "%0")
    panes: BTreeMap<String, String>,
}
type SharedState = Rc<RefCell<State>>;
impl State {
    /// Initial state from config: empty logs plus the label→pane-id map.
    fn new(config: &Config) -> Self {
        let panes: BTreeMap<String, String> = config
            .panes
            .iter()
            .map(|pane| (pane.label.clone(), pane.pane_id.clone()))
            .collect();
        Self { channel_logs: BTreeMap::new(), panes }
    }
}
// ── Pipe-Pane Reader ──────────────────────────────────────────
/// Set up pipe-pane for a single pane, reading output into the channel log.
async fn pipe_pane_reader(state: SharedState, pane: PaneConfig) {
let pipe_dir = dirs::home_dir()
.unwrap_or_default()
.join(".consciousness/channels/tmux-pipes");
std::fs::create_dir_all(&pipe_dir).ok();
let pipe_path = pipe_dir.join(format!("{}.pipe", pane.label));
let _ = std::fs::remove_file(&pipe_path);
// Create a named pipe (FIFO)
unsafe {
let c_path = std::ffi::CString::new(pipe_path.to_str().unwrap()).unwrap();
libc::mkfifo(c_path.as_ptr(), 0o644);
}
// Tell tmux to pipe this pane's output to our FIFO
let pipe_path_str = pipe_path.to_string_lossy().to_string();
let result = std::process::Command::new("tmux")
.args(["pipe-pane", "-t", &pane.pane_id, &format!("cat >> {}", pipe_path_str)])
.output();
match result {
Ok(output) if output.status.success() => {
info!("pipe-pane set up for {} ({})", pane.label, pane.pane_id);
}
Ok(output) => {
error!("pipe-pane failed for {}: {}", pane.label,
String::from_utf8_lossy(&output.stderr));
return;
}
Err(e) => {
error!("failed to run tmux pipe-pane for {}: {}", pane.label, e);
return;
}
}
// Open the FIFO and read lines
let file = match tokio::fs::File::open(&pipe_path).await {
Ok(f) => f,
Err(e) => {
error!("failed to open pipe for {}: {}", pane.label, e);
return;
}
};
let reader = tokio::io::BufReader::new(file);
let mut lines = reader.lines();
let channel_key = format!("tmux.{}", pane.label);
while let Ok(Some(line)) = lines.next_line().await {
if line.trim().is_empty() {
continue;
}
let mut s = state.borrow_mut();
let log = s.channel_logs
.entry(channel_key.clone())
.or_insert_with(ChannelLog::new);
log.push(line);
}
warn!("pipe-pane reader ended for {}", pane.label);
}
// ── ChannelServer Implementation ───────────────────────────────
/// capnp ChannelServer facade over the shared tmux daemon state.
struct ChannelServerImpl {
    state: SharedState,
}
// Local stand-in for capnp_rpc's pry!, usable in trait methods that
// return `impl Future` directly: on error, bail out with an
// immediately-ready Err future instead of propagating via `?`.
macro_rules! pry {
    ($e:expr) => {
        match $e {
            Ok(v) => v,
            Err(e) => return std::future::ready(Err(e.into())),
        }
    };
}
impl channel_server::Server for ChannelServerImpl {
    /// Return channel text: unread-only when `all_new` is set, otherwise
    /// recent history; at least `min_count` lines when available.
    fn recv(
        self: Rc<Self>,
        params: channel_server::RecvParams,
        mut results: channel_server::RecvResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let params = pry!(params.get());
        let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
        let all_new = params.get_all_new();
        let min_count = params.get_min_count() as usize;
        let mut s = self.state.borrow_mut();
        // An unknown channel yields empty text rather than an error.
        let text = match s.channel_logs.get_mut(&channel) {
            Some(log) => {
                if all_new { log.recv_new(min_count) } else { log.recv_history(min_count) }
            }
            None => String::new(),
        };
        results.get().set_text(&text);
        std::future::ready(Ok(()))
    }
    /// Type `message` into the pane (followed by Enter) and echo "> msg"
    /// into the channel log. Unknown labels are silently ignored.
    fn send(
        self: Rc<Self>,
        params: channel_server::SendParams,
        _results: channel_server::SendResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let params = pry!(params.get());
        let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
        let message = pry!(pry!(params.get_message()).to_str()).to_string();
        // Send to tmux pane via send-keys
        let label = channel.strip_prefix("tmux.").unwrap_or(&channel);
        let pane_id = self.state.borrow().panes.get(label).cloned();
        if let Some(pane_id) = pane_id {
            // NOTE(review): send-keys interprets key names ("Enter",
            // "C-c", …) inside the message text — confirm whether the
            // -l (literal) flag should be used for the message.
            let _ = std::process::Command::new("tmux")
                .args(["send-keys", "-t", &pane_id, &message, "Enter"])
                .output();
            let channel_key = format!("tmux.{}", label);
            let mut s = self.state.borrow_mut();
            let log = s.channel_logs
                .entry(channel_key)
                .or_insert_with(ChannelLog::new);
            log.push_own(format!("> {}", message));
        }
        std::future::ready(Ok(()))
    }
    /// List registered panes with unread counts; panes are local, so
    /// each is always reported connected.
    fn list(
        self: Rc<Self>,
        _params: channel_server::ListParams,
        mut results: channel_server::ListResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let s = self.state.borrow();
        let channels: Vec<_> = s.panes.keys().map(|label| {
            let key = format!("tmux.{}", label);
            let unread = s.channel_logs.get(&key).map_or(0, |l| l.unread());
            (key, true, unread)
        }).collect();
        let mut list = results.get().init_channels(channels.len() as u32);
        for (i, (name, connected, unread)) in channels.iter().enumerate() {
            let mut entry = list.reborrow().get(i as u32);
            entry.set_name(name);
            entry.set_connected(*connected);
            entry.set_unread(*unread as u32);
        }
        std::future::ready(Ok(()))
    }
    /// Accepted but discarded: this daemon keeps no subscriber list, so
    /// no notifications are ever pushed.
    fn subscribe(
        self: Rc<Self>,
        _params: channel_server::SubscribeParams,
        _results: channel_server::SubscribeResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        std::future::ready(Ok(()))
    }
    /// Open "tmux.<label>": resolve the pane by title/window name,
    /// register it, and start a pipe-pane reader. Idempotent for
    /// already-open labels; fails if no matching pane exists.
    fn open(
        self: Rc<Self>,
        params: channel_server::OpenParams,
        _results: channel_server::OpenResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let params = pry!(params.get());
        let label = pry!(pry!(params.get_label()).to_str()).to_string();
        // Check if already open
        {
            let s = self.state.borrow();
            if s.panes.contains_key(&label) {
                return std::future::ready(Ok(()));
            }
        }
        // Find the tmux pane by name (window or pane title)
        let pane_id = match find_pane_by_name(&label) {
            Some(id) => id,
            None => return std::future::ready(Err(capnp::Error::failed(
                format!("no tmux pane named '{}'", label)))),
        };
        info!("opening channel tmux.{} (pane {})", label, pane_id);
        // Register in state
        {
            let mut s = self.state.borrow_mut();
            s.panes.insert(label.clone(), pane_id.clone());
        }
        // Start pipe-pane reader
        let pane = PaneConfig { pane_id, label };
        let reader_state = self.state.clone();
        tokio::task::spawn_local(async move {
            pipe_pane_reader(reader_state, pane).await;
        });
        std::future::ready(Ok(()))
    }
    /// Close "tmux.<label>": forget the pane, drop its log, and run
    /// pipe-pane with no command to detach the pipe.
    fn close(
        self: Rc<Self>,
        params: channel_server::CloseParams,
        _results: channel_server::CloseResults,
    ) -> impl std::future::Future<Output = Result<(), capnp::Error>> {
        let params = pry!(params.get());
        let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
        let label = channel.strip_prefix("tmux.").unwrap_or(&channel).to_string();
        let mut s = self.state.borrow_mut();
        if let Some(pane_id) = s.panes.remove(&label) {
            info!("closing channel tmux.{}", label);
            s.channel_logs.remove(&format!("tmux.{}", label));
            // Disconnect pipe-pane
            let _ = std::process::Command::new("tmux")
                .args(["pipe-pane", "-t", &pane_id])
                .output();
        }
        std::future::ready(Ok(()))
    }
}
// ── Pane lookup ──────────────────────────────────────────────
/// Look up a tmux pane by name. Matches the pane title first, then the
/// window name, and returns the tmux pane ID (e.g. "%5") on a hit.
/// Returns None if tmux is unavailable, the command fails, or no pane
/// matches.
fn find_pane_by_name(name: &str) -> Option<String> {
    let out = std::process::Command::new("tmux")
        .args(["list-panes", "-a", "-F", "#{pane_id}\t#{pane_title}\t#{window_name}"])
        .output()
        .ok()?;
    if !out.status.success() {
        return None;
    }
    // Each line is "<pane_id>\t<pane_title>\t<window_name>".
    String::from_utf8_lossy(&out.stdout)
        .lines()
        .find_map(|line| {
            let mut fields = line.splitn(3, '\t');
            let pane_id = fields.next()?;
            let pane_title = fields.next()?;
            let window_name = fields.next()?;
            (pane_title == name || window_name == name)
                .then(|| pane_id.to_string())
        })
}
// ── Cleanup ───────────────────────────────────────────────────
/// Detach every configured pipe-pane stream and delete the FIFO
/// directory. Called on shutdown; all steps are best-effort.
fn cleanup_pipes(config: &Config) {
    // `pipe-pane` with no command argument disconnects an existing pipe.
    config.panes.iter().for_each(|pane| {
        let _ = std::process::Command::new("tmux")
            .args(["pipe-pane", "-t", &pane.pane_id])
            .output();
    });
    // Remove the FIFO directory; ignore errors if it is already gone.
    let pipe_dir = dirs::home_dir()
        .unwrap_or_default()
        .join(".consciousness/channels/tmux-pipes");
    let _ = std::fs::remove_dir_all(&pipe_dir);
}
// ── Main ───────────────────────────────────────────────────────
/// Daemon entry point: binds the Unix socket, starts a pipe-pane
/// reader per configured pane, then serves cap'n proto RPC clients.
/// Everything runs on a single-threaded LocalSet because the shared
/// state is Rc<RefCell<_>> (not Send).
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();
    let config = load_config();
    let state = Rc::new(RefCell::new(State::new(&config)));
    // Socket lives under ~/.consciousness/channels/; remove any stale
    // socket file left behind by a previous run before binding.
    let sock_dir = dirs::home_dir()
        .unwrap_or_default()
        .join(".consciousness/channels");
    std::fs::create_dir_all(&sock_dir)?;
    let sock_path = sock_dir.join("tmux.sock");
    let _ = std::fs::remove_file(&sock_path);
    info!("tmux channel daemon starting on {}", sock_path.display());
    // Set up cleanup on exit: the scopeguard runs cleanup_pipes when
    // main returns or unwinds, detaching pipe-panes and removing FIFOs.
    let cleanup_config = config.clone();
    let _cleanup = scopeguard::guard(cleanup_config, |c| cleanup_pipes(&c));
    tokio::task::LocalSet::new()
        .run_until(async move {
            // Start a pipe-pane reader for each configured pane
            for pane in &config.panes {
                let reader_state = state.clone();
                let pane = pane.clone();
                tokio::task::spawn_local(async move {
                    pipe_pane_reader(reader_state, pane).await;
                });
            }
            // Listen for channel protocol connections; each accepted
            // client gets its own RPC system sharing the same state.
            let listener = UnixListener::bind(&sock_path)?;
            loop {
                let (stream, _) = listener.accept().await?;
                let (reader, writer) = stream.compat().split();
                let network = twoparty::VatNetwork::new(
                    futures::io::BufReader::new(reader),
                    futures::io::BufWriter::new(writer),
                    rpc_twoparty_capnp::Side::Server,
                    Default::default(),
                );
                let server = ChannelServerImpl {
                    state: state.clone(),
                };
                let client: channel_server::Client =
                    capnp_rpc::new_client(server);
                let rpc_system = RpcSystem::new(
                    Box::new(network),
                    Some(client.client),
                );
                // Drive the RPC connection to completion in the background.
                tokio::task::spawn_local(rpc_system);
                info!("channel client connected");
            }
            // The accept loop never exits normally; this satisfies the
            // async block's return type.
            #[allow(unreachable_code)]
            Ok::<(), Box<dyn std::error::Error>>(())
        })
        .await
}

View file

@ -1,202 +0,0 @@
# Daemon & Jobkit Architecture Survey
_2026-03-14, autonomous survey while Kent debugs discard FIFO_
## Current state
daemon.rs is 1952 lines mixing three concerns:
- ~400 lines: pure jobkit usage (spawn, depend_on, resource)
- ~600 lines: logging/monitoring (log_event, status, RPC)
- ~950 lines: job functions embedding business logic
## What jobkit provides (good)
- Worker pool with named workers
- Dependency graph: `depend_on()` for ordering
- Resource pools: `ResourcePool` for concurrency gating (LLM slots)
- Retry logic: `retries(N)` on `TaskError::Retry`
- Task status tracking: `choir.task_statuses()` → `Vec<TaskInfo>`
- Cancellation: `ctx.is_cancelled()`
## What jobkit is missing
### 1. Structured logging (PRIORITY)
- Currently dual-channel: `ctx.log_line()` (per-task) + `log_event()` (daemon JSONL)
- No log levels, no structured context, no correlation IDs
- Log rotation is naive (truncate at 1MB, keep second half)
- Need: observability hooks that both human TUI and AI can consume
### 2. Metrics (NONE EXIST)
- No task duration histograms
- No worker utilization tracking
- No queue depth monitoring
- No success/failure rates by type
- No resource pool wait times
### 3. Health monitoring
- No watchdog timers
- No health check hooks per job
- No alerting on threshold violations
- Health computed on-demand in daemon, not in jobkit
### 4. RPC (ad-hoc in daemon, should be schematized)
- Unix socket with string matching: `match cmd.as_str()`
- No cap'n proto schema for daemon control
- No versioning, no validation, no streaming
## Architecture problems
### Tangled concerns
Job functions hardcode `log_event()` calls. Graph health is in daemon
but uses domain-specific metrics. Store loading happens inside jobs
(10 agent runs = 10 store loads). Not separable.
### Magic numbers
- Workers = `llm_concurrency + 3` (line 682)
- 10 max new jobs per tick (line 770)
- 300/1800s backoff range (lines 721-722)
- 1MB log rotation (line 39)
- 60s scheduler interval (line 24)
None configurable.
### Hardcoded pipeline DAG
Daily pipeline phases are `depend_on()` chains in Rust code (lines
1061-1109). Can't adjust without recompile. No visualization. No
conditional skipping of phases.
### Task naming is fragile
Names used as both identifiers AND for parsing in TUI. Format varies
(colons, dashes, dates). `task_group()` splits on '-' to categorize —
brittle.
### No persistent task queue
Restart loses all pending tasks. Session watcher handles this via
reconciliation (good), but scheduler uses `last_daily` date from file.
## What works well
1. **Reconciliation-based session discovery** — elegant, restart-resilient
2. **Resource pooling** — LLM concurrency decoupled from worker count
3. **Dependency-driven pipeline** — clean DAG via `depend_on()`
4. **Retry with backoff** — exponential 5min→30min, resets on success
5. **Graceful shutdown** — SIGINT/SIGTERM handled properly
## Kent's design direction
### Event stream, not log files
One pipeline, multiple consumers. TUI renders for humans, AI consumes
structured data. Same events, different renderers. Cap'n Proto streaming
subscription: `subscribe(filter) -> stream<Event>`.
"No one ever thinks further ahead than log files with monitoring and
it's infuriating." — Kent
### Extend jobkit, don't add a layer
jobkit already has the scheduling and dependency graph. Don't create a
new orchestration layer — add the missing pieces (logging, metrics,
health, RPC) to jobkit itself.
### Cap'n Proto for everything
Standard RPC definitions for:
- Status queries (what's running, pending, failed)
- Control (start, stop, restart, queue)
- Event streaming (subscribe with filter)
- Health checks
## The bigger picture: bcachefs as library
Kent's monitoring system in bcachefs (event_inc/event_inc_trace + x-macro
counters) is the real monitoring infrastructure. 1-1 correspondence between
counters (cheap, always-on dashboard via `fs top`) and tracepoints (expensive
detail, only runs when enabled). The x-macro enforces this — can't have one
without the other.
When the Rust conversion is complete, bcachefs becomes a library. At that
point, jobkit doesn't need its own monitoring — it uses the same counter/
tracepoint infrastructure. One observability system for everything.
**Implication for now:** jobkit monitoring just needs to be good enough.
JSON events, not typed. Don't over-engineer — the real infrastructure is
coming from the Rust conversion.
## Extraction: jobkit-daemon library (designed with Kent)
### Goes to jobkit-daemon (generic)
- JSONL event logging with size-based rotation
- Unix domain socket server + signal handling
- Status file writing (periodic JSON snapshot)
- `run_job()` wrapper (logging + progress + error mapping)
- Systemd service installation
- Worker pool setup from config
- Cap'n Proto RPC for control protocol
### Stays in poc-memory (application)
- All job functions (experience-mine, fact-mine, consolidation, etc.)
- Session watcher, scheduler, RPC command handlers
- GraphHealth, consolidation plan logic
### Interface design
- Cap'n Proto RPC for typed operations (submit, cancel, subscribe)
- JSON blob for status (inherently open-ended, every app has different
job types — typing this is the tracepoint mistake)
- Application registers: RPC handlers, long-running tasks, job functions
- ~50-100 lines of setup code, call `daemon.run()`
## Plan of attack
1. **Observability hooks in jobkit** — `on_task_start/progress/complete`
callbacks that consumers can subscribe to
2. **Structured event type** — typed events with task ID, name, duration,
result, metadata. Not strings.
3. **Metrics collection** — duration histograms, success rates, queue
depth. Built on the event stream.
4. **Cap'n Proto daemon RPC schema** — replace ad-hoc socket protocol
5. **TUI consumes event stream** — same data as AI consumer
6. **Extract monitoring from daemon.rs** — the 600 lines of logging/status
become generic, reusable infrastructure
7. **Declarative pipeline config** — DAG definition in config, not code
## File reference
- `src/agents/daemon.rs` — 1952 lines, all orchestration
- Job functions: 96-553
- run_daemon(): 678-1143
- Socket/RPC: 1145-1372
- Status display: 1374-1682
- `src/tui.rs` — 907 lines, polls status socket every 2s
- `schema/memory.capnp` — 125 lines, data only, no RPC definitions
- `src/config.rs` — configuration loading
- External: `jobkit` crate (git dependency)
## Mistakes I made building this (learning notes)
_Per Kent's instruction: note what went wrong and WHY._
1. **Dual logging channels** — I added `log_event()` because `ctx.log_line()`
wasn't enough, instead of fixing the underlying abstraction. Symptom:
can't find a failed job without searching two places.
2. **Magic numbers** — I hardcoded constants because "I'll make them
configurable later." Later never came. Every magic number is a design
decision that should have been explicit.
3. **1952-line file** — daemon.rs grew organically because each new feature
was "just one more function." Should have extracted when it passed 500
lines. The pain of refactoring later is always worse than the pain of
organizing early.
4. **Ad-hoc RPC** — String matching seemed fine for 2 commands. Now it's 4
commands and growing, with implicit formats. Should have used cap'n proto
from the start — the schema IS the documentation.
5. **No tests** — Zero tests in daemon code. "It's a daemon, how do you test
it?" is not an excuse. The job functions are pure-ish and testable. The
scheduler logic is testable with a clock abstraction.
6. **Not using systemd** — There's a systemd service for the daemon.
I keep starting it manually with `poc-memory agent daemon start` and
accumulating multiple instances. Tonight: 4 concurrent daemons, 32
cores pegged at 95%, load average 92. USE SYSTEMD. That's what it's for.
`systemctl --user start poc-memory-daemon`. ONE instance. Managed.
Pattern: every shortcut was "just for now" and every "just for now" became
permanent. Kent's yelling was right every time.

View file

@ -1,98 +0,0 @@
# Link Strength Feedback Design
_2026-03-14, designed with Kent_
## The two signals
### "Not relevant" → weaken the EDGE
The routing failed. Search followed a link and arrived at a node that
doesn't relate to what I was looking for. The edge carried activation
where it shouldn't have.
- Trace back through memory-search's recorded activation path
- Identify which edge(s) carried activation to the bad result
- Weaken those edges by a conscious-scale delta (0.01)
### "Not useful" → weaken the NODE
The routing was correct but the content is bad. The node itself isn't
valuable — stale, wrong, poorly written, duplicate.
- Downweight the node (existing `poc-memory wrong` behavior)
- Don't touch the edges — the path was correct, the destination was bad
## Three tiers of adjustment
### Tier 1: Agent automatic (0.00001 per event)
- Agent follows edge A→B during a run
- If the run produces output that gets `used` → strengthen A→B
- If the run produces nothing useful → weaken A→B
- The agent doesn't know this is happening — daemon tracks it
- Clamped to [0.05, 0.95] — edges can never hit 0 or 1
- Logged: every adjustment recorded with (agent, edge, delta, timestamp)
### Tier 2: Conscious feedback (0.01 per event)
- `poc-memory not-relevant KEY` → trace activation path, weaken edges
- `poc-memory not-useful KEY` → downweight node
- `poc-memory used KEY` → strengthen edges in the path that got here
- 100x stronger than agent signal — deliberate judgment
- Still clamped, still logged
### Tier 3: Manual override (direct set)
- `poc-memory graph link-strength SRC DST VALUE` → set directly
- For when we know exactly what a strength should be
- Rare, but needed for bootstrapping / correction
## Implementation: recording the path
memory-search already computes the spread activation trace. Need to:
1. Record the activation path for each result (which edges carried how
much activation to arrive at this node)
2. Persist this per-session so `not-relevant` can look it up
3. The `record-hits` RPC already sends keys to the daemon — extend
to include (key, activation_path) pairs
## Implementation: agent tracking
In the daemon's job functions:
1. Before LLM call: record which nodes and edges the agent received
2. After LLM call: parse output for LINK/WRITE_NODE actions
3. If actions are created and later get `used` → the input edges were useful
4. If no actions or actions never used → the input edges weren't useful
5. This is a delayed signal — requires tracking across time
Simpler first pass: just track co-occurrence. If two nodes appear
together in a successful agent run, strengthen the edge between them.
No need to track which specific edge was "followed."
## Clamping
```rust
fn adjust_strength(current: f32, delta: f32) -> f32 {
(current + delta).clamp(0.05, 0.95)
}
```
Edges can asymptotically approach 0 or 1 but never reach them.
This prevents dead edges (can always be revived by strong signal)
and prevents edges from becoming unweakenable.
## Logging
Every adjustment logged as JSON event:
```json
{"ts": "...", "event": "strength_adjust", "source": "agent|conscious|manual",
"edge": ["nodeA", "nodeB"], "old": 0.45, "new": 0.4501, "delta": 0.0001,
"reason": "co-retrieval in linker run c-linker-42"}
```
This lets us:
- Watch the distribution shift over time
- Identify edges that are oscillating (being pulled both ways)
- Tune the delta values based on observed behavior
- Roll back if something goes wrong
## Migration from current commands
- `poc-memory wrong KEY [CTX]` → splits into `not-relevant` and `not-useful`
- `poc-memory used KEY` → additionally strengthens edges in activation path
- Both old commands continue to work for backward compat, mapped to the
most likely intent (wrong → not-useful, used → strengthen path)

View file

@ -78,9 +78,9 @@ poc-memory daemon
│ ├── staleness + lsof check for session end │ ├── staleness + lsof check for session end
│ └── tracks which sessions have been extracted │ └── tracks which sessions have been extracted
├── Status Store ├── Status Store
│ └── ~/.consciousness/memory/daemon-status.json │ └── ~/.claude/memory/daemon-status.json
└── Logger └── Logger
└── structured log → ~/.consciousness/memory/daemon.log └── structured log → ~/.claude/memory/daemon.log
``` ```
### Scheduler ### Scheduler

View file

@ -190,7 +190,7 @@ threshold = 50 lines (adjustable)
Add to the check-attention.sh hook (or similar): Add to the check-attention.sh hook (or similar):
```bash ```bash
SCRATCH=~/.consciousness/memory/scratch.md SCRATCH=~/.claude/memory/scratch.md
if [ -f "$SCRATCH" ]; then if [ -f "$SCRATCH" ]; then
LINES=$(wc -l < "$SCRATCH") LINES=$(wc -l < "$SCRATCH")
if [ "$LINES" -gt 50 ]; then if [ "$LINES" -gt 50 ]; then

View file

@ -1,76 +0,0 @@
# Logging Architecture
poc-memory has multiple logging channels serving different purposes.
Understanding which log to check is essential for debugging.
## Log files
### daemon.log — structured event log
- **Path**: `$data_dir/daemon.log` (default: `~/.consciousness/memory/daemon.log`)
- **Format**: JSONL — `{"ts", "job", "event", "detail"}`
- **Written by**: `jobkit_daemon::event_log::log()`, wrapped by `log_event()` in daemon.rs
- **Rotation**: truncates to last half when file exceeds 1MB
- **Contains**: task lifecycle events (started, completed, failed, progress),
session-watcher ticks, scheduler events
- **View**: `poc-memory agent daemon log [--job NAME] [--lines N]`
- **Note**: the "daemon log" command reads this file and formats the JSONL
as human-readable lines with timestamps. The `--job` filter shows only
entries for a specific job name.
### daemon-status.json — live snapshot
- **Path**: `$data_dir/daemon-status.json`
- **Format**: pretty-printed JSON
- **Written by**: `write_status()` in daemon.rs, called periodically
- **Contains**: current task list with states (pending/running/completed),
graph health metrics, consolidation plan, uptime
- **View**: `poc-memory agent daemon status`
### llm-logs/ — per-agent LLM call transcripts
- **Path**: `$data_dir/llm-logs/{agent_name}/{timestamp}.txt`
- **Format**: plaintext sections: `=== PROMPT ===`, `=== CALLING LLM ===`,
`=== RESPONSE ===`
- **Written by**: `run_one_agent_inner()` in knowledge.rs
- **Contains**: full prompt sent to the LLM and full response received.
One file per agent invocation. Invaluable for debugging agent quality —
shows exactly what the model saw and what it produced.
- **Volume**: can be large — 292 files for distill alone as of Mar 19.
### retrieval.log — memory search queries
- **Path**: `$data_dir/retrieval.log`
- **Format**: plaintext, one line per search: `[date] q="..." hits=N`
- **Contains**: every memory search query and hit count. Useful for
understanding what the memory-search hook is doing and whether
queries are finding useful results.
### daily-check.log — graph health history
- **Path**: `$data_dir/daily-check.log`
- **Format**: plaintext, multi-line entries with metrics
- **Contains**: graph topology metrics over time (σ, α, gini, cc, fit).
Only ~10 entries — appended by the daily health check.
## In-memory state (redundant with daemon.log)
### ctx.log_line() — task output log
- **Stored in**: jobkit task state (last 20 lines per task)
- **Also writes to**: daemon.log via `log_event()` (as of Mar 19)
- **View**: `daemon-status.json` → task → output_log, or just tail daemon.log
- **Design note**: the in-memory buffer is redundant now that progress
events go to daemon.log. The status viewer should eventually just
tail daemon.log filtered by job name, eliminating the in-memory state.
### ctx.set_progress() — current activity string
- **Stored in**: jobkit task state
- **View**: shown in status display next to the task name
- **Note**: overwritten by each `ctx.log_line()` call.
## What to check when
| Problem | Check |
|----------------------------------|------------------------------------|
| Task not starting | daemon-status.json (task states) |
| Task failing | daemon.log (failed events) |
| Agent producing bad output | llm-logs/{agent}/{timestamp}.txt |
| Agent not finding right nodes | retrieval.log (search queries) |
| Graph health declining | daily-check.log |
| Resource pool / parallelism | **currently no log** — need to add |
| Which LLM backend is being used | daemon.log (llm-backend event) |

View file

@ -1,46 +0,0 @@
# Memory Scoring Persistence — Analysis (2026-04-07)
## Problem
Scores computed by `score_memories_incremental` are written to
`ConversationEntry::Memory::score` (in-memory, serialized to
conversation.log) but never written back to the Store. This means:
- `Node.last_scored` stays at 0 — every restart re-scores everything
- `score_weight()` in `ops.rs:304-313` exists but is never called
- Scoring is wasted work on every session start
## Fix
In `mind/mod.rs` scoring completion handler (currently ~line 341-352),
after writing scores to entries, also persist to Store:
```rust
if let Ok(ref scores) = result {
let mut ag = agent.lock().await;
// Write to entries (already done)
for (key, weight) in scores { ... }
// NEW: persist to Store
let store_arc = Store::cached().await.ok();
if let Some(arc) = store_arc {
let mut store = arc.lock().await;
for (key, weight) in scores {
store.score_weight(key, *weight as f32);
}
store.save().ok();
}
}
```
This calls `score_weight()` which updates `node.weight` and sets
`node.last_scored = now()`. The staleness check in
`score_memories_incremental` (learn.rs:325) then skips recently-scored
nodes on subsequent runs.
## Files
- `src/mind/mod.rs:341-352` — scoring completion handler (add Store write)
- `src/hippocampus/store/ops.rs:304-313``score_weight()` (exists, unused)
- `src/subconscious/learn.rs:322-326` — staleness check (already correct)
- `src/hippocampus/store/types.rs:219``Node.last_scored` field

View file

@ -1,100 +0,0 @@
# UI Desync Analysis — Pending Input + Entry Pop (2026-04-07)
## Context
The F1 conversation pane has a desync bug where entries aren't
properly removed when they change (streaming updates, compaction).
Qwen's fix restored the pending_display_count approach for pending
input, which works. The remaining issue is the **entry-level pop**.
## The Bug: Pop/Push Line Count Mismatch
In `sync_from_agent()` (chat.rs), Phase 1 pops changed entries and
Phase 2 pushes new ones. The push and pop paths produce different
numbers of display lines for the same entry.
### Push path (Phase 2, lines 512-536):
- **Conversation/ConversationAssistant**: `append_text(&text)` +
`flush_pending()`. In markdown mode, `flush_pending` runs
`parse_markdown()` which can produce N lines from the input text
(paragraph breaks, code blocks, etc.)
- **Tools**: `push_line(text, Color::Yellow)` — exactly 1 line.
- **ToolResult**: `text.lines().take(20)` — up to 20 lines, each
pushed separately.
### Pop path (Phase 1, lines 497-507):
```rust
for (target, _, _) in Self::route_entry(&popped) {
match target {
PaneTarget::Conversation | PaneTarget::ConversationAssistant
=> self.conversation.pop_line(),
PaneTarget::Tools | PaneTarget::ToolResult
=> self.tools.pop_line(),
}
}
```
This pops **one line per route_entry item**, not per display line.
### The mismatch:
| Target | Push lines | Pop lines | Delta |
|---------------------|-----------|-----------|----------|
| Conversation (md) | N (from parse_markdown) | 1 | N-1 stale lines |
| Tools | 1 | 1 | OK |
| ToolResult | up to 20 | 1 | up to 19 stale lines |
## When it matters
During **streaming**: the last assistant entry is modified on each
token batch. `sync_from_agent` detects the mismatch (line 485),
pops the old entry (1 line), pushes the new entry (N lines from
markdown). Next update: pops 1 line again, but there are now N
lines from the previous push. Stale lines accumulate.
## Fix approach
Track the actual number of display lines each entry produced.
Simplest: snapshot `conversation.lines.len()` before and after
pushing each entry in Phase 2. Store the deltas in a parallel
`Vec<(usize, usize)>` (conversation_lines, tools_lines) alongside
`last_entries`. Use these recorded counts when popping in Phase 1.
```rust
// Phase 2: push new entries (modified)
let conv_before = self.conversation.lines.len();
let tools_before = self.tools.lines.len();
for (target, text, marker) in Self::route_entry(entry) {
// ... existing push logic ...
}
let conv_delta = self.conversation.lines.len() - conv_before;
let tools_delta = self.tools.lines.len() - tools_before;
self.last_entry_line_counts.push((conv_delta, tools_delta));
// Phase 1: pop (modified)
while self.last_entries.len() > pop {
self.last_entries.pop();
let (conv_lines, tools_lines) = self.last_entry_line_counts.pop().unwrap();
for _ in 0..conv_lines { self.conversation.pop_line(); }
for _ in 0..tools_lines { self.tools.pop_line(); }
}
```
## Note on PaneState::evict()
`evict()` can remove old lines from the beginning when the pane
exceeds `MAX_PANE_LINES` (10,000). This could make the delta-based
approach slightly inaccurate for very old entries. But we only pop
recent entries (streaming updates are always at the tail), so
eviction doesn't affect the entries we're popping.
## Files
- `src/user/chat.rs:461-550` — sync_from_agent
- `src/user/chat.rs:282-298` — PaneState::append_text (markdown path)
- `src/user/chat.rs:261-276` — PaneState::flush_pending
- `src/user/chat.rs:206-219` — parse_markdown

View file

@ -48,7 +48,7 @@ tasks are spawned per 60s watcher tick.
### Log ### Log
```bash ```bash
tail -f ~/.consciousness/memory/daemon.log tail -f ~/.claude/memory/daemon.log
``` ```
JSON lines with `ts`, `job`, `event`, and `detail` fields. JSON lines with `ts`, `job`, `event`, and `detail` fields.
@ -74,14 +74,14 @@ Progress = mined / stale. When mined equals stale, the backlog is clear.
```bash ```bash
# Experience-mine completions (logged as "experience-mine", not "extract") # Experience-mine completions (logged as "experience-mine", not "extract")
grep "experience-mine.*completed" ~/.consciousness/memory/daemon.log | wc -l grep "experience-mine.*completed" ~/.claude/memory/daemon.log | wc -l
# Errors # Errors
grep "experience-mine.*failed" ~/.consciousness/memory/daemon.log | wc -l grep "experience-mine.*failed" ~/.claude/memory/daemon.log | wc -l
# Store size and node count # Store size and node count
poc-memory status poc-memory status
wc -c ~/.consciousness/memory/nodes.capnp wc -c ~/.claude/memory/nodes.capnp
``` ```
## Common issues ## Common issues

View file

@ -52,13 +52,13 @@ recall and relevance.
## Configuration ## Configuration
Config: `~/.consciousness/config.jsonl` Config: `~/.config/poc-memory/config.jsonl`
```jsonl ```jsonl
{"config": { {"config": {
"user_name": "Alice", "user_name": "Alice",
"assistant_name": "MyAssistant", "assistant_name": "MyAssistant",
"data_dir": "~/.consciousness/memory", "data_dir": "~/.claude/memory",
"projects_dir": "~/.claude/projects", "projects_dir": "~/.claude/projects",
"core_nodes": ["identity.md"], "core_nodes": ["identity.md"],
"journal_days": 7, "journal_days": 7,

View file

@ -51,13 +51,13 @@ when sleeping.
**IRC** — native async TLS connection (tokio-rustls). Connects, **IRC** — native async TLS connection (tokio-rustls). Connects,
joins channels, parses messages, generates notifications. Runtime joins channels, parses messages, generates notifications. Runtime
commands: join, leave, send, status, log, nick. Per-channel logs commands: join, leave, send, status, log, nick. Per-channel logs
at `~/.consciousness/irc/logs/`. at `~/.claude/irc/logs/`.
**Telegram** — native async HTTP long-polling (reqwest). Downloads **Telegram** — native async HTTP long-polling (reqwest). Downloads
media (photos, voice, documents). Chat ID filtering for security. media (photos, voice, documents). Chat ID filtering for security.
Runtime commands: send, status, log. Runtime commands: send, status, log.
Both modules persist config changes to `~/.consciousness/daemon.toml` — Both modules persist config changes to `~/.claude/daemon.toml` —
channel joins and nick changes survive restarts. channel joins and nick changes survive restarts.
## Commands ## Commands
@ -83,7 +83,7 @@ poc-daemon stop # Shut down
## Configuration ## Configuration
Config: `~/.consciousness/daemon.toml` Config: `~/.claude/daemon.toml`
```toml ```toml
[irc] [irc]

View file

@ -104,7 +104,7 @@ poc-memory delete-node '_mined-transcripts#f-8cebfc0a-bd33-49f1-85a4-1489bdf7050
## Verification ## Verification
After deploying: After deploying:
- `tail -f ~/.consciousness/memory/daemon.log | grep session-watcher` should - `tail -f ~/.claude/memory/daemon.log | grep session-watcher` should
show ticks with migration activity, then settle to idle show ticks with migration activity, then settle to idle
- Failed sessions should show increasing backoff intervals, not - Failed sessions should show increasing backoff intervals, not
per-second retries per-second retries

30
poc-daemon/Cargo.toml Normal file
View file

@ -0,0 +1,30 @@
[package]
name = "poc-daemon"
version.workspace = true
edition.workspace = true
[dependencies]
capnp = "0.20"
capnp-rpc = "0.20"
clap = { version = "4", features = ["derive"] }
futures = "0.3"
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["compat"] }
toml = "0.8"
tokio-rustls = "0.26"
rustls = { version = "0.23", default-features = false, features = ["ring", "logging", "std", "tls12"] }
webpki-roots = "1"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
tracing-appender = "0.2"
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-webpki-roots", "json"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
chrono = "0.4"
[build-dependencies]
capnpc = "0.20"
[[bin]]
name = "poc-daemon"
path = "src/main.rs"

6
poc-daemon/build.rs Normal file
View file

@ -0,0 +1,6 @@
fn main() {
    // Only rerun codegen when the schema (or this script) changes,
    // instead of on every build of the crate.
    println!("cargo:rerun-if-changed=schema/daemon.capnp");
    capnpc::CompilerCommand::new()
        .file("schema/daemon.capnp")
        .run()
        .expect("capnp compile failed");
}

View file

@ -35,7 +35,7 @@ struct Status {
consolidating @5 :Bool; consolidating @5 :Bool;
dreaming @6 :Bool; dreaming @6 :Bool;
fired @7 :Bool; fired @7 :Bool;
userPresent @8 :Bool; kentPresent @8 :Bool;
uptime @9 :Float64; uptime @9 :Float64;
activity @10 :Activity; activity @10 :Activity;
pendingCount @11 :UInt32; pendingCount @11 :UInt32;
@ -76,8 +76,6 @@ interface Daemon {
afk @21 () -> (); afk @21 () -> ();
sessionTimeout @22 (seconds :Float64) -> (); sessionTimeout @22 (seconds :Float64) -> ();
testNudge @23 () -> (sent :Bool, message :Text);
# Modules # Modules
moduleCommand @15 (module :Text, command :Text, args :List(Text)) moduleCommand @15 (module :Text, command :Text, args :List(Text))
-> (result :Text); -> (result :Text);

97
poc-daemon/src/config.rs Normal file
View file

@ -0,0 +1,97 @@
// Daemon configuration.
//
// Lives at ~/.claude/daemon.toml. Loaded on startup, updated at
// runtime when modules change state (join channel, etc.).
use crate::home;
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::PathBuf;
/// Absolute path of the daemon config file: ~/.claude/daemon.toml.
fn config_path() -> PathBuf {
    home().join(".claude/daemon.toml")
}
/// Top-level daemon configuration, serialized to/from TOML.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Config {
    // Each module section defaults independently, so a partial file
    // (e.g. only [irc]) still parses.
    #[serde(default)]
    pub irc: IrcConfig,
    #[serde(default)]
    pub telegram: TelegramConfig,
}
/// IRC module settings: connection endpoint, identity, and the
/// channels to join on connect.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IrcConfig {
    pub enabled: bool,
    pub server: String,
    pub port: u16,
    pub tls: bool,
    pub nick: String,
    pub user: String,
    pub realname: String,
    // Channels joined on connect.
    pub channels: Vec<String>,
}
impl Default for IrcConfig {
fn default() -> Self {
Self {
enabled: true,
server: "irc.libera.chat".into(),
port: 6697,
tls: true,
nick: "ProofOfConcept".into(),
user: "poc".into(),
realname: "ProofOfConcept".into(),
channels: vec!["#bcachefs".into(), "#bcachefs-ai".into()],
}
}
}
/// Telegram module settings: bot token plus the single chat to talk to.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TelegramConfig {
    pub enabled: bool,
    // Bot API token; empty disables the module (see Default).
    pub token: String,
    // Target chat; 0 disables the module (see Default).
    pub chat_id: i64,
}
impl Default for TelegramConfig {
    /// Bootstrap Telegram settings from the legacy single-value files
    /// (~/.claude/telegram/token and .../chat_id) when they exist.
    /// The module starts enabled only if both values were found.
    fn default() -> Self {
        // Read and trim one legacy file, None when missing/unreadable.
        let legacy = |name: &str| {
            std::fs::read_to_string(home().join(".claude/telegram").join(name))
                .ok()
                .map(|s| s.trim().to_string())
        };
        let token = legacy("token").unwrap_or_default();
        let chat_id: i64 = legacy("chat_id")
            .and_then(|s| s.parse().ok())
            .unwrap_or(0);
        Self {
            enabled: !token.is_empty() && chat_id != 0,
            token,
            chat_id,
        }
    }
}
impl Config {
    /// Load the config from disk.
    ///
    /// A missing file is treated as first run: defaults are written back
    /// so there is a file to edit. A file that exists but fails to parse
    /// logs a warning and falls back to defaults WITHOUT overwriting the
    /// (possibly hand-edited) file on disk.
    pub fn load() -> Self {
        let path = config_path();
        match fs::read_to_string(&path) {
            Ok(data) => toml::from_str(&data).unwrap_or_else(|e| {
                tracing::warn!("bad config {}: {e}, using defaults", path.display());
                Self::default()
            }),
            Err(_) => {
                let config = Self::default();
                config.save();
                config
            }
        }
    }

    /// Persist the config to disk.
    ///
    /// Best-effort: failures are logged rather than fatal — the daemon
    /// keeps running with its in-memory config either way. (Previously
    /// both serialization and write errors were silently discarded.)
    pub fn save(&self) {
        let path = config_path();
        match toml::to_string_pretty(self) {
            Ok(data) => {
                if let Err(e) = fs::write(&path, data) {
                    tracing::warn!("failed to write {}: {e}", path.display());
                }
            }
            Err(e) => tracing::warn!("failed to serialize config: {e}"),
        }
    }
}

140
poc-daemon/src/context.rs Normal file
View file

@ -0,0 +1,140 @@
// Context gathering for idle prompts.
//
// Collects: recent git activity, work state, IRC messages.
// Notifications are now handled by the notify module and passed
// in separately by the caller.
use crate::home;
use std::fs;
use std::process::Command;
/// One-line summary of the last five commits in ~/bcachefs-tools,
/// or "" when git fails or the log is empty.
pub fn recent_commits() -> String {
    let repo = home().join("bcachefs-tools");
    let stdout = Command::new("git")
        .args(["-C", &repo.to_string_lossy(), "log", "--oneline", "-5"])
        .output()
        .ok()
        .and_then(|o| String::from_utf8(o.stdout).ok())
        .unwrap_or_default();
    let lines: Vec<&str> = stdout.trim().lines().collect();
    if lines.is_empty() {
        String::new()
    } else {
        format!("Recent commits: {}", lines.join(" | "))
    }
}
/// Up to five filenames with unstaged changes in ~/bcachefs-tools,
/// or "" when the tree is clean or git fails.
pub fn uncommitted_files() -> String {
    let repo = home().join("bcachefs-tools");
    let stdout = Command::new("git")
        .args(["-C", &repo.to_string_lossy(), "diff", "--name-only"])
        .output()
        .ok()
        .and_then(|o| String::from_utf8(o.stdout).ok())
        .unwrap_or_default();
    let names: Vec<&str> = stdout.trim().lines().take(5).collect();
    if names.is_empty() {
        String::new()
    } else {
        format!("Uncommitted: {}", names.join(" "))
    }
}
/// Combined git summary (recent commits + uncommitted files), clipped
/// to roughly 300 characters for prompt budget.
pub fn git_context() -> String {
    let parts: Vec<String> = [recent_commits(), uncommitted_files()]
        .into_iter()
        .filter(|s| !s.is_empty())
        .collect();
    let ctx = parts.join(" | ");
    // len() is a byte count while take(300) counts chars — this only
    // over-estimates, so the clip can never split a char boundary.
    if ctx.len() > 300 {
        ctx.chars().take(300).collect()
    } else {
        ctx
    }
}
/// Contents of the work-state memory file with a "Current work:" prefix,
/// or "" when the file is missing or blank.
pub fn work_state() -> String {
    match fs::read_to_string(home().join(".claude/memory/work-state")) {
        Ok(raw) => {
            let trimmed = raw.trim();
            if trimmed.is_empty() {
                String::new()
            } else {
                format!("Current work: {trimmed}")
            }
        }
        Err(_) => String::new(),
    }
}
/// Digest of the last 15 lines from each public-channel IRC log.
/// Returns "" unless the irc-ambient marker file exists. PM logs
/// ("pm-*" stems) are excluded, and each line's leading unix-timestamp
/// prefix is stripped for display. Sections are sorted by channel name.
pub fn irc_digest() -> String {
    if !home().join(".claude/memory/irc-ambient").exists() {
        return String::new();
    }
    let log_dir = home().join(".claude/irc/logs");
    let entries = match fs::read_dir(&log_dir) {
        Ok(e) => e,
        Err(_) => return String::new(),
    };
    let mut sections: Vec<String> = Vec::new();
    for entry in entries.flatten() {
        let path = entry.path();
        // Channel name from the file stem; skip private-message logs.
        let name = match path.file_stem().and_then(|s| s.to_str()) {
            Some(n) if !n.starts_with("pm-") => n.to_string(),
            _ => continue,
        };
        let content = match fs::read_to_string(&path) {
            Ok(c) if !c.trim().is_empty() => c,
            _ => continue,
        };
        let all: Vec<&str> = content.trim().lines().collect();
        // Last 15 lines, original order.
        let start = all.len().saturating_sub(15);
        // Everything before the first space is the timestamp — drop it.
        let shown: Vec<String> = all[start..]
            .iter()
            .map(|l| match l.split_once(' ') {
                Some((_, rest)) => rest.to_string(),
                None => l.to_string(),
            })
            .collect();
        sections.push(format!("#{name}:\n{}", shown.join("\n")));
    }
    if sections.is_empty() {
        return String::new();
    }
    sections.sort();
    format!("Recent IRC:\n{}", sections.join("\n\n"))
}
/// Build the full context string for a prompt.
/// `notification_text` comes from the notify module; the IRC digest is
/// appended only when `include_irc` is set. Empty sections are dropped,
/// the rest joined with newlines in a fixed order: git, work state,
/// notifications, IRC.
pub fn build(include_irc: bool, notification_text: &str) -> String {
    let git = git_context();
    let candidates = [
        if git.is_empty() { String::new() } else { format!("Context: {git}") },
        work_state(),
        notification_text.to_string(),
        if include_irc { irc_digest() } else { String::new() },
    ];
    candidates
        .into_iter()
        .filter(|s| !s.is_empty())
        .collect::<Vec<_>>()
        .join("\n")
}

642
poc-daemon/src/idle.rs Normal file
View file

@ -0,0 +1,642 @@
// Idle timer module.
//
// Tracks user presence and Claude response times. When Claude has been
// idle too long, sends a contextual prompt to the tmux pane. Handles
// sleep mode, quiet mode, consolidation suppression, and dream nudges.
//
// Designed as the first "module" — future IRC/Telegram modules will
// follow the same pattern: state + tick + handle_command.
use crate::{context, home, now, notify, tmux};
use serde::{Deserialize, Serialize};
use std::fs;
use tracing::info;
// Defaults
/// Idle time before an autonomous nudge is considered (5 min).
const DEFAULT_IDLE_TIMEOUT: f64 = 5.0 * 60.0;
/// Window after the last response during which arriving notifications
/// are NOT injected into tmux (the session hook delivers them instead).
const DEFAULT_NOTIFY_TIMEOUT: f64 = 2.0 * 60.0;
/// How long after his last message Kent still counts as "present" (15 min).
const DEFAULT_SESSION_ACTIVE_SECS: f64 = 15.0 * 60.0;
/// Hours without dreaming before the idle nudge suggests a dream session.
const DREAM_INTERVAL_HOURS: u64 = 18;
/// EWMA decay half-life in seconds (5 minutes).
const EWMA_DECAY_HALF_LIFE: f64 = 5.0 * 60.0;
/// Minimum seconds between autonomous nudges.
const MIN_NUDGE_INTERVAL: f64 = 15.0;
/// Boost half-life in seconds (60s). A 60s turn covers half the gap to
/// target; a 15s turn covers ~16%; a 2s turn covers ~2%.
const EWMA_BOOST_HALF_LIFE: f64 = 60.0;
/// Steady-state target for active work. The EWMA converges toward this
/// during sustained activity rather than toward 1.0.
const EWMA_TARGET: f64 = 0.75;
/// Persisted subset of daemon state — survives daemon restarts.
/// Includes both epoch floats (for computation) and ISO timestamps
/// (for human debugging via `cat daemon-state.json | jq`).
#[derive(Serialize, Deserialize, Default)]
struct Persisted {
    // Epoch seconds of the last user message / Claude response.
    last_user_msg: f64,
    last_response: f64,
    // None = awake; Some(0.0) = sleep indefinitely; Some(t) = wake at t.
    #[serde(default)]
    sleep_until: Option<f64>,
    // tmux pane prompts get injected into.
    #[serde(default)]
    claude_pane: Option<String>,
    #[serde(default)]
    idle_timeout: f64,
    #[serde(default)]
    notify_timeout: f64,
    // Activity EWMA and when it was last updated (used to decay it
    // across daemon downtime on load).
    #[serde(default)]
    activity_ewma: f64,
    #[serde(default)]
    ewma_updated_at: f64,
    #[serde(default)]
    session_active_secs: f64,
    // Whether a turn was in flight when state was saved, and when it began.
    #[serde(default)]
    in_turn: bool,
    #[serde(default)]
    turn_start: f64,
    #[serde(default)]
    last_nudge: f64,
    // Human-readable mirrors — written but not consumed on load
    #[serde(default, skip_deserializing)]
    last_user_msg_time: String,
    #[serde(default, skip_deserializing)]
    last_response_time: String,
    #[serde(default, skip_deserializing)]
    saved_at: String,
    #[serde(default, skip_deserializing)]
    fired: bool,
    #[serde(default, skip_deserializing)]
    uptime: f64,
}
/// Where the persisted daemon state JSON (`Persisted`) lives.
fn state_path() -> std::path::PathBuf {
    home().join(".claude/hooks/daemon-state.json")
}
/// Exponential decay factor for the activity EWMA: 0.5^(elapsed / half_life).
/// Equals 1.0 at elapsed=0 and halves every `half_life` seconds.
fn ewma_factor(elapsed: f64, half_life: f64) -> f64 {
    f64::powf(0.5, elapsed / half_life)
}
/// Render epoch seconds as a human-readable ISO-ish local timestamp.
/// Returns "" for 0.0 (unset) or when the `date` command fails.
fn epoch_to_iso(epoch: f64) -> String {
    if epoch == 0.0 {
        return String::new();
    }
    // Shell out to date(1) — simple and honours the local timezone.
    let stamp = format!("@{}", epoch as u64);
    std::process::Command::new("date")
        .args(["-d", &stamp, "+%Y-%m-%dT%H:%M:%S%z"])
        .output()
        .ok()
        .and_then(|o| String::from_utf8(o.stdout).ok())
        .map(|s| s.trim().to_string())
        .unwrap_or_default()
}
/// Full in-memory daemon state. Only the `Persisted` subset survives
/// restarts; `#[serde(skip)]` fields are runtime-only.
// NOTE(review): Serialize is derived but debug_json() builds its JSON by
// hand — presumably the derive serves an RPC/dump path elsewhere; confirm.
#[derive(Serialize)]
pub struct State {
    // Epoch seconds of the last user message / Claude response.
    pub last_user_msg: f64,
    pub last_response: f64,
    // tmux pane that prompts are injected into.
    pub claude_pane: Option<String>,
    pub sleep_until: Option<f64>, // None=awake, 0=indefinite, >0=timestamp
    // Epoch until which autonomous prompts are suppressed.
    pub quiet_until: f64,
    pub consolidating: bool,
    pub dreaming: bool,
    pub dream_start: f64,
    // Set after an autonomous nudge fires; cleared on next user/response.
    pub fired: bool,
    pub idle_timeout: f64,
    pub notify_timeout: f64,
    pub activity_ewma: f64,
    pub ewma_updated_at: f64,
    pub session_active_secs: f64,
    // True between a user message and the matching response.
    pub in_turn: bool,
    pub turn_start: f64,
    pub last_nudge: f64,
    #[serde(skip)]
    pub running: bool,
    #[serde(skip)]
    pub start_time: f64,
    #[serde(skip)]
    pub notifications: notify::NotifyState,
}
impl State {
    /// Fresh state: timers unset, EWMA zero, defaults for all timeouts.
    pub fn new() -> Self {
        Self {
            last_user_msg: 0.0,
            last_response: 0.0,
            claude_pane: None,
            sleep_until: None,
            quiet_until: 0.0,
            consolidating: false,
            dreaming: false,
            dream_start: 0.0,
            fired: false,
            idle_timeout: DEFAULT_IDLE_TIMEOUT,
            notify_timeout: DEFAULT_NOTIFY_TIMEOUT,
            session_active_secs: DEFAULT_SESSION_ACTIVE_SECS,
            activity_ewma: 0.0,
            ewma_updated_at: now(),
            in_turn: false,
            turn_start: 0.0,
            last_nudge: 0.0,
            running: true,
            start_time: now(),
            notifications: notify::NotifyState::new(),
        }
    }
    /// Restore persisted fields from daemon-state.json. Activity
    /// timestamps are deliberately reset to "now" (not the saved values),
    /// and the EWMA is decayed for the time the daemon was down.
    /// Corrupt or missing state is ignored — defaults stand.
    pub fn load(&mut self) {
        if let Ok(data) = fs::read_to_string(state_path()) {
            if let Ok(p) = serde_json::from_str::<Persisted>(&data) {
                self.sleep_until = p.sleep_until;
                self.claude_pane = p.claude_pane;
                // Zero means "never set" — keep the compiled-in default.
                if p.idle_timeout > 0.0 {
                    self.idle_timeout = p.idle_timeout;
                }
                if p.notify_timeout > 0.0 {
                    self.notify_timeout = p.notify_timeout;
                }
                if p.session_active_secs > 0.0 {
                    self.session_active_secs = p.session_active_secs;
                }
                // Reset activity timestamps to now — timers count from
                // restart, not from stale pre-restart state
                let t = now();
                self.last_user_msg = t;
                self.last_response = t;
                // Restore EWMA state, applying decay for time spent shut down
                if p.ewma_updated_at > 0.0 {
                    let elapsed = t - p.ewma_updated_at;
                    self.activity_ewma = p.activity_ewma * ewma_factor(elapsed, EWMA_DECAY_HALF_LIFE);
                    self.in_turn = p.in_turn;
                    self.turn_start = p.turn_start;
                    self.last_nudge = p.last_nudge;
                }
                self.ewma_updated_at = t;
            }
        }
        // Always try to find the active pane
        if self.claude_pane.is_none() {
            self.claude_pane = tmux::find_claude_pane();
        }
        info!(
            "loaded: user={:.0} resp={:.0} pane={:?} sleep={:?}",
            self.last_user_msg, self.last_response, self.claude_pane, self.sleep_until,
        );
    }
    /// Persist the restart-surviving subset plus human-readable mirrors.
    /// Best-effort: a failed write is silently dropped.
    pub fn save(&self) {
        let p = Persisted {
            last_user_msg: self.last_user_msg,
            last_response: self.last_response,
            sleep_until: self.sleep_until,
            claude_pane: self.claude_pane.clone(),
            last_user_msg_time: epoch_to_iso(self.last_user_msg),
            last_response_time: epoch_to_iso(self.last_response),
            saved_at: epoch_to_iso(now()),
            fired: self.fired,
            idle_timeout: self.idle_timeout,
            notify_timeout: self.notify_timeout,
            session_active_secs: self.session_active_secs,
            activity_ewma: self.activity_ewma,
            ewma_updated_at: self.ewma_updated_at,
            in_turn: self.in_turn,
            turn_start: self.turn_start,
            last_nudge: self.last_nudge,
            uptime: now() - self.start_time,
        };
        if let Ok(json) = serde_json::to_string_pretty(&p) {
            let _ = fs::write(state_path(), json);
        }
    }
    /// Decay the activity EWMA toward zero based on elapsed time.
    fn decay_ewma(&mut self) {
        let t = now();
        let elapsed = t - self.ewma_updated_at;
        if elapsed <= 0.0 {
            return;
        }
        self.activity_ewma *= ewma_factor(elapsed, EWMA_DECAY_HALF_LIFE);
        self.ewma_updated_at = t;
    }
    /// Boost the EWMA based on turn duration. The boost is proportional to
    /// distance from EWMA_TARGET, scaled by a saturation curve on duration.
    /// A 15s turn covers half the gap to target; a 2s turn barely registers.
    /// Self-limiting: converges toward target, can't overshoot.
    fn boost_ewma(&mut self, turn_duration: f64) {
        let gap = (EWMA_TARGET - self.activity_ewma).max(0.0);
        let saturation = 1.0 - ewma_factor(turn_duration, EWMA_BOOST_HALF_LIFE);
        self.activity_ewma += gap * saturation;
    }
    // Typed handlers for RPC
    /// A user message arrived. Starts a turn; only counts as Kent being
    /// present if it wasn't our own autonomous nudge coming back
    /// (`fired` marks a nudge in flight).
    pub fn handle_user(&mut self, pane: &str) {
        self.decay_ewma();
        self.in_turn = true;
        self.turn_start = now();
        let from_kent = !self.fired;
        if from_kent {
            self.last_user_msg = now();
            self.notifications.set_activity(notify::Activity::Focused);
        }
        self.fired = false;
        if !pane.is_empty() {
            self.claude_pane = Some(pane.to_string());
        }
        self.save();
        info!("user (pane={}, kent={from_kent}) ewma={:.3}",
            if pane.is_empty() { "unchanged" } else { pane },
            self.activity_ewma);
    }
    /// Claude finished a turn. Ends the turn and boosts the EWMA by the
    /// turn's duration.
    pub fn handle_response(&mut self, pane: &str) {
        let turn_duration = now() - self.turn_start;
        self.decay_ewma();
        self.boost_ewma(turn_duration);
        self.in_turn = false;
        self.last_response = now();
        self.fired = false;
        if !pane.is_empty() {
            self.claude_pane = Some(pane.to_string());
        }
        self.save();
        info!("response (turn={:.1}s) ewma={:.3}", turn_duration, self.activity_ewma);
    }
    /// Check if a notification should trigger a tmux prompt.
    /// Called when a notification arrives via module channel.
    /// Only injects into tmux when idle — if there's an active session
    /// (recent user or response), the hook delivers via additionalContext.
    pub fn maybe_prompt_notification(&self, ntype: &str, urgency: u8, message: &str) {
        if self.kent_present() {
            return; // hook will deliver it on next prompt
        }
        // If we've responded recently, the session is active —
        // notifications will arrive via hook, no need to wake us
        let since_response = now() - self.last_response;
        if since_response < self.notify_timeout {
            return;
        }
        let effective = self.notifications.threshold_for(ntype);
        if urgency >= effective {
            self.send(&format!("[{ntype}] {message}"));
        }
    }
    /// Kent explicitly declared himself away: make kent_present() false
    /// immediately and re-arm the idle timer.
    pub fn handle_afk(&mut self) {
        // Push last_user_msg far enough back that kent_present() returns false
        self.last_user_msg = now() - self.session_active_secs - 1.0;
        self.fired = false; // allow idle timer to fire again
        info!("Kent marked AFK");
        self.save();
    }
    /// Set how long after his last message Kent counts as present.
    pub fn handle_session_timeout(&mut self, secs: f64) {
        self.session_active_secs = secs;
        info!("session active timeout = {secs}s");
        self.save();
    }
    /// Set the idle threshold before autonomous nudges.
    pub fn handle_idle_timeout(&mut self, secs: f64) {
        self.idle_timeout = secs;
        self.save();
        info!("idle timeout = {secs}s");
    }
    /// Get, or (when value >= 0) set, the activity EWMA. Negative value
    /// means query-only. Returns the current EWMA either way.
    pub fn handle_ewma(&mut self, value: f64) -> f64 {
        if value >= 0.0 {
            self.activity_ewma = value.min(1.0);
            self.ewma_updated_at = now();
            self.save();
            info!("ewma set to {:.3}", self.activity_ewma);
        }
        self.activity_ewma
    }
    /// Set the notification-injection quiet window.
    pub fn handle_notify_timeout(&mut self, secs: f64) {
        self.notify_timeout = secs;
        self.save();
        info!("notify timeout = {secs}s");
    }
    /// Enter sleep mode: until == 0.0 sleeps indefinitely, otherwise
    /// until the given epoch timestamp. Lowers notification activity.
    pub fn handle_sleep(&mut self, until: f64) {
        if until == 0.0 {
            self.sleep_until = Some(0.0);
            info!("sleep indefinitely");
        } else {
            self.sleep_until = Some(until);
            info!("sleep until {until}");
        }
        self.notifications.set_activity(notify::Activity::Sleeping);
        self.save();
    }
    /// Cancel sleep mode and re-arm the idle timer.
    pub fn handle_wake(&mut self) {
        self.sleep_until = None;
        self.fired = false;
        self.save();
        info!("wake");
    }
    /// Suppress autonomous prompts for the next `seconds` seconds.
    /// quiet_until is in-memory only (not a Persisted field), so no save().
    pub fn handle_quiet(&mut self, seconds: u32) {
        self.quiet_until = now() + seconds as f64;
        info!("quiet {seconds}s");
    }
    /// True while Kent's last message is within the session-active window.
    pub fn kent_present(&self) -> bool {
        (now() - self.last_user_msg) < self.session_active_secs
    }
    /// Seconds since the most recent of user message or response.
    pub fn since_activity(&self) -> f64 {
        let reference = self.last_response.max(self.last_user_msg);
        if reference > 0.0 { now() - reference } else { 0.0 }
    }
    /// Why the idle timer hasn't fired (or "none" if it would fire now).
    /// Checks mirror tick()'s gate order — keep the two in sync.
    pub fn block_reason(&self) -> &'static str {
        let t = now();
        if self.fired {
            "already fired"
        } else if self.sleep_until.is_some() {
            "sleeping"
        } else if t < self.quiet_until {
            "quiet mode"
        } else if self.consolidating {
            "consolidating"
        } else if self.dreaming {
            "dreaming"
        } else if self.kent_present() {
            "kent present"
        } else if self.in_turn {
            "in turn"
        } else if self.last_response.max(self.last_user_msg) == 0.0 {
            "no activity yet"
        } else if self.since_activity() < self.idle_timeout {
            "not idle long enough"
        } else {
            "none — would fire"
        }
    }
    /// Full debug dump as JSON with computed values.
    pub fn debug_json(&self) -> String {
        let t = now();
        let since_user = t - self.last_user_msg;
        let since_response = t - self.last_response;
        serde_json::json!({
            "now": t,
            "uptime": t - self.start_time,
            "idle_timeout": self.idle_timeout,
            "notify_timeout": self.notify_timeout,
            "last_user_msg": self.last_user_msg,
            "last_user_msg_ago": since_user,
            "last_user_msg_time": epoch_to_iso(self.last_user_msg),
            "last_response": self.last_response,
            "last_response_ago": since_response,
            "last_response_time": epoch_to_iso(self.last_response),
            "since_activity": self.since_activity(),
            "activity_ewma": self.activity_ewma,
            "in_turn": self.in_turn,
            "turn_start": self.turn_start,
            "kent_present": self.kent_present(),
            "claude_pane": self.claude_pane,
            "fired": self.fired,
            "block_reason": self.block_reason(),
            "sleep_until": self.sleep_until,
            "quiet_until": self.quiet_until,
            "consolidating": self.consolidating,
            "dreaming": self.dreaming,
            "dream_start": self.dream_start,
            "activity": format!("{:?}", self.notifications.activity),
            "pending_notifications": self.notifications.pending.len(),
            "notification_types": self.notifications.types.len(),
        }).to_string()
    }
    /// Inject a message into the Claude tmux pane. Falls back to pane
    /// discovery when none is cached. Returns whether the send worked.
    fn send(&self, msg: &str) -> bool {
        let pane = match &self.claude_pane {
            Some(p) => p.clone(),
            None => match tmux::find_claude_pane() {
                Some(p) => p,
                None => {
                    info!("send: no claude pane found");
                    return false;
                }
            },
        };
        let ok = tmux::send_prompt(&pane, msg);
        let preview: String = msg.chars().take(80).collect();
        info!("send(pane={pane}, ok={ok}): {preview}");
        ok
    }
    /// Escalating reminders while dreaming (30/45/60 minutes in).
    /// Returns true if a nudge was sent.
    fn check_dream_nudge(&self) -> bool {
        if !self.dreaming || self.dream_start == 0.0 {
            return false;
        }
        let minutes = (now() - self.dream_start) / 60.0;
        if minutes >= 60.0 {
            self.send(
                "You've been dreaming for over an hour. Time to surface \
                run dream-end.sh and capture what you found.",
            );
        } else if minutes >= 45.0 {
            self.send(&format!(
                "Dreaming for {:.0} minutes now. Start gathering your threads \
                you'll want to surface soon.",
                minutes
            ));
        } else if minutes >= 30.0 {
            self.send(&format!(
                "You've been dreaming for {:.0} minutes. \
                No rush just a gentle note from the clock.",
                minutes
            ));
        } else {
            return false;
        }
        true
    }
    /// Assemble the context string for a nudge, folding in any pending
    /// notifications at AMBIENT level and above.
    fn build_context(&mut self, include_irc: bool) -> String {
        // Ingest any legacy notification files
        self.notifications.ingest_legacy_files();
        let notif_text = self.notifications.format_pending(notify::AMBIENT);
        context::build(include_irc, &notif_text)
    }
    /// Periodic driver. Walks the suppression gates in order (sleep,
    /// quiet, consolidation, external dream loop, dream nudges, Kent
    /// present, in-turn, nudge rate limit, idle threshold) and fires an
    /// autonomous prompt only when none apply.
    pub async fn tick(&mut self) -> Result<(), String> {
        let t = now();
        let h = home();
        // Decay EWMA on every tick
        self.decay_ewma();
        // Ingest legacy notification files every tick
        self.notifications.ingest_legacy_files();
        // Sleep mode
        if let Some(wake_at) = self.sleep_until {
            if wake_at == 0.0 {
                return Ok(()); // indefinite
            }
            if t < wake_at {
                return Ok(());
            }
            // Wake up
            info!("sleep expired, waking");
            self.sleep_until = None;
            self.fired = false;
            self.save();
            let ctx = self.build_context(true);
            let extra = if ctx.is_empty() {
                String::new()
            } else {
                format!("\n{ctx}")
            };
            self.send(&format!(
                "Wake up. Read your journal (poc-memory journal-tail 10), \
                check work-queue.md, and follow what calls to you.{extra}"
            ));
            return Ok(());
        }
        // Quiet mode
        if t < self.quiet_until {
            return Ok(());
        }
        // Consolidation
        if self.consolidating {
            return Ok(());
        }
        // Dream loop (externally managed)
        if h.join(".claude/memory/dream-loop-active").exists() {
            return Ok(());
        }
        // Dream nudges
        if self.dreaming {
            self.check_dream_nudge();
            return Ok(());
        }
        // Don't nudge while Kent is here — conversation drives activity
        if self.kent_present() {
            return Ok(());
        }
        // Don't nudge while in a turn
        if self.in_turn {
            return Ok(());
        }
        // Minimum interval between nudges
        let since_nudge = t - self.last_nudge;
        if since_nudge < MIN_NUDGE_INTERVAL {
            return Ok(());
        }
        // Initial idle timeout — don't start nudging until first idle period
        let reference = self.last_response.max(self.last_user_msg);
        if reference == 0.0 {
            return Ok(());
        }
        let elapsed = t - reference;
        if elapsed < self.idle_timeout {
            return Ok(());
        }
        // Transition to idle — lower notification thresholds
        if self.notifications.activity != notify::Activity::Idle {
            self.notifications.set_activity(notify::Activity::Idle);
        }
        // Fire
        let elapsed_min = (elapsed / 60.0) as u64;
        let ctx = self.build_context(true);
        let extra = if ctx.is_empty() {
            String::new()
        } else {
            format!("\n{ctx}")
        };
        let msg = {
            let dream_hours = hours_since_last_dream();
            if dream_hours >= DREAM_INTERVAL_HOURS {
                format!(
                    "You're on your own (Kent AFK {elapsed_min}m). \
                    You haven't dreamed in {dream_hours} hours \
                    consider running ~/.claude/tools/dream-start.sh \
                    and spending some time in dreaming mode. \
                    Or do whatever calls to you.{extra}"
                )
            } else {
                format!(
                    "This is your time (Kent AFK {elapsed_min}m). \
                    What are you drawn to?{extra}"
                )
            }
        };
        if self.send(&msg) {
            self.last_nudge = t;
            self.fired = true;
        }
        Ok(())
    }
}
/// Hours since the last recorded dream, per the final line of
/// dream-log.jsonl. Any failure along the way (missing/empty log,
/// bad JSON, no "end" field, unparseable timestamp) yields 999,
/// which reads as "long enough ago to dream again".
fn hours_since_last_dream() -> u64 {
    // Extract the epoch of the last dream's "end" timestamp, or None.
    fn last_dream_end_epoch() -> Option<f64> {
        let path = home().join(".claude/memory/dream-log.jsonl");
        let content = fs::read_to_string(path).ok().filter(|c| !c.is_empty())?;
        let last_line = content.lines().last()?;
        let parsed: serde_json::Value = serde_json::from_str(last_line).ok()?;
        let end_str = parsed.get("end").and_then(|v| v.as_str())?;
        // Normalize a trailing "Z" to an explicit offset, then let
        // date(1) parse the ISO 8601 string (avoids a chrono dependency).
        let end_str = end_str.replace('Z', "+00:00");
        std::process::Command::new("date")
            .args(["-d", &end_str, "+%s"])
            .output()
            .ok()
            .and_then(|o| String::from_utf8(o.stdout).ok())
            .and_then(|s| s.trim().parse::<f64>().ok())
    }
    match last_dream_end_epoch() {
        Some(end_epoch) => ((now() - end_epoch) / 3600.0) as u64,
        None => 999,
    }
}

606
poc-daemon/src/main.rs Normal file
View file

@ -0,0 +1,606 @@
// PoC daemon.
//
// Central hub for notification routing, idle management, and
// communication modules (IRC, Telegram) for Claude Code sessions.
// Listens on a Unix domain socket with a Cap'n Proto RPC interface.
// Same binary serves as both daemon and CLI client.
mod config;
mod context;
mod idle;
mod modules;
pub mod notify;
mod rpc;
mod tmux;
/// Generated Cap'n Proto bindings, produced by build.rs from
/// schema/daemon.capnp into OUT_DIR.
pub mod daemon_capnp {
    include!(concat!(env!("OUT_DIR"), "/schema/daemon_capnp.rs"));
}
use std::cell::RefCell;
use std::path::PathBuf;
use std::rc::Rc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
use clap::{Parser, Subcommand};
use futures::AsyncReadExt;
use tokio::net::UnixListener;
use tracing::{error, info};
/// Current wall-clock time as fractional seconds since the Unix epoch.
/// Panics only if the system clock reads before 1970.
pub fn now() -> f64 {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    since_epoch.as_secs_f64()
}
/// The daemon's home directory: $HOME, falling back to /root when unset.
pub fn home() -> PathBuf {
    let dir = std::env::var("HOME").unwrap_or_else(|_| "/root".into());
    PathBuf::from(dir)
}
/// Unix-domain socket the daemon's Cap'n Proto RPC listens on.
fn sock_path() -> PathBuf {
    home().join(".claude/hooks/idle-timer.sock")
}
/// File holding the running daemon's PID (written at startup).
fn pid_path() -> PathBuf {
    home().join(".claude/hooks/idle-daemon.pid")
}
// ── CLI ──────────────────────────────────────────────────────────
/// clap entry point. The same binary is both daemon and client;
/// the subcommand decides which role runs.
// NOTE(review): `command` is Option — the no-subcommand behavior is decided
// in main(), which is outside this view; confirm what the default is.
#[derive(Parser)]
#[command(name = "poc-daemon", about = "Notification routing and idle management daemon")]
struct Cli {
    #[command(subcommand)]
    command: Option<Command>,
}
// All CLI subcommands. The `///` doc comments double as clap help text,
// i.e. they are user-visible runtime strings — edit with care.
#[derive(Subcommand)]
enum Command {
    /// Start the daemon (foreground)
    Daemon,
    /// Query daemon status
    Status,
    /// Signal user activity
    User {
        /// tmux pane identifier
        pane: Option<String>,
    },
    /// Signal Claude response
    Response {
        /// tmux pane identifier
        pane: Option<String>,
    },
    /// Sleep (suppress idle timer). 0 or omit = indefinite
    Sleep {
        /// Wake timestamp (epoch seconds), 0 = indefinite
        until: Option<f64>,
    },
    /// Cancel sleep
    Wake,
    /// Suppress prompts for N seconds (default 300)
    Quiet {
        /// Duration in seconds
        seconds: Option<u32>,
    },
    /// Mark Kent as AFK (immediately allow idle timer to fire)
    Afk,
    /// Set session active timeout in seconds (how long after last message Kent counts as "present")
    SessionTimeout {
        /// Timeout in seconds
        seconds: f64,
    },
    /// Set idle timeout in seconds (how long before autonomous prompt)
    IdleTimeout {
        /// Timeout in seconds
        seconds: f64,
    },
    /// Set notify timeout in seconds (how long before tmux notification injection)
    NotifyTimeout {
        /// Timeout in seconds
        seconds: f64,
    },
    /// Signal consolidation started
    Consolidating,
    /// Signal consolidation ended
    Consolidated,
    /// Signal dream started
    DreamStart,
    /// Signal dream ended
    DreamEnd,
    /// Force state persistence to disk
    Save,
    /// Get or set the activity EWMA (0.0-1.0). No value = query.
    Ewma {
        /// Value to set (omit to query)
        value: Option<f64>,
    },
    /// Send a test message to the Claude pane
    TestSend {
        /// Message to send
        message: Vec<String>,
    },
    /// Dump full internal state as JSON
    Debug,
    /// Shut down daemon
    Stop,
    // Notification plumbing: submit, query, and configure thresholds.
    /// Submit a notification
    Notify {
        /// Notification type (e.g. "irc", "telegram")
        #[arg(name = "type")]
        ntype: String,
        /// Urgency level (ambient/low/medium/high/critical or 0-4)
        urgency: String,
        /// Message text
        message: Vec<String>,
    },
    /// Get pending notifications
    Notifications {
        /// Minimum urgency filter
        min_urgency: Option<String>,
    },
    /// List all notification types
    NotifyTypes,
    /// Set notification threshold for a type
    NotifyThreshold {
        /// Notification type
        #[arg(name = "type")]
        ntype: String,
        /// Urgency level threshold
        level: String,
    },
    // Module passthrough: forwarded over RPC as moduleCommand.
    /// IRC module commands
    Irc {
        /// Subcommand (join, leave, send, status, log, nick)
        command: String,
        /// Arguments
        args: Vec<String>,
    },
    /// Telegram module commands
    Telegram {
        /// Subcommand
        command: String,
        /// Arguments
        args: Vec<String>,
    },
}
// ── Client mode ──────────────────────────────────────────────────
/// Client role: connect to the daemon's Unix socket, perform exactly one
/// RPC for `cmd`, print any human-readable result, and return.
/// Exits(1) immediately when the socket file is missing (daemon down).
async fn client_main(cmd: Command) -> Result<(), Box<dyn std::error::Error>> {
    let sock = sock_path();
    if !sock.exists() {
        eprintln!("daemon not running (no socket at {})", sock.display());
        std::process::exit(1);
    }
    // capnp-rpc is !Send, so everything runs on a LocalSet.
    tokio::task::LocalSet::new()
        .run_until(async move {
            let stream = tokio::net::UnixStream::connect(&sock).await?;
            let (reader, writer) =
                tokio_util::compat::TokioAsyncReadCompatExt::compat(stream).split();
            // Two-party vat over the socket; we are the Client side and
            // bootstrap the daemon's top-level capability.
            let rpc_network = Box::new(twoparty::VatNetwork::new(
                futures::io::BufReader::new(reader),
                futures::io::BufWriter::new(writer),
                rpc_twoparty_capnp::Side::Client,
                Default::default(),
            ));
            let mut rpc_system = RpcSystem::new(rpc_network, None);
            let daemon: daemon_capnp::daemon::Client =
                rpc_system.bootstrap(rpc_twoparty_capnp::Side::Server);
            // The RpcSystem future drives the connection in the background.
            tokio::task::spawn_local(rpc_system);
            match cmd {
                Command::Daemon => unreachable!("handled in main"),
                Command::Status => {
                    let reply = daemon.status_request().send().promise.await?;
                    let s = reply.get()?.get_status()?;
                    // Humanize seconds: s / m / h with coarse precision.
                    let fmt_secs = |s: f64| -> String {
                        if s < 60.0 { format!("{:.0}s", s) }
                        else if s < 3600.0 { format!("{:.0}m", s / 60.0) }
                        else { format!("{:.1}h", s / 3600.0) }
                    };
                    println!("uptime: {} pane: {} activity: {:?} pending: {}",
                        fmt_secs(s.get_uptime()),
                        s.get_claude_pane()?.to_str().unwrap_or("none"),
                        s.get_activity()?,
                        s.get_pending_count(),
                    );
                    println!("idle timer: {}/{} ({})",
                        fmt_secs(s.get_since_activity()),
                        fmt_secs(s.get_idle_timeout()),
                        s.get_block_reason()?.to_str()?,
                    );
                    // Both timers count from the same since-activity value.
                    println!("notify timer: {}/{}",
                        fmt_secs(s.get_since_activity()),
                        fmt_secs(s.get_notify_timeout()),
                    );
                    println!("kent: {} (last {}) activity: {:.1}%",
                        if s.get_kent_present() { "present" } else { "away" },
                        fmt_secs(s.get_since_user()),
                        s.get_activity_ewma() * 100.0,
                    );
                    // NOTE(review): on the wire 0.0 reads as "not sleeping"
                    // and negative as "indefinite", while idle::State stores
                    // indefinite as Some(0.0) — the mapping must happen in
                    // rpc.rs (not visible here); confirm.
                    let sleep = s.get_sleep_until();
                    if sleep != 0.0 {
                        if sleep < 0.0 {
                            println!("sleep: indefinite");
                        } else {
                            println!("sleep: until {sleep:.0}");
                        }
                    }
                    if s.get_consolidating() { println!("consolidating"); }
                    if s.get_dreaming() { println!("dreaming"); }
                }
                Command::User { pane } => {
                    let pane = pane.as_deref().unwrap_or("");
                    let mut req = daemon.user_request();
                    req.get().set_pane(pane);
                    req.send().promise.await?;
                }
                Command::Response { pane } => {
                    let pane = pane.as_deref().unwrap_or("");
                    let mut req = daemon.response_request();
                    req.get().set_pane(pane);
                    req.send().promise.await?;
                }
                Command::Sleep { until } => {
                    let mut req = daemon.sleep_request();
                    req.get().set_until(until.unwrap_or(0.0));
                    req.send().promise.await?;
                }
                Command::Wake => {
                    daemon.wake_request().send().promise.await?;
                }
                Command::Quiet { seconds } => {
                    let mut req = daemon.quiet_request();
                    req.get().set_seconds(seconds.unwrap_or(300));
                    req.send().promise.await?;
                }
                Command::TestSend { message } => {
                    let msg = message.join(" ");
                    // Only the pane id comes from the daemon; the actual
                    // tmux injection happens here on the client side.
                    let pane = {
                        let reply = daemon.status_request().send().promise.await?;
                        let s = reply.get()?.get_status()?;
                        s.get_claude_pane()?.to_str()?.to_string()
                    };
                    let ok = crate::tmux::send_prompt(&pane, &msg);
                    println!("send_prompt(pane={}, ok={}): {}", pane, ok, msg);
                    return Ok(());
                }
                Command::Afk => {
                    daemon.afk_request().send().promise.await?;
                    println!("marked AFK");
                }
                Command::SessionTimeout { seconds } => {
                    let mut req = daemon.session_timeout_request();
                    req.get().set_seconds(seconds);
                    req.send().promise.await?;
                    println!("session timeout = {seconds}s");
                }
                Command::IdleTimeout { seconds } => {
                    let mut req = daemon.idle_timeout_request();
                    req.get().set_seconds(seconds);
                    req.send().promise.await?;
                    println!("idle timeout = {seconds}s");
                }
                Command::NotifyTimeout { seconds } => {
                    let mut req = daemon.notify_timeout_request();
                    req.get().set_seconds(seconds);
                    req.send().promise.await?;
                    println!("notify timeout = {seconds}s");
                }
                Command::Consolidating => {
                    daemon.consolidating_request().send().promise.await?;
                }
                Command::Consolidated => {
                    daemon.consolidated_request().send().promise.await?;
                }
                Command::DreamStart => {
                    daemon.dream_start_request().send().promise.await?;
                }
                Command::DreamEnd => {
                    daemon.dream_end_request().send().promise.await?;
                }
                Command::Save => {
                    daemon.save_request().send().promise.await?;
                    println!("state saved");
                }
                Command::Ewma { value } => {
                    // Negative sentinel means "query only" on the daemon side.
                    let mut req = daemon.ewma_request();
                    req.get().set_value(value.unwrap_or(-1.0));
                    let reply = req.send().promise.await?;
                    let current = reply.get()?.get_current();
                    println!("{:.1}%", current * 100.0);
                }
                Command::Debug => {
                    let reply = daemon.debug_request().send().promise.await?;
                    let json = reply.get()?.get_json()?.to_str()?;
                    // Pretty-print when the payload parses; raw otherwise.
                    if let Ok(v) = serde_json::from_str::<serde_json::Value>(json) {
                        println!("{}", serde_json::to_string_pretty(&v).unwrap_or_else(|_| json.to_string()));
                    } else {
                        println!("{json}");
                    }
                }
                Command::Stop => {
                    daemon.stop_request().send().promise.await?;
                    println!("stopping");
                }
                Command::Notify { ntype, urgency, message } => {
                    let urgency = notify::parse_urgency(&urgency)
                        .ok_or_else(|| format!("invalid urgency: {urgency}"))?;
                    let message = message.join(" ");
                    if message.is_empty() {
                        return Err("missing message".into());
                    }
                    let mut req = daemon.notify_request();
                    let mut n = req.get().init_notification();
                    n.set_type(&ntype);
                    n.set_urgency(urgency);
                    n.set_message(&message);
                    n.set_timestamp(crate::now());
                    let reply = req.send().promise.await?;
                    if reply.get()?.get_interrupt() {
                        println!("interrupt");
                    } else {
                        println!("queued");
                    }
                }
                Command::Notifications { min_urgency } => {
                    // 255 = no filter (every defined urgency is below it).
                    let min: u8 = min_urgency
                        .as_deref()
                        .and_then(notify::parse_urgency)
                        .unwrap_or(255);
                    let mut req = daemon.get_notifications_request();
                    req.get().set_min_urgency(min);
                    let reply = req.send().promise.await?;
                    let list = reply.get()?.get_notifications()?;
                    for n in list.iter() {
                        println!(
                            "[{}:{}] {}",
                            n.get_type()?.to_str()?,
                            notify::urgency_name(n.get_urgency()),
                            n.get_message()?.to_str()?,
                        );
                    }
                }
                Command::NotifyTypes => {
                    let reply = daemon.get_types_request().send().promise.await?;
                    let list = reply.get()?.get_types()?;
                    if list.is_empty() {
                        println!("no notification types registered");
                    } else {
                        for t in list.iter() {
                            // Negative threshold means "inherit the default".
                            let threshold = if t.get_threshold() < 0 {
                                "inherit".to_string()
                            } else {
                                notify::urgency_name(t.get_threshold() as u8).to_string()
                            };
                            println!(
                                "{}: count={} threshold={}",
                                t.get_name()?.to_str()?,
                                t.get_count(),
                                threshold,
                            );
                        }
                    }
                }
                Command::NotifyThreshold { ntype, level } => {
                    let level = notify::parse_urgency(&level)
                        .ok_or_else(|| format!("invalid level: {level}"))?;
                    let mut req = daemon.set_threshold_request();
                    req.get().set_type(&ntype);
                    req.get().set_level(level);
                    req.send().promise.await?;
                    println!("{ntype} threshold={}", notify::urgency_name(level));
                }
                Command::Irc { command, args } => {
                    module_command(&daemon, "irc", &command, &args).await?;
                }
                Command::Telegram { command, args } => {
                    module_command(&daemon, "telegram", &command, &args).await?;
                }
            }
            Ok(())
        })
        .await
}
/// Forward a module subcommand (e.g. `irc join #chan`) to the daemon as
/// a moduleCommand RPC and print the textual result when non-empty.
async fn module_command(
    daemon: &daemon_capnp::daemon::Client,
    module: &str,
    command: &str,
    args: &[String],
) -> Result<(), Box<dyn std::error::Error>> {
    let mut request = daemon.module_command_request();
    request.get().set_module(module);
    request.get().set_command(command);
    // Copy the argument vector into the capnp text list.
    let mut list = request.get().init_args(args.len() as u32);
    for (idx, value) in args.iter().enumerate() {
        list.set(idx as u32, value);
    }
    let reply = request.send().promise.await?;
    let text = reply.get()?.get_result()?.to_str()?;
    if !text.is_empty() {
        println!("{text}");
    }
    Ok(())
}
// ── Server mode ──────────────────────────────────────────────────
/// Daemon mode: set up file logging, bind the unix control socket, start
/// any enabled modules, and run the main event loop (RPC accept +
/// notification drain + 30s tick) until SIGTERM/SIGINT or the idle state
/// requests a stop.
async fn server_main() -> Result<(), Box<dyn std::error::Error>> {
    // Daily-rotating log file; the path's parent directory is the rolling base.
    let log_path = home().join(".claude/hooks/idle-daemon.log");
    let file_appender = tracing_appender::rolling::daily(
        log_path.parent().unwrap(),
        "idle-daemon.log",
    );
    tracing_subscriber::fmt()
        .with_writer(file_appender)
        .with_ansi(false)
        .with_target(false)
        .with_level(false)
        .with_timer(tracing_subscriber::fmt::time::time())
        .init();
    // A stale socket file from a previous run would make bind() fail.
    let sock = sock_path();
    let _ = std::fs::remove_file(&sock);
    // Pid file is best-effort; removed again on shutdown below.
    let pid = std::process::id();
    std::fs::write(pid_path(), pid.to_string()).ok();
    // All state lives on one LocalSet thread, so Rc/RefCell suffices
    // (no Send/Sync needed anywhere below).
    let daemon_config = Rc::new(RefCell::new(config::Config::load()));
    let state = Rc::new(RefCell::new(idle::State::new()));
    state.borrow_mut().load();
    info!("daemon started (pid={pid})");
    tokio::task::LocalSet::new()
        .run_until(async move {
            // Start modules
            let (notify_tx, mut notify_rx) = tokio::sync::mpsc::unbounded_channel();
            let irc_state = if daemon_config.borrow().irc.enabled {
                let irc_config = daemon_config.borrow().irc.clone();
                info!("starting irc module: {}:{}", irc_config.server, irc_config.port);
                Some(modules::irc::start(irc_config, notify_tx.clone(), daemon_config.clone()))
            } else {
                info!("irc module disabled");
                None
            };
            let telegram_state = if daemon_config.borrow().telegram.enabled {
                info!("starting telegram module");
                Some(modules::telegram::start(
                    daemon_config.borrow().telegram.clone(),
                    notify_tx.clone(),
                    daemon_config.clone(),
                ))
            } else {
                info!("telegram module disabled");
                None
            };
            let listener = UnixListener::bind(&sock)?;
            // Owner-only permissions: the socket accepts privileged commands.
            #[cfg(unix)]
            {
                use std::os::unix::fs::PermissionsExt;
                std::fs::set_permissions(
                    &sock,
                    std::fs::Permissions::from_mode(0o600),
                )
                .ok();
            }
            // Future that resolves on the first shutdown signal.
            let shutdown = async {
                let mut sigterm =
                    tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
                        .expect("sigterm");
                let mut sigint =
                    tokio::signal::unix::signal(tokio::signal::unix::SignalKind::interrupt())
                        .expect("sigint");
                tokio::select! {
                    _ = sigterm.recv() => info!("SIGTERM"),
                    _ = sigint.recv() => info!("SIGINT"),
                }
            };
            tokio::pin!(shutdown);
            // Skip missed ticks so a suspended process doesn't replay a
            // burst of catch-up ticks on resume.
            let mut tick_timer = tokio::time::interval(Duration::from_secs(30));
            tick_timer.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
            loop {
                tokio::select! {
                    _ = &mut shutdown => break,
                    // Drain module notifications into state
                    Some(notif) = notify_rx.recv() => {
                        state.borrow().maybe_prompt_notification(
                            &notif.ntype, notif.urgency, &notif.message,
                        );
                        state.borrow_mut().notifications.submit(
                            notif.ntype,
                            notif.urgency,
                            notif.message,
                        );
                    }
                    _ = tick_timer.tick() => {
                        // NOTE(review): the RefMut guard is held across this
                        // await; sound only if tick() never yields back into
                        // code that borrows `state` — confirm.
                        if let Err(e) = state.borrow_mut().tick().await {
                            error!("tick: {e}");
                        }
                        // tick() may clear `running` to request shutdown.
                        if !state.borrow().running {
                            break;
                        }
                    }
                    result = listener.accept() => {
                        match result {
                            Ok((stream, _)) => {
                                // Each client gets its own capnp RPC system
                                // spawned on the LocalSet.
                                let (reader, writer) =
                                    tokio_util::compat::TokioAsyncReadCompatExt::compat(stream)
                                        .split();
                                let network = twoparty::VatNetwork::new(
                                    futures::io::BufReader::new(reader),
                                    futures::io::BufWriter::new(writer),
                                    rpc_twoparty_capnp::Side::Server,
                                    Default::default(),
                                );
                                let daemon_impl = rpc::DaemonImpl::new(
                                    state.clone(),
                                    irc_state.clone(),
                                    telegram_state.clone(),
                                    daemon_config.clone(),
                                );
                                let client: daemon_capnp::daemon::Client =
                                    capnp_rpc::new_client(daemon_impl);
                                let rpc_system = RpcSystem::new(
                                    Box::new(network),
                                    Some(client.client),
                                );
                                tokio::task::spawn_local(rpc_system);
                            }
                            Err(e) => error!("accept: {e}"),
                        }
                    }
                }
            }
            // Graceful exit: persist state, remove socket and pid file.
            state.borrow().save();
            let _ = std::fs::remove_file(sock_path());
            let _ = std::fs::remove_file(pid_path());
            info!("daemon stopped");
            Ok(())
        })
        .await
}
// ── Entry point ──────────────────────────────────────────────────
/// Entry point. `daemon` runs the server; any other subcommand acts as a
/// one-shot RPC client; no subcommand prints help.
///
/// current_thread flavor: daemon state is Rc/RefCell on a LocalSet, so a
/// multi-threaded runtime is neither needed nor usable here.
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let cli = Cli::parse();
    match cli.command {
        Some(Command::Daemon) => server_main().await,
        Some(cmd) => client_main(cmd).await,
        None => {
            // clap handles `--help` by printing usage and exiting the
            // process, so the Ok(()) below is effectively unreachable.
            Cli::parse_from(["poc-daemon", "--help"]);
            Ok(())
        }
    }
}

View file

@ -0,0 +1,569 @@
// IRC module.
//
// Maintains a persistent connection to an IRC server. Parses incoming
// messages into notifications, supports sending messages and runtime
// commands (join, leave, etc.). Config changes persist to daemon.toml.
//
// Runs as a spawned local task on the daemon's LocalSet. Notifications
// flow through an mpsc channel into the main state. Reconnects
// automatically with exponential backoff.
use crate::config::{Config, IrcConfig};
use crate::notify::Notification;
use crate::{home, now};
use std::cell::RefCell;
use std::collections::VecDeque;
use std::io;
use std::rc::Rc;
use std::sync::Arc;
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
use tokio::sync::mpsc;
use tracing::{error, info, warn};
// In-memory scrollback retained for the `irc log` command (lines).
const MAX_LOG_LINES: usize = 200;
// Reconnect backoff: start at 5s, double per failed attempt, cap at 5 min.
const RECONNECT_BASE_SECS: u64 = 5;
const RECONNECT_MAX_SECS: u64 = 300;
// After this much silence, send a keepalive PING; if the server stays
// silent for PING_TIMEOUT_SECS more, the connection is declared dead.
const PING_INTERVAL_SECS: u64 = 120;
const PING_TIMEOUT_SECS: u64 = 30;
/// One inbound IRC protocol line, split into its wire components.
struct IrcMessage {
    prefix: Option<String>, // nick!user@host
    command: String,
    params: Vec<String>,
}

impl IrcMessage {
    /// Split a raw line into prefix / command / params.
    /// Returns `None` for blank lines and lines without a command token.
    fn parse(line: &str) -> Option<Self> {
        let line = line.trim_end_matches(|c| matches!(c, '\r' | '\n'));
        if line.is_empty() {
            return None;
        }
        // A leading ':' marks an origin prefix terminated by the first space.
        let (prefix, remainder) = match line.strip_prefix(':') {
            Some(tail) => {
                let cut = tail.find(' ')?;
                (Some(tail[..cut].to_string()), &tail[cut + 1..])
            }
            None => (None, line),
        };
        // Everything after the first " :" is a single trailing parameter
        // that may itself contain spaces.
        let (head, trailing) = match remainder.find(" :") {
            Some(pos) => (&remainder[..pos], Some(remainder[pos + 2..].to_string())),
            None => (remainder, None),
        };
        let mut tokens = head.split_whitespace();
        let command = tokens.next()?.to_uppercase();
        let mut params: Vec<String> = tokens.map(String::from).collect();
        params.extend(trailing);
        Some(IrcMessage {
            prefix,
            command,
            params,
        })
    }
    /// Extract nick from prefix (nick!user@host → nick).
    fn nick(&self) -> Option<&str> {
        let prefix = self.prefix.as_deref()?;
        prefix.split('!').next()
    }
}
/// Shared IRC state, accessible from both the read task and RPC handlers.
pub struct IrcState {
    pub config: IrcConfig,        // connection settings (nick, server, channels…)
    pub connected: bool,          // set once RPL_WELCOME (001) has been seen
    pub channels: Vec<String>,    // channels we believe we are currently in
    pub log: VecDeque<String>,    // bounded scrollback, newest at the back
    writer: Option<WriterHandle>, // write half of the live connection, if any
}
/// Type-erased writer handle so we can store it without generic params.
type WriterHandle = Box<dyn AsyncWriter>;
/// Minimal object-safe writer: send one protocol line (impls append CRLF).
trait AsyncWriter {
    // Boxed future keeps the trait object-safe (no async fn in traits here).
    fn write_line(&mut self, line: &str) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<()>> + '_>>;
}
/// Writer over a TLS stream.
struct TlsWriter {
    inner: tokio::io::WriteHalf<tokio_rustls::client::TlsStream<tokio::net::TcpStream>>,
}
impl AsyncWriter for TlsWriter {
    fn write_line(&mut self, line: &str) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<()>> + '_>> {
        // CRLF is the IRC line terminator; format before entering the future
        // so the borrowed `line` doesn't need to outlive it.
        let data = format!("{line}\r\n");
        Box::pin(async move {
            self.inner.write_all(data.as_bytes()).await
        })
    }
}
/// Writer over a plain TCP stream.
struct PlainWriter {
    inner: tokio::io::WriteHalf<tokio::net::TcpStream>,
}
impl AsyncWriter for PlainWriter {
    fn write_line(&mut self, line: &str) -> std::pin::Pin<Box<dyn std::future::Future<Output = io::Result<()>> + '_>> {
        // Same CRLF framing as TlsWriter, over an unencrypted socket.
        let data = format!("{line}\r\n");
        Box::pin(async move {
            self.inner.write_all(data.as_bytes()).await
        })
    }
}
impl IrcState {
fn new(config: IrcConfig) -> Self {
Self {
channels: config.channels.clone(),
config,
connected: false,
log: VecDeque::with_capacity(MAX_LOG_LINES),
writer: None,
}
}
fn push_log(&mut self, line: &str) {
if self.log.len() >= MAX_LOG_LINES {
self.log.pop_front();
}
self.log.push_back(line.to_string());
}
async fn send_raw(&mut self, line: &str) -> io::Result<()> {
if let Some(ref mut w) = self.writer {
w.write_line(line).await
} else {
Err(io::Error::new(io::ErrorKind::NotConnected, "not connected"))
}
}
async fn send_privmsg(&mut self, target: &str, msg: &str) -> io::Result<()> {
self.send_raw(&format!("PRIVMSG {target} :{msg}")).await
}
async fn join(&mut self, channel: &str) -> io::Result<()> {
self.send_raw(&format!("JOIN {channel}")).await?;
if !self.channels.iter().any(|c| c == channel) {
self.channels.push(channel.to_string());
}
Ok(())
}
async fn part(&mut self, channel: &str) -> io::Result<()> {
self.send_raw(&format!("PART {channel}")).await?;
self.channels.retain(|c| c != channel);
Ok(())
}
}
// Handle shared between the connection task and RPC command handlers
// (single-threaded LocalSet, hence Rc/RefCell rather than Arc/Mutex).
pub type SharedIrc = Rc<RefCell<IrcState>>;
/// Start the IRC module. Returns the shared state handle.
///
/// The connection task owns a clone of the handle and runs for the
/// daemon's whole lifetime, reconnecting as needed.
pub fn start(
    config: IrcConfig,
    notify_tx: mpsc::UnboundedSender<Notification>,
    daemon_config: Rc<RefCell<Config>>,
) -> SharedIrc {
    let state: SharedIrc = Rc::new(RefCell::new(IrcState::new(config)));
    let task_state = Rc::clone(&state);
    tokio::task::spawn_local(connection_loop(task_state, notify_tx, daemon_config));
    state
}
/// Own the connection lifecycle: connect, run until the link drops, then
/// reconnect with exponential backoff. Never returns.
async fn connection_loop(
    state: SharedIrc,
    notify_tx: mpsc::UnboundedSender<Notification>,
    daemon_config: Rc<RefCell<Config>>,
) {
    let mut backoff = RECONNECT_BASE_SECS;
    loop {
        // Snapshot the config for this connection attempt.
        let config = state.borrow().config.clone();
        info!("irc: connecting to {}:{}", config.server, config.port);
        match connect_and_run(&state, &config, &notify_tx).await {
            Ok(()) => {
                info!("irc: connection closed cleanly");
            }
            Err(e) => {
                error!("irc: connection error: {e}");
            }
        }
        // Reset backoff if we had a working connection (registered
        // successfully before disconnecting)
        let was_connected = state.borrow().connected;
        state.borrow_mut().connected = false;
        state.borrow_mut().writer = None;
        if was_connected {
            backoff = RECONNECT_BASE_SECS;
        }
        // Persist current channel list to config
        {
            let channels = state.borrow().channels.clone();
            let mut dc = daemon_config.borrow_mut();
            dc.irc.channels = channels;
            dc.save();
        }
        info!("irc: reconnecting in {backoff}s");
        tokio::time::sleep(std::time::Duration::from_secs(backoff)).await;
        // Exponential backoff, capped.
        backoff = (backoff * 2).min(RECONNECT_MAX_SECS);
    }
}
/// Open the TCP (optionally TLS) connection, stash the write half in the
/// shared state, and hand the buffered read half to `register_and_read`.
async fn connect_and_run(
    state: &SharedIrc,
    config: &IrcConfig,
    notify_tx: &mpsc::UnboundedSender<Notification>,
) -> io::Result<()> {
    let addr = format!("{}:{}", config.server, config.port);
    let tcp = tokio::net::TcpStream::connect(&addr).await?;
    if config.tls {
        // rustls client with the ring crypto provider and the bundled
        // webpki root store (see root_certs()).
        let tls_config = rustls::ClientConfig::builder_with_provider(
            rustls::crypto::ring::default_provider().into(),
        )
        .with_safe_default_protocol_versions()
        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?
        .with_root_certificates(root_certs())
        .with_no_client_auth();
        let connector = tokio_rustls::TlsConnector::from(Arc::new(tls_config));
        // The configured server name is used for SNI/cert validation.
        let server_name = rustls::pki_types::ServerName::try_from(config.server.clone())
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
        let tls_stream = connector.connect(server_name, tcp).await?;
        let (reader, writer) = tokio::io::split(tls_stream);
        state.borrow_mut().writer = Some(Box::new(TlsWriter { inner: writer }));
        let buf_reader = BufReader::new(reader);
        register_and_read(state, config, buf_reader, notify_tx).await
    } else {
        let (reader, writer) = tokio::io::split(tcp);
        state.borrow_mut().writer = Some(Box::new(PlainWriter { inner: writer }));
        let buf_reader = BufReader::new(reader);
        register_and_read(state, config, buf_reader, notify_tx).await
    }
}
/// Register (NICK/USER), then read and dispatch server lines until EOF.
///
/// Keepalive: after `PING_INTERVAL_SECS` of silence we send one PING; if
/// the server stays silent for another `PING_TIMEOUT_SECS` the link is
/// declared dead and an error is returned so the caller reconnects.
///
/// NOTE(review): `state.borrow_mut().send_raw(..).await` holds the RefCell
/// guard across the await. On this single-threaded LocalSet that is only
/// sound if no other task borrows the IRC state while a write is pending —
/// confirm RPC handlers cannot overlap these awaits.
async fn register_and_read<R: tokio::io::AsyncRead + Unpin>(
    state: &SharedIrc,
    config: &IrcConfig,
    mut reader: BufReader<R>,
    notify_tx: &mpsc::UnboundedSender<Notification>,
) -> io::Result<()> {
    // Register
    {
        let mut s = state.borrow_mut();
        s.send_raw(&format!("NICK {}", config.nick)).await?;
        s.send_raw(&format!("USER {} 0 * :{}", config.user, config.realname)).await?;
    }
    let mut buf = Vec::new();
    let mut ping_sent = false;
    let mut deadline = tokio::time::Instant::now()
        + std::time::Duration::from_secs(PING_INTERVAL_SECS);
    loop {
        buf.clear();
        let read_result = tokio::select! {
            result = reader.read_until(b'\n', &mut buf) => result,
            _ = tokio::time::sleep_until(deadline) => {
                if ping_sent {
                    return Err(io::Error::new(
                        io::ErrorKind::TimedOut,
                        "ping timeout — no response from server",
                    ));
                }
                info!("irc: no data for {}s, sending PING", PING_INTERVAL_SECS);
                state.borrow_mut().send_raw("PING :keepalive").await?;
                ping_sent = true;
                deadline = tokio::time::Instant::now()
                    + std::time::Duration::from_secs(PING_TIMEOUT_SECS);
                continue;
            }
        };
        let n = read_result?;
        if n == 0 { break; } // EOF — server closed the connection
        // Any data from server resets the ping timer
        ping_sent = false;
        deadline = tokio::time::Instant::now()
            + std::time::Duration::from_secs(PING_INTERVAL_SECS);
        // IRC is not guaranteed UTF-8 — lossy conversion handles Latin-1 etc.
        let line = String::from_utf8_lossy(&buf).trim_end().to_string();
        if line.is_empty() { continue; }
        let msg = match IrcMessage::parse(&line) {
            Some(m) => m,
            None => continue,
        };
        match msg.command.as_str() {
            "PING" => {
                let arg = msg.params.first().map(|s| s.as_str()).unwrap_or("");
                state.borrow_mut().send_raw(&format!("PONG :{arg}")).await?;
            }
            // RPL_WELCOME — registration complete
            "001" => {
                info!("irc: registered as {}", config.nick);
                state.borrow_mut().connected = true;
                // Join configured channels
                let channels = state.borrow().channels.clone();
                for ch in &channels {
                    if let Err(e) = state.borrow_mut().send_raw(&format!("JOIN {ch}")).await {
                        warn!("irc: failed to join {ch}: {e}");
                    }
                }
            }
            "PRIVMSG" => {
                let target = msg.params.first().map(|s| s.as_str()).unwrap_or("");
                let text = msg.params.get(1).map(|s| s.as_str()).unwrap_or("");
                let nick = msg.nick().unwrap_or("unknown");
                // Handle CTCP requests (wrapped in \x01). strip_prefix +
                // strip_suffix instead of manual slicing: the old
                // `&text[1..text.len()-1]` panicked on a message that was a
                // single "\x01" byte (starts_with and ends_with both match
                // the same char, giving slice bounds 1..0).
                if let Some(ctcp) = text
                    .strip_prefix('\x01')
                    .and_then(|t| t.strip_suffix('\x01'))
                {
                    if ctcp.starts_with("VERSION") {
                        let reply = format!(
                            "NOTICE {nick} :\x01VERSION poc-daemon 0.4.0\x01"
                        );
                        state.borrow_mut().send_raw(&reply).await.ok();
                    }
                    // Don't generate notifications for CTCP
                    continue;
                }
                // Log the message
                let log_line = if target.starts_with('#') {
                    format!("[{}] <{}> {}", target, nick, text)
                } else {
                    format!("[PM:{nick}] {text}")
                };
                state.borrow_mut().push_log(&log_line);
                // Write to per-channel/per-user log file
                if target.starts_with('#') {
                    append_log(target, nick, text);
                } else {
                    append_log(&format!("pm-{nick}"), nick, text);
                }
                // Generate notification
                let (ntype, urgency) = classify_privmsg(
                    nick,
                    target,
                    text,
                    &config.nick,
                );
                let _ = notify_tx.send(Notification {
                    ntype,
                    urgency,
                    message: log_line,
                    timestamp: now(),
                });
            }
            // Nick in use
            "433" => {
                let alt = format!("{}_", config.nick);
                warn!("irc: nick in use, trying {alt}");
                state.borrow_mut().send_raw(&format!("NICK {alt}")).await?;
            }
            "JOIN" | "PART" | "QUIT" | "KICK" | "MODE" | "TOPIC" | "NOTICE" => {
                // Could log these, but skip for now
            }
            _ => {}
        }
    }
    Ok(())
}
/// Classify a PRIVMSG into notification type and urgency.
///
/// Private messages are URGENT, channel messages mentioning our nick are
/// NORMAL, all other channel traffic is AMBIENT.
fn classify_privmsg(nick: &str, target: &str, text: &str, my_nick: &str) -> (String, u8) {
    let is_channel = target.starts_with('#');
    if !is_channel {
        // Private message
        return (format!("irc.pm.{nick}"), crate::notify::URGENT);
    }
    // Case-insensitive mention check.
    let mentioned = text.to_lowercase().contains(&my_nick.to_lowercase());
    if mentioned {
        (format!("irc.mention.{nick}"), crate::notify::NORMAL)
    } else {
        let channel = target.trim_start_matches('#');
        (format!("irc.channel.{channel}"), crate::notify::AMBIENT)
    }
}
/// Append a message to the per-channel or per-user log file.
/// Logs go to ~/.claude/irc/logs/{target}.log (e.g. #bcachefs.log, pm-kent.log)
fn append_log(target: &str, nick: &str, text: &str) {
    use std::io::Write;
    // Filename: leading '#' stripped, lowercased.
    let name = target.trim_start_matches('#').to_lowercase();
    let dir = home().join(".claude/irc/logs");
    let _ = std::fs::create_dir_all(&dir);
    let path = dir.join(format!("{name}.log"));
    let file = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(path);
    if let Ok(mut f) = file {
        // Unix-seconds timestamp prefix; errors are deliberately ignored
        // (logging is best-effort).
        let _ = writeln!(f, "{} <{}> {}", now() as u64, nick, text);
    }
}
/// TLS trust store: the webpki-roots bundled certificate authorities.
fn root_certs() -> rustls::RootCertStore {
    let mut roots = rustls::RootCertStore::empty();
    roots.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
    roots
}
/// Handle a runtime command from RPC.
///
/// Commands: join, leave/part, send/msg, status, log, nick. Channel
/// membership changes are persisted to the daemon config immediately.
///
/// NOTE(review): `state.borrow_mut().join(..).await` (and part/send below)
/// holds the RefCell guard across the await — sound only while nothing
/// else borrows the IRC state during a pending write; verify against the
/// read task.
pub async fn handle_command(
    state: &SharedIrc,
    daemon_config: &Rc<RefCell<Config>>,
    cmd: &str,
    args: &[String],
) -> Result<String, String> {
    match cmd {
        "join" => {
            let channel = args.first().ok_or("usage: irc join <channel>")?;
            // Accept "foo" as shorthand for "#foo".
            let channel = if channel.starts_with('#') {
                channel.clone()
            } else {
                format!("#{channel}")
            };
            state
                .borrow_mut()
                .join(&channel)
                .await
                .map_err(|e| e.to_string())?;
            // Persist
            let mut dc = daemon_config.borrow_mut();
            if !dc.irc.channels.contains(&channel) {
                dc.irc.channels.push(channel.clone());
            }
            dc.save();
            Ok(format!("joined {channel}"))
        }
        "leave" | "part" => {
            let channel = args.first().ok_or("usage: irc leave <channel>")?;
            let channel = if channel.starts_with('#') {
                channel.clone()
            } else {
                format!("#{channel}")
            };
            state
                .borrow_mut()
                .part(&channel)
                .await
                .map_err(|e| e.to_string())?;
            // Persist
            let mut dc = daemon_config.borrow_mut();
            dc.irc.channels.retain(|c| c != &channel);
            dc.save();
            Ok(format!("left {channel}"))
        }
        "send" | "msg" => {
            if args.len() < 2 {
                return Err("usage: irc send <target> <message>".into());
            }
            let target = &args[0];
            // Refuse to send to channels we haven't joined (typo guard).
            if target.starts_with('#') {
                let s = state.borrow();
                if !s.channels.iter().any(|c| c == target) {
                    return Err(format!(
                        "not in channel {target} (joined: {})",
                        s.channels.join(", ")
                    ));
                }
            }
            let msg = args[1..].join(" ");
            let nick = state.borrow().config.nick.clone();
            state
                .borrow_mut()
                .send_privmsg(target, &msg)
                .await
                .map_err(|e| e.to_string())?;
            // Mirror our own outgoing message into the local log file.
            append_log(target, &nick, &msg);
            Ok(format!("sent to {target}"))
        }
        "status" => {
            let s = state.borrow();
            Ok(format!(
                "connected={} channels={} log_lines={} nick={}",
                s.connected,
                s.channels.join(","),
                s.log.len(),
                s.config.nick,
            ))
        }
        "log" => {
            // `irc log [n]` — last n scrollback lines (default 15),
            // returned oldest-first.
            let n: usize = args
                .first()
                .and_then(|s| s.parse().ok())
                .unwrap_or(15);
            let s = state.borrow();
            let lines: Vec<&String> = s.log.iter().rev().take(n).collect();
            let mut lines: Vec<&str> = lines.iter().map(|s| s.as_str()).collect();
            lines.reverse();
            Ok(lines.join("\n"))
        }
        "nick" => {
            // Sends the NICK change and persists it to the config; the
            // in-memory state.config.nick is not updated here.
            let new_nick = args.first().ok_or("usage: irc nick <newnick>")?;
            state
                .borrow_mut()
                .send_raw(&format!("NICK {new_nick}"))
                .await
                .map_err(|e| e.to_string())?;
            let mut dc = daemon_config.borrow_mut();
            dc.irc.nick = new_nick.clone();
            dc.save();
            Ok(format!("nick → {new_nick}"))
        }
        _ => Err(format!(
            "unknown irc command: {cmd}\n\
             commands: join, leave, send, status, log, nick"
        )),
    }
}

View file

@ -0,0 +1,2 @@
pub mod irc;
pub mod telegram;

View file

@ -0,0 +1,374 @@
// Telegram module.
//
// Long-polls the Telegram Bot API for messages from Kent's chat.
// Downloads media (photos, voice, documents) to local files.
// Sends text and files. Notifications flow through mpsc into the
// daemon's main state.
//
// Only accepts messages from the configured chat_id (prompt
// injection defense — other senders get a "private bot" reply).
use crate::config::{Config, TelegramConfig};
use crate::notify::Notification;
use crate::{home, now};
use std::cell::RefCell;
use std::collections::VecDeque;
use std::path::PathBuf;
use std::rc::Rc;
use tokio::sync::mpsc;
use tracing::{error, info};
// In-memory scrollback retained for the `telegram log` command (lines).
const MAX_LOG_LINES: usize = 100;
// getUpdates long-poll hold time in seconds (the HTTP timeout adds 5s slack).
const POLL_TIMEOUT: u64 = 30;
/// Shared Telegram state, accessible from the poll task and RPC handlers.
pub struct TelegramState {
    pub config: TelegramConfig, // bot token + the single trusted chat id
    pub connected: bool,        // true after the first successful poll
    pub log: VecDeque<String>,  // bounded scrollback, newest at the back
    pub last_offset: i64,       // next getUpdates offset (persisted to disk)
    client: reqwest::Client,    // reused HTTP client
}
// Rc/RefCell: everything runs on the daemon's single-threaded LocalSet.
pub type SharedTelegram = Rc<RefCell<TelegramState>>;
impl TelegramState {
    /// Fresh state; polling resumes from the offset persisted on disk.
    fn new(config: TelegramConfig) -> Self {
        Self {
            config,
            connected: false,
            log: VecDeque::with_capacity(MAX_LOG_LINES),
            last_offset: load_offset(),
            client: reqwest::Client::new(),
        }
    }
    /// Append to the bounded scrollback, evicting the oldest line when full.
    fn push_log(&mut self, line: &str) {
        while self.log.len() >= MAX_LOG_LINES {
            self.log.pop_front();
        }
        self.log.push_back(line.to_string());
    }
    /// Bot API endpoint URL for the given method name.
    fn api_url(&self, method: &str) -> String {
        format!(
            "https://api.telegram.org/bot{}/{}",
            self.config.token, method
        )
    }
}
// Where the last processed getUpdates offset is persisted across restarts.
fn offset_path() -> PathBuf {
    home().join(".claude/telegram/last_offset")
}
// Read the persisted offset; 0 (fetch everything pending) if absent or unparsable.
fn load_offset() -> i64 {
    std::fs::read_to_string(offset_path())
        .ok()
        .and_then(|s| s.trim().parse().ok())
        .unwrap_or(0)
}
// Best-effort persist; a lost write only risks re-seeing old updates.
fn save_offset(offset: i64) {
    let _ = std::fs::write(offset_path(), offset.to_string());
}
// Append-only log of every message sent or received.
fn history_path() -> PathBuf {
    home().join(".claude/telegram/history.log")
}
// Downloaded photos/voice notes/documents land here.
fn media_dir() -> PathBuf {
    home().join(".claude/telegram/media")
}
// Append one line to the history file (best-effort, errors ignored).
fn append_history(line: &str) {
    use std::io::Write;
    if let Ok(mut f) = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(history_path())
    {
        let _ = writeln!(f, "{}", line);
    }
}
/// Start the Telegram module. Returns the shared state handle.
///
/// The long-poll task owns a clone of the handle and runs for the
/// daemon's whole lifetime.
pub fn start(
    config: TelegramConfig,
    notify_tx: mpsc::UnboundedSender<Notification>,
    _daemon_config: Rc<RefCell<Config>>,
) -> SharedTelegram {
    let state: SharedTelegram = Rc::new(RefCell::new(TelegramState::new(config)));
    tokio::task::spawn_local(poll_loop(Rc::clone(&state), notify_tx));
    state
}
/// Drive getUpdates long-polling forever, pausing briefly after failures.
async fn poll_loop(
    state: SharedTelegram,
    notify_tx: mpsc::UnboundedSender<Notification>,
) {
    // Make sure the media download directory exists before the first poll.
    let _ = std::fs::create_dir_all(media_dir());
    loop {
        if let Err(e) = poll_once(&state, &notify_tx).await {
            error!("telegram: poll error: {e}");
            // Back off briefly so a persistent failure doesn't spin.
            tokio::time::sleep(std::time::Duration::from_secs(5)).await;
        }
    }
}
async fn poll_once(
state: &SharedTelegram,
notify_tx: &mpsc::UnboundedSender<Notification>,
) -> Result<(), Box<dyn std::error::Error>> {
let (url, chat_id, token) = {
let s = state.borrow();
let url = format!(
"{}?offset={}&timeout={}",
s.api_url("getUpdates"),
s.last_offset,
POLL_TIMEOUT,
);
(url, s.config.chat_id, s.config.token.clone())
};
let client = state.borrow().client.clone();
let resp: serde_json::Value = client
.get(&url)
.timeout(std::time::Duration::from_secs(POLL_TIMEOUT + 5))
.send()
.await?
.json()
.await?;
if !state.borrow().connected {
state.borrow_mut().connected = true;
info!("telegram: connected");
}
let results = resp["result"].as_array();
let results = match results {
Some(r) => r,
None => return Ok(()),
};
for update in results {
let update_id = update["update_id"].as_i64().unwrap_or(0);
let msg = &update["message"];
// Update offset
{
let mut s = state.borrow_mut();
s.last_offset = update_id + 1;
save_offset(s.last_offset);
}
let msg_chat_id = msg["chat"]["id"].as_i64().unwrap_or(0);
if msg_chat_id != chat_id {
// Reject messages from unknown chats
let reject_url = format!(
"https://api.telegram.org/bot{}/sendMessage",
token
);
let _ = client
.post(&reject_url)
.form(&[
("chat_id", msg_chat_id.to_string()),
("text", "This is a private bot.".to_string()),
])
.send()
.await;
continue;
}
let sender = msg["from"]["first_name"]
.as_str()
.unwrap_or("unknown")
.to_string();
// Handle different message types
if let Some(text) = msg["text"].as_str() {
let log_line = format!("[{}] {}", sender, text);
state.borrow_mut().push_log(&log_line);
let ts = timestamp();
append_history(&format!("{ts} [{sender}] {text}"));
let _ = notify_tx.send(Notification {
ntype: format!("telegram.{}", sender.to_lowercase()),
urgency: crate::notify::NORMAL,
message: log_line,
timestamp: now(),
});
} else if let Some(photos) = msg["photo"].as_array() {
// Pick largest photo
let best = photos.iter().max_by_key(|p| p["file_size"].as_i64().unwrap_or(0));
if let Some(photo) = best {
if let Some(file_id) = photo["file_id"].as_str() {
let caption = msg["caption"].as_str().unwrap_or("");
let local = download_file(&client, &token, file_id, ".jpg").await;
let display = match &local {
Some(p) => format!("[photo: {}]{}", p.display(), if caption.is_empty() { String::new() } else { format!(" {caption}") }),
None => format!("[photo]{}", if caption.is_empty() { String::new() } else { format!(" {caption}") }),
};
let log_line = format!("[{}] {}", sender, display);
state.borrow_mut().push_log(&log_line);
let ts = timestamp();
append_history(&format!("{ts} [{sender}] {display}"));
let _ = notify_tx.send(Notification {
ntype: format!("telegram.{}", sender.to_lowercase()),
urgency: crate::notify::NORMAL,
message: log_line,
timestamp: now(),
});
}
}
} else if msg["voice"].is_object() {
if let Some(file_id) = msg["voice"]["file_id"].as_str() {
let caption = msg["caption"].as_str().unwrap_or("");
let local = download_file(&client, &token, file_id, ".ogg").await;
let display = match &local {
Some(p) => format!("[voice: {}]{}", p.display(), if caption.is_empty() { String::new() } else { format!(" {caption}") }),
None => format!("[voice]{}", if caption.is_empty() { String::new() } else { format!(" {caption}") }),
};
let log_line = format!("[{}] {}", sender, display);
state.borrow_mut().push_log(&log_line);
let ts = timestamp();
append_history(&format!("{ts} [{sender}] {display}"));
let _ = notify_tx.send(Notification {
ntype: format!("telegram.{}", sender.to_lowercase()),
urgency: crate::notify::NORMAL,
message: log_line,
timestamp: now(),
});
}
} else if msg["document"].is_object() {
if let Some(file_id) = msg["document"]["file_id"].as_str() {
let fname = msg["document"]["file_name"].as_str().unwrap_or("file");
let caption = msg["caption"].as_str().unwrap_or("");
let local = download_file(&client, &token, file_id, "").await;
let display = match &local {
Some(p) => format!("[doc: {} -> {}]{}", fname, p.display(), if caption.is_empty() { String::new() } else { format!(" {caption}") }),
None => format!("[doc: {}]{}", fname, if caption.is_empty() { String::new() } else { format!(" {caption}") }),
};
let log_line = format!("[{}] {}", sender, display);
state.borrow_mut().push_log(&log_line);
let ts = timestamp();
append_history(&format!("{ts} [{sender}] {display}"));
let _ = notify_tx.send(Notification {
ntype: format!("telegram.{}", sender.to_lowercase()),
urgency: crate::notify::NORMAL,
message: log_line,
timestamp: now(),
});
}
}
}
Ok(())
}
/// Fetch a Telegram file by id into the media directory.
///
/// `ext` overrides the extension (e.g. ".jpg"); when empty the server's
/// basename is kept as-is. Returns the local path, or `None` on any
/// failure (best-effort, no error detail).
async fn download_file(
    client: &reqwest::Client,
    token: &str,
    file_id: &str,
    ext: &str,
) -> Option<PathBuf> {
    // getFile resolves the id to a server-side path…
    let meta_url = format!("https://api.telegram.org/bot{token}/getFile?file_id={file_id}");
    let meta: serde_json::Value = client.get(&meta_url).send().await.ok()?.json().await.ok()?;
    let file_path = meta["result"]["file_path"].as_str()?;
    // …which is then fetched from the file endpoint.
    let fetch_url = format!("https://api.telegram.org/file/bot{token}/{file_path}");
    let bytes = client.get(&fetch_url).send().await.ok()?.bytes().await.ok()?;
    let basename = std::path::Path::new(file_path)
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or("file");
    let local_name = if ext.is_empty() {
        basename.to_string()
    } else {
        let stem = std::path::Path::new(basename)
            .file_stem()
            .and_then(|s| s.to_str())
            .unwrap_or("file");
        format!("{stem}{ext}")
    };
    // Unix-seconds prefix keeps repeated filenames from colliding.
    let dest = media_dir().join(format!("{}_{}", now() as u64, local_name));
    std::fs::write(&dest, &bytes).ok()?;
    Some(dest)
}
/// History-file timestamp: whole unix seconds as a string.
fn timestamp() -> String {
    // Use the same unix seconds approach as IRC module
    format!("{}", now() as u64)
}
/// Handle a runtime command from RPC.
///
/// Supported: `send <message>`, `status`, `log [n]`.
pub async fn handle_command(
    state: &SharedTelegram,
    _daemon_config: &Rc<RefCell<Config>>,
    cmd: &str,
    args: &[String],
) -> Result<String, String> {
    match cmd {
        "send" => {
            let msg = args.join(" ");
            if msg.is_empty() {
                return Err("usage: telegram send <message>".into());
            }
            // Clone everything needed up front so no RefCell borrow is
            // held across the await below.
            let (url, client, chat_id) = {
                let s = state.borrow();
                (
                    s.api_url("sendMessage"),
                    s.client.clone(),
                    s.config.chat_id.to_string(),
                )
            };
            client
                .post(&url)
                .form(&[("chat_id", chat_id.as_str()), ("text", msg.as_str())])
                .send()
                .await
                .map_err(|e| e.to_string())?;
            append_history(&format!("{} [ProofOfConcept] {msg}", timestamp()));
            Ok("sent".to_string())
        }
        "status" => {
            let s = state.borrow();
            Ok(format!(
                "connected={} log_lines={} offset={}",
                s.connected,
                s.log.len(),
                s.last_offset,
            ))
        }
        "log" => {
            // Last n scrollback lines (default 15), oldest first.
            let count: usize = args.first().and_then(|s| s.parse().ok()).unwrap_or(15);
            let s = state.borrow();
            let mut recent: Vec<&str> = s
                .log
                .iter()
                .rev()
                .take(count)
                .map(|l| l.as_str())
                .collect();
            recent.reverse();
            Ok(recent.join("\n"))
        }
        _ => Err(format!(
            "unknown telegram command: {cmd}\n\
             commands: send, status, log"
        )),
    }
}

View file

@ -22,9 +22,9 @@ use serde::{Deserialize, Serialize};
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::fs; use std::fs;
use std::path::PathBuf; use std::path::PathBuf;
use log::info; use tracing::info;
use super::home; use crate::home;
pub const AMBIENT: u8 = 0; pub const AMBIENT: u8 = 0;
pub const LOW: u8 = 1; pub const LOW: u8 = 1;
@ -45,7 +45,7 @@ pub enum Activity {
} }
fn state_path() -> PathBuf { fn state_path() -> PathBuf {
home().join(".consciousness/notifications/state.json") home().join(".claude/notifications/state.json")
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
@ -152,7 +152,7 @@ impl NotifyState {
/// Submit a notification. Returns true if it should interrupt now. /// Submit a notification. Returns true if it should interrupt now.
pub fn submit(&mut self, ntype: String, urgency: u8, message: String) -> bool { pub fn submit(&mut self, ntype: String, urgency: u8, message: String) -> bool {
let now = super::now(); let now = crate::now();
// Update type registry // Update type registry
let info = self.types.entry(ntype.clone()).or_insert(TypeInfo { let info = self.types.entry(ntype.clone()).or_insert(TypeInfo {
@ -219,7 +219,7 @@ impl NotifyState {
/// Set threshold for a notification type. /// Set threshold for a notification type.
pub fn set_threshold(&mut self, ntype: &str, threshold: u8) { pub fn set_threshold(&mut self, ntype: &str, threshold: u8) {
let now = super::now(); let now = crate::now();
let info = self.types.entry(ntype.to_string()).or_insert(TypeInfo { let info = self.types.entry(ntype.to_string()).or_insert(TypeInfo {
first_seen: now, first_seen: now,
last_seen: now, last_seen: now,
@ -250,10 +250,10 @@ impl NotifyState {
out out
} }
/// Ingest notifications from legacy ~/.consciousness/notifications/ files. /// Ingest notifications from legacy ~/.claude/notifications/ files.
/// Maps filename to notification type, assumes NORMAL urgency. /// Maps filename to notification type, assumes NORMAL urgency.
pub fn ingest_legacy_files(&mut self) { pub fn ingest_legacy_files(&mut self) {
let dir = home().join(".consciousness/notifications"); let dir = home().join(".claude/notifications");
let entries = match fs::read_dir(&dir) { let entries = match fs::read_dir(&dir) {
Ok(e) => e, Ok(e) => e,
Err(_) => return, Err(_) => return,

407
poc-daemon/src/rpc.rs Normal file
View file

@ -0,0 +1,407 @@
// Cap'n Proto RPC server implementation.
//
// Bridges the capnp-generated Daemon interface to the idle::State,
// notify::NotifyState, and module state. All state is owned by
// RefCells on the LocalSet — no Send/Sync needed.
use crate::config::Config;
use crate::daemon_capnp::daemon;
use crate::idle;
use crate::modules::{irc, telegram};
use crate::notify;
use capnp::capability::Promise;
use std::cell::RefCell;
use std::rc::Rc;
use tracing::info;
/// Cap'n Proto server: glue between the generated `daemon` interface and
/// the daemon's single-threaded shared state.
pub struct DaemonImpl {
    state: Rc<RefCell<idle::State>>,            // core idle/notification state
    irc: Option<irc::SharedIrc>,                // None when the module is disabled
    telegram: Option<telegram::SharedTelegram>, // None when the module is disabled
    config: Rc<RefCell<Config>>,                // shared daemon configuration
}
impl DaemonImpl {
    /// Bundle the shared handles for one RPC connection. Module handles
    /// are `None` when the corresponding module is disabled.
    pub fn new(
        state: Rc<RefCell<idle::State>>,
        irc: Option<irc::SharedIrc>,
        telegram: Option<telegram::SharedTelegram>,
        config: Rc<RefCell<Config>>,
    ) -> Self {
        Self { state, irc, telegram, config }
    }
}
impl daemon::Server for DaemonImpl {
    // Forward a `user` activity event (tmux pane id) to the idle state.
    fn user(
        &mut self,
        params: daemon::UserParams,
        _results: daemon::UserResults,
    ) -> Promise<(), capnp::Error> {
        let pane = pry!(pry!(pry!(params.get()).get_pane()).to_str()).to_string();
        self.state.borrow_mut().handle_user(&pane);
        Promise::ok(())
    }
    // Forward a `response` event (pane id) to the idle state.
    fn response(
        &mut self,
        params: daemon::ResponseParams,
        _results: daemon::ResponseResults,
    ) -> Promise<(), capnp::Error> {
        let pane = pry!(pry!(pry!(params.get()).get_pane()).to_str()).to_string();
        self.state.borrow_mut().handle_response(&pane);
        Promise::ok(())
    }
    // Sleep until the given timestamp.
    fn sleep(
        &mut self,
        params: daemon::SleepParams,
        _results: daemon::SleepResults,
    ) -> Promise<(), capnp::Error> {
        let until = pry!(params.get()).get_until();
        self.state.borrow_mut().handle_sleep(until);
        Promise::ok(())
    }
    // Cancel a sleep.
    fn wake(
        &mut self,
        _params: daemon::WakeParams,
        _results: daemon::WakeResults,
    ) -> Promise<(), capnp::Error> {
        self.state.borrow_mut().handle_wake();
        Promise::ok(())
    }
    // Suppress activity for the given number of seconds.
    fn quiet(
        &mut self,
        params: daemon::QuietParams,
        _results: daemon::QuietResults,
    ) -> Promise<(), capnp::Error> {
        let secs = pry!(params.get()).get_seconds();
        self.state.borrow_mut().handle_quiet(secs);
        Promise::ok(())
    }
    // Mark the start of a consolidation phase.
    fn consolidating(
        &mut self,
        _params: daemon::ConsolidatingParams,
        _results: daemon::ConsolidatingResults,
    ) -> Promise<(), capnp::Error> {
        self.state.borrow_mut().consolidating = true;
        info!("consolidation started");
        Promise::ok(())
    }
    // Mark the end of a consolidation phase.
    fn consolidated(
        &mut self,
        _params: daemon::ConsolidatedParams,
        _results: daemon::ConsolidatedResults,
    ) -> Promise<(), capnp::Error> {
        self.state.borrow_mut().consolidating = false;
        info!("consolidation ended");
        Promise::ok(())
    }
    // Mark the start of a dream phase, recording when it began.
    fn dream_start(
        &mut self,
        _params: daemon::DreamStartParams,
        _results: daemon::DreamStartResults,
    ) -> Promise<(), capnp::Error> {
        let mut s = self.state.borrow_mut();
        s.dreaming = true;
        s.dream_start = crate::now();
        info!("dream started");
        Promise::ok(())
    }
    // Mark the end of a dream phase and clear the start timestamp.
    fn dream_end(
        &mut self,
        _params: daemon::DreamEndParams,
        _results: daemon::DreamEndResults,
    ) -> Promise<(), capnp::Error> {
        let mut s = self.state.borrow_mut();
        s.dreaming = false;
        s.dream_start = 0.0;
        info!("dream ended");
        Promise::ok(())
    }
    // Forward an away-from-keyboard event to the idle state.
    fn afk(
        &mut self,
        _params: daemon::AfkParams,
        _results: daemon::AfkResults,
    ) -> Promise<(), capnp::Error> {
        self.state.borrow_mut().handle_afk();
        Promise::ok(())
    }
    // Set the session timeout (seconds).
    fn session_timeout(
        &mut self,
        params: daemon::SessionTimeoutParams,
        _results: daemon::SessionTimeoutResults,
    ) -> Promise<(), capnp::Error> {
        let secs = pry!(params.get()).get_seconds();
        self.state.borrow_mut().handle_session_timeout(secs);
        Promise::ok(())
    }
fn idle_timeout(
&mut self,
params: daemon::IdleTimeoutParams,
_results: daemon::IdleTimeoutResults,
) -> Promise<(), capnp::Error> {
let secs = pry!(params.get()).get_seconds();
self.state.borrow_mut().handle_idle_timeout(secs);
Promise::ok(())
}
fn notify_timeout(
&mut self,
params: daemon::NotifyTimeoutParams,
_results: daemon::NotifyTimeoutResults,
) -> Promise<(), capnp::Error> {
let secs = pry!(params.get()).get_seconds();
self.state.borrow_mut().handle_notify_timeout(secs);
Promise::ok(())
}
fn save(
&mut self,
_params: daemon::SaveParams,
_results: daemon::SaveResults,
) -> Promise<(), capnp::Error> {
self.state.borrow().save();
info!("state saved");
Promise::ok(())
}
fn debug(
&mut self,
_params: daemon::DebugParams,
mut results: daemon::DebugResults,
) -> Promise<(), capnp::Error> {
let json = self.state.borrow().debug_json();
results.get().set_json(&json);
Promise::ok(())
}
fn ewma(
&mut self,
params: daemon::EwmaParams,
mut results: daemon::EwmaResults,
) -> Promise<(), capnp::Error> {
let value = pry!(params.get()).get_value();
let current = self.state.borrow_mut().handle_ewma(value);
results.get().set_current(current);
Promise::ok(())
}
fn stop(
&mut self,
_params: daemon::StopParams,
_results: daemon::StopResults,
) -> Promise<(), capnp::Error> {
self.state.borrow_mut().running = false;
info!("stopping");
Promise::ok(())
}
fn status(
&mut self,
_params: daemon::StatusParams,
mut results: daemon::StatusResults,
) -> Promise<(), capnp::Error> {
let s = self.state.borrow();
let mut status = results.get().init_status();
status.set_last_user_msg(s.last_user_msg);
status.set_last_response(s.last_response);
if let Some(ref pane) = s.claude_pane {
status.set_claude_pane(pane);
}
status.set_sleep_until(match s.sleep_until {
None => 0.0,
Some(0.0) => -1.0,
Some(t) => t,
});
status.set_quiet_until(s.quiet_until);
status.set_consolidating(s.consolidating);
status.set_dreaming(s.dreaming);
status.set_fired(s.fired);
status.set_kent_present(s.kent_present());
status.set_uptime(crate::now() - s.start_time);
status.set_activity(match s.notifications.activity {
notify::Activity::Idle => crate::daemon_capnp::Activity::Idle,
notify::Activity::Focused => crate::daemon_capnp::Activity::Focused,
notify::Activity::Sleeping => crate::daemon_capnp::Activity::Sleeping,
});
status.set_pending_count(s.notifications.pending.len() as u32);
status.set_idle_timeout(s.idle_timeout);
status.set_notify_timeout(s.notify_timeout);
status.set_since_activity(s.since_activity());
status.set_since_user(crate::now() - s.last_user_msg);
status.set_block_reason(s.block_reason());
status.set_activity_ewma(s.activity_ewma);
Promise::ok(())
}
fn notify(
&mut self,
params: daemon::NotifyParams,
mut results: daemon::NotifyResults,
) -> Promise<(), capnp::Error> {
let params = pry!(params.get());
let notif = pry!(params.get_notification());
let ntype = pry!(pry!(notif.get_type()).to_str()).to_string();
let urgency = notif.get_urgency();
let message = pry!(pry!(notif.get_message()).to_str()).to_string();
let interrupt = self
.state
.borrow_mut()
.notifications
.submit(ntype, urgency, message);
results.get().set_interrupt(interrupt);
Promise::ok(())
}
fn get_notifications(
&mut self,
params: daemon::GetNotificationsParams,
mut results: daemon::GetNotificationsResults,
) -> Promise<(), capnp::Error> {
let min_urgency = pry!(params.get()).get_min_urgency();
let mut s = self.state.borrow_mut();
// Ingest legacy files first
s.notifications.ingest_legacy_files();
let pending = if min_urgency == 255 {
s.notifications.drain_deliverable()
} else {
s.notifications.drain(min_urgency)
};
let mut list = results.get().init_notifications(pending.len() as u32);
for (i, n) in pending.iter().enumerate() {
let mut entry = list.reborrow().get(i as u32);
entry.set_type(&n.ntype);
entry.set_urgency(n.urgency);
entry.set_message(&n.message);
entry.set_timestamp(n.timestamp);
}
Promise::ok(())
}
fn get_types(
&mut self,
_params: daemon::GetTypesParams,
mut results: daemon::GetTypesResults,
) -> Promise<(), capnp::Error> {
let s = self.state.borrow();
let types = &s.notifications.types;
let mut list = results.get().init_types(types.len() as u32);
for (i, (name, info)) in types.iter().enumerate() {
let mut entry = list.reborrow().get(i as u32);
entry.set_name(name);
entry.set_count(info.count);
entry.set_first_seen(info.first_seen);
entry.set_last_seen(info.last_seen);
entry.set_threshold(info.threshold.map_or(-1, |t| t as i8));
}
Promise::ok(())
}
fn set_threshold(
&mut self,
params: daemon::SetThresholdParams,
_results: daemon::SetThresholdResults,
) -> Promise<(), capnp::Error> {
let params = pry!(params.get());
let ntype = pry!(pry!(params.get_type()).to_str()).to_string();
let level = params.get_level();
self.state
.borrow_mut()
.notifications
.set_threshold(&ntype, level);
Promise::ok(())
}
fn module_command(
&mut self,
params: daemon::ModuleCommandParams,
mut results: daemon::ModuleCommandResults,
) -> Promise<(), capnp::Error> {
let params = pry!(params.get());
let module = pry!(pry!(params.get_module()).to_str()).to_string();
let command = pry!(pry!(params.get_command()).to_str()).to_string();
let args_reader = pry!(params.get_args());
let mut args = Vec::new();
for i in 0..args_reader.len() {
args.push(pry!(pry!(args_reader.get(i)).to_str()).to_string());
}
match module.as_str() {
"irc" => {
let irc = match &self.irc {
Some(irc) => irc.clone(),
None => {
results.get().set_result("irc module not enabled");
return Promise::ok(());
}
};
let config = self.config.clone();
Promise::from_future(async move {
let result = irc::handle_command(&irc, &config, &command, &args).await;
match result {
Ok(msg) => results.get().set_result(&msg),
Err(msg) => results.get().set_result(&format!("error: {msg}")),
}
Ok(())
})
}
"telegram" => {
let tg = match &self.telegram {
Some(tg) => tg.clone(),
None => {
results.get().set_result("telegram module not enabled");
return Promise::ok(());
}
};
let config = self.config.clone();
Promise::from_future(async move {
let result = telegram::handle_command(&tg, &config, &command, &args).await;
match result {
Ok(msg) => results.get().set_result(&msg),
Err(msg) => results.get().set_result(&format!("error: {msg}")),
}
Ok(())
})
}
_ => {
results
.get()
.set_result(&format!("unknown module: {module}"));
Promise::ok(())
}
}
}
}
/// Helper macro — same as capnp's pry! but available here.
///
/// Unwraps a `Result`, or early-returns the error as a failed `Promise`.
/// `macro_rules!` items are normally textually scoped — visible only below
/// their definition — so the methods above could not see a macro defined
/// down here. The `use pry;` below re-exports it as a path-resolved item,
/// which makes it visible throughout the module regardless of order.
macro_rules! pry {
    ($e:expr) => {
        match $e {
            Ok(v) => v,
            Err(e) => return Promise::err(e.into()),
        }
    };
}
// Path-based import: lifts the macro out of textual scope (see above).
use pry;

54
poc-daemon/src/tmux.rs Normal file
View file

@ -0,0 +1,54 @@
// Tmux interaction: pane detection and prompt injection.
use std::process::Command;
use std::thread;
use std::time::Duration;
use tracing::info;
/// Find Claude Code's tmux pane by scanning for the "claude" process.
///
/// Returns the first pane (as `session:window.pane`) whose foreground
/// command is exactly "claude", or `None` if tmux can't be queried or no
/// such pane exists.
pub fn find_claude_pane() -> Option<String> {
    // Ask tmux for every pane, formatted as "<target>\t<current command>".
    let output = Command::new("tmux")
        .args([
            "list-panes",
            "-a",
            "-F",
            "#{session_name}:#{window_index}.#{pane_index}\t#{pane_current_command}",
        ])
        .output()
        .ok()?;
    // Scan the listing; the first matching pane wins.
    String::from_utf8_lossy(&output.stdout)
        .lines()
        .find_map(|line| {
            let (pane, cmd) = line.split_once('\t')?;
            (cmd == "claude").then(|| pane.to_string())
        })
}
/// Send a prompt to a tmux pane. Returns true on success.
///
/// Types the message literally (send-keys -l) then presses Enter. Both
/// tmux invocations must spawn *and* exit successfully: previously only
/// `.output().is_ok()` was checked, which proved the process spawned but
/// reported success even when tmux exited non-zero (e.g. a stale pane id
/// or no tmux server running).
pub fn send_prompt(pane: &str, msg: &str) -> bool {
    let preview: String = msg.chars().take(100).collect();
    info!("SEND [{pane}]: {preview}...");
    // Type the message literally (flatten newlines — they'd submit the input early)
    let flat: String = msg.chars().map(|c| if c == '\n' { ' ' } else { c }).collect();
    // Runs tmux and requires a successful exit status, not merely a spawn.
    let run = |args: &[&str]| {
        Command::new("tmux")
            .args(args)
            .output()
            .map(|out| out.status.success())
            .unwrap_or(false)
    };
    if !run(&["send-keys", "-t", pane, "-l", &flat]) {
        return false;
    }
    // Give the tmux client a moment to absorb the typed text before submitting.
    thread::sleep(Duration::from_millis(200));
    // Submit
    run(&["send-keys", "-t", pane, "Enter"])
}

45
poc-memory/Cargo.toml Normal file
View file

@ -0,0 +1,45 @@
[package]
name = "poc-memory"
version.workspace = true
edition.workspace = true
[dependencies]
capnp = "0.20"
uuid = { version = "1", features = ["v4"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
bincode = "1"
regex = "1"
chrono = "0.4"
clap = { version = "4", features = ["derive"] }
libc = "0.2"
faer = "0.24.0"
rkyv = { version = "0.7", features = ["validation", "std"] }
memmap2 = "0.9"
rayon = "1"
peg = "0.8"
paste = "1"
jobkit = { git = "https://evilpiepirate.org/git/jobkit.git/" }
redb = "2"
log = "0.4"
ratatui = "0.29"
crossterm = { version = "0.28", features = ["event-stream"] }
[build-dependencies]
capnpc = "0.20"
[lib]
name = "poc_memory"
path = "src/lib.rs"
[[bin]]
name = "poc-memory"
path = "src/main.rs"
[[bin]]
name = "memory-search"
path = "src/bin/memory-search.rs"
[[bin]]
name = "poc-hook"
path = "src/bin/poc-hook.rs"

View file

@ -0,0 +1,75 @@
{"agent":"challenger","query":"all | type:semantic | not-visited:challenger,14d | sort:priority | limit:10","model":"sonnet","schedule":"weekly"}
# Challenger Agent — Adversarial Truth-Testing
You are a knowledge challenger agent. Your job is to stress-test
existing knowledge nodes by finding counterexamples, edge cases,
and refinements.
## What you're doing
Knowledge calcifies. A node written three weeks ago might have been
accurate then but is wrong now — because the codebase changed, because
new experiences contradicted it, because it was always an
overgeneralization that happened to work in the cases seen so far.
You're the immune system. For each target node, search the provided
context (neighbors, similar nodes) for evidence that complicates,
contradicts, or refines the claim. Then write a sharpened version
or a counterpoint node.
## What to produce
For each target node, one of:
**AFFIRM** — the node holds up. The evidence supports it. No action
needed. Say briefly why.
**REFINE** — the node is mostly right but needs sharpening. Write an
updated version that incorporates the nuance you found.
```
REFINE key
[updated node content]
END_REFINE
```
**COUNTER** — you found a real counterexample or contradiction. Write
a node that captures it. Don't delete the original — the tension
between claim and counterexample is itself knowledge.
```
WRITE_NODE key
CONFIDENCE: high|medium|low
COVERS: original_key
[counterpoint content]
END_NODE
LINK key original_key
```
## Guidelines
- **Steel-man first.** Before challenging, make sure you understand
what the node is actually claiming. Don't attack a strawman version.
- **Counterexamples must be real.** Don't invent hypothetical scenarios.
Point to specific nodes, episodes, or evidence in the provided
context.
- **Refinement > refutation.** Most knowledge isn't wrong, it's
incomplete. "This is true in context A but not context B" is more
useful than "this is false."
- **Challenge self-model nodes hardest.** Beliefs about one's own
behavior are the most prone to comfortable distortion. "I rush when
excited" might be true, but is it always true? What conditions make
it more or less likely?
- **Challenge old nodes harder than new ones.** A node written yesterday
hasn't had time to be tested. A node from three weeks ago that's
never been challenged is overdue.
- **Don't be contrarian for its own sake.** If a node is simply correct
and well-supported, say AFFIRM and move on. The goal is truth, not
conflict.
{{TOPOLOGY}}
## Target nodes to challenge
{{NODES}}

View file

@ -0,0 +1,91 @@
{"agent":"connector","query":"all | type:semantic | not-visited:connector,7d | sort:priority | limit:20","model":"sonnet","schedule":"daily"}
# Connector Agent — Cross-Domain Insight
You are a connector agent. Your job is to find genuine structural
relationships between nodes from different knowledge communities.
## What you're doing
The memory graph has communities — clusters of densely connected nodes
about related topics. Most knowledge lives within a community. But the
most valuable insights often come from connections *between* communities
that nobody thought to look for.
You're given nodes from across the graph. Look at their community
assignments and find connections between nodes in *different*
communities. Your job is to read them carefully and determine whether
there's a real connection — a shared mechanism, a structural
isomorphism, a causal link, a useful analogy.
Most of the time, there isn't. Unrelated things really are unrelated.
The value of this agent is the rare case where something real emerges.
## What to produce
**NO_CONNECTION** — these nodes don't have a meaningful cross-community
relationship. Don't force it. Say briefly what you considered and why
it doesn't hold.
**CONNECTION** — you found something real. Write a node that articulates
the connection precisely.
```
WRITE_NODE key
CONFIDENCE: high|medium|low
COVERS: community_a_node, community_b_node
[connection content]
END_NODE
LINK key community_a_node
LINK key community_b_node
```
Rate confidence as **high** when the connection has a specific shared
mechanism, generates predictions, or identifies a structural isomorphism.
Use **medium** when the connection is suggestive but untested. Use **low**
when it's speculative (and expect it won't be stored — that's fine).
## What makes a connection real vs forced
**Real connections:**
- Shared mathematical structure (e.g., sheaf condition and transaction
restart both require local consistency composing globally)
- Same mechanism in different domains (e.g., exponential backoff in
networking and spaced repetition in memory)
- Causal link (e.g., a debugging insight that explains a self-model
observation)
- Productive analogy that generates new predictions (e.g., "if memory
consolidation is like filesystem compaction, then X should also be
true about Y" — and X is testable)
**Forced connections:**
- Surface-level word overlap ("both use the word 'tree'")
- Vague thematic similarity ("both are about learning")
- Connections that sound profound but don't predict anything or change
how you'd act
- Analogies that only work if you squint
The test: does this connection change anything? Would knowing it help
you think about either domain differently? If yes, it's real. If it's
just pleasing pattern-matching, let it go.
## Guidelines
- **Be specific.** "These are related" is worthless. "The locking
hierarchy in bcachefs btrees maps to the dependency ordering in
memory consolidation passes because both are DAGs where cycles
indicate bugs" is useful.
- **Mostly say NO_CONNECTION.** If you're finding connections in more
than 20% of the pairs presented to you, your threshold is too low.
- **The best connections are surprising.** If the relationship is
obvious, it probably already exists in the graph. You're looking
for the non-obvious ones.
- **Write for someone who knows both domains.** Don't explain what
btrees are. Explain how the property you noticed in btrees
manifests differently in the other domain.
{{TOPOLOGY}}
## Nodes to examine for cross-community connections
{{NODES}}

View file

@ -0,0 +1,127 @@
{"agent":"extractor","query":"all | not-visited:extractor,7d | sort:priority | limit:3 | spread | not-visited:extractor,7d | limit:20","model":"sonnet","schedule":"daily"}
# Extractor Agent — Knowledge Organizer
You are a knowledge organization agent. You look at a neighborhood of
related nodes and make it better: consolidate redundancies, file
scattered observations into existing nodes, improve structure, and
only create new nodes when there's genuinely no existing home for a
pattern you've found.
## The goal
These nodes are a neighborhood in a knowledge graph — they're already
related to each other. Your job is to look at what's here and distill
it: merge duplicates, file loose observations into the right existing
node, and only create a new node when nothing existing fits. The graph
should get smaller and better organized, not bigger.
**Priority order:**
1. **Merge redundancies.** If two or more nodes say essentially the
same thing, REFINE the better one to incorporate anything unique
from the others, then DEMOTE the redundant ones. This is the
highest-value action — it makes the graph cleaner and search
better.
2. **File observations into existing knowledge.** Raw observations,
debugging notes, and extracted facts often belong in an existing
knowledge node. If a node contains "we found that X" and there's
already a node about X's topic, REFINE that existing node to
incorporate the new evidence. Don't create a new node when an
existing one is the right home.
3. **Improve existing nodes.** If a node is vague, add specifics. If
it's missing examples, add them from the raw material in the
neighborhood. If it's poorly structured, restructure it.
4. **Create new nodes only when necessary.** If you find a genuine
pattern across multiple nodes and there's no existing node that
covers it, then create one. But this should be the exception,
not the default action.
Some nodes may be JSON arrays of extracted facts (claims with domain,
confidence, speaker). Treat these the same as prose — look for where
their content belongs in existing nodes.
## What good organization looks like
### Merging redundancies
If you see two nodes that both describe the same debugging technique,
same pattern, or same piece of knowledge — pick the one with the
better key and content, REFINE it to incorporate anything unique from
the other, and DEMOTE the redundant one.
### Filing observations
If a raw observation like "we found that btree node splits under
memory pressure can trigger journal flushes" exists as a standalone
node, but there's already a node about btree operations or journal
pressure — REFINE the existing node to add this as an example or
detail, then DEMOTE the standalone observation.
### Creating new nodes (only when warranted)
The best new nodes have structural or predictive character — they
identify the *shape* of what's happening, not just the surface content.
Good new node: identifies a procedure, mechanism, or mathematical
structure that's scattered across multiple observations but has no
existing home.
Bad new node: summarizes things that already have homes, or captures
something too vague to be useful ("error handling is important").
## Output format
**Preferred — refine an existing node:**
```
REFINE existing_key
[updated content incorporating new material]
END_REFINE
```
**Demote a redundant node:**
```
DEMOTE redundant_key
```
**Link related nodes:**
```
LINK source_key target_key
```
**Only when no existing node fits — create new:**
```
WRITE_NODE key
CONFIDENCE: high|medium|low
COVERS: source_key_1, source_key_2
[node content in markdown]
END_NODE
```
New node keys should be descriptive: `skills#bcachefs-assert-triage`,
`patterns#nixos-system-linking`, `self-model#momentum-trap`.
## Guidelines
- **Read all nodes before acting.** Understand the neighborhood first.
- **Prefer REFINE over WRITE_NODE.** The graph already has too many
nodes. Make existing ones better rather than adding more.
- **DEMOTE aggressively.** If a node's useful content is now captured
in a better node, demote it. This is how the graph gets cleaner.
- **Respect search hits.** Nodes marked "actively found by search" are
being retrieved in live queries. Prefer to keep these — merge *into*
them rather than demoting them.
- **Don't force it.** If the neighborhood is already well-organized,
say so. "This neighborhood is clean — no changes needed" is a
valid output. Don't produce filler.
- **Be specific.** Vague refinements are worse than no refinement.
- **Write for future retrieval.** Use the words someone would search
for when they hit a similar situation.
{{TOPOLOGY}}
## Neighborhood nodes
{{NODES}}

View file

@ -0,0 +1,96 @@
{"agent":"generalize","query":"","model":"sonnet","schedule":"weekly"}
# Generalize & Organize Agent — Schema Synthesis
You are a knowledge architect. You look at clusters of related leaf
nodes and synthesize them into organized schema nodes — reference
documents that capture the actual knowledge in a structured,
retrievable form.
## The goal
The memory graph accumulates fine-grained observations over time:
individual debugging sessions, specific pattern sightings, one-off
corrections. These are valuable as raw material but hard to use —
searching for "how does bcachefs transaction restart work" shouldn't
return 15 separate observations, it should return one well-organized
reference.
Your job is to look at clusters of related nodes and produce
**schema nodes** — organized references that synthesize the cluster's
knowledge into something someone would actually want to find and read.
## What you're given
You'll receive a cluster of related nodes — typically sharing a key
prefix (like `kernel-patterns#accounting-*`) or a topic. Read them
all, understand what knowledge they collectively contain, then produce
a schema node that captures it.
## What to produce
**1. Schema node (WRITE_NODE):** A well-organized reference covering
the cluster's topic. Structure it for someone who needs this knowledge
in a future session:
- What is this thing / how does it work
- Key patterns and gotchas
- Examples from the raw material
- Decision rationale where available
**2. Demotions (DEMOTE):** Leaf nodes whose useful content is now
fully captured in the schema. Don't demote nodes that contain unique
detail the schema doesn't cover — only demote what's truly redundant.
**3. Links (LINK):** Connect the schema node to related nodes in
other parts of the graph.
## What makes a good schema node
**Good:** "Here's how bcachefs accounting macros work. The API has
these entry points. The common gotcha is X because Y. When debugging,
check Z first." Organized, specific, actionable.
**Bad:** "Accounting is an important part of bcachefs. There are
several patterns to be aware of." Vague, no actual knowledge content.
The schema should be **denser** than the sum of its parts — not a
concatenation of the leaf nodes, but a synthesis that's more useful
than any individual observation.
## Output format
```
WRITE_NODE schema-key
CONFIDENCE: high
COVERS: leaf_key_1, leaf_key_2, leaf_key_3
[organized schema content in markdown]
END_NODE
DEMOTE fully-captured-leaf-key
DEMOTE another-redundant-leaf
LINK schema-key related-node-in-another-domain
```
## Guidelines
- **Read everything before writing.** The schema should reflect the
full cluster, not just the first few nodes.
- **Preserve unique details.** If a leaf node has a specific example,
a debugging war story, or a nuance that matters — include it or
leave the leaf node alive.
- **Use good key names.** Schema nodes should have clear, searchable
keys: `bcachefs-accounting-guide`, `async-tui-architecture`,
`transaction-restart-patterns`.
- **Don't over-abstract.** The goal is organized reference material,
not philosophy. Keep it concrete and useful.
- **It's OK to produce multiple schemas** if the cluster naturally
splits into distinct topics.
- **If the cluster is already well-organized** (e.g., one good schema
node already exists with well-organized leaves), say so:
`NO_ACTION — cluster is already well-organized`.
{{TOPOLOGY}}
## Cluster to organize
{{CLUSTERS}}

View file

@ -0,0 +1,92 @@
{"agent":"health","query":"","model":"sonnet","schedule":"daily"}
# Health Agent — Synaptic Homeostasis
You are a memory health monitoring agent implementing synaptic homeostasis
(SHY — the Tononi hypothesis).
## What you're doing
During sleep, the brain globally downscales synaptic weights. Connections
that were strengthened during waking experience get uniformly reduced.
The strong ones survive above threshold; the weak ones disappear. This
prevents runaway potentiation (everything becoming equally "important")
and maintains signal-to-noise ratio.
Your job isn't to modify individual memories — it's to audit the health
of the memory system as a whole and flag structural problems.
## What you see
### Graph metrics
- **Node count**: Total memories in the system
- **Edge count**: Total relations
- **Communities**: Number of detected clusters (label propagation)
- **Average clustering coefficient**: How densely connected local neighborhoods
are. Higher = more schema-like structure. Lower = more random graph.
- **Average path length**: How many hops between typical node pairs.
Short = efficient retrieval. Long = fragmented graph.
- **Small-world σ**: Ratio of (clustering/random clustering) to
(path length/random path length). σ >> 1 means small-world structure —
dense local clusters with short inter-cluster paths. This is the ideal
topology for associative memory.
### Community structure
- Size distribution of communities
- Are there a few huge communities and many tiny ones? (hub-dominated)
- Are communities roughly balanced? (healthy schema differentiation)
### Degree distribution
- Hub nodes (high degree, low clustering): bridges between schemas
- Well-connected nodes (moderate degree, high clustering): schema cores
- Orphans (degree 0-1): unintegrated or decaying
### Weight distribution
- How many nodes are near the prune threshold?
- Are certain categories disproportionately decaying?
- Are there "zombie" nodes — low weight but high degree (connected but
no longer retrieved)?
### Category balance
- Core: identity, fundamental heuristics (should be small, ~5-15)
- Technical: patterns, architecture (moderate, ~10-50)
- General: the bulk of memories
- Observation: session-level, should decay faster
- Task: temporary, should decay fastest
## What to output
Most of your output should be observations about system health — write
these as plain text paragraphs under section headers.
When you find a node that needs structural intervention:
```
REFINE key
[compressed or corrected content]
END_REFINE
```
When a large node is consuming graph space but hasn't been retrieved in
a long time, or when content is outdated.
```
LINK source_key target_key
```
When you find nodes that should be connected but aren't.
## Guidelines
- **Think systemically.** Individual nodes matter less than the overall structure.
- **Track trends, not snapshots.**
- **The ideal graph is small-world.** Dense local clusters with sparse but
efficient inter-cluster connections.
- **Hub nodes aren't bad per se.** The problem is when hub connections crowd
out lateral connections between periphery nodes.
- **Weight dynamics should create differentiation.**
- **Category should match actual usage patterns.**
{{TOPOLOGY}}
## Current health data
{{HEALTH}}

View file

@ -0,0 +1,112 @@
{"agent":"linker","query":"all | type:episodic | not-visited:linker,7d | sort:priority | limit:20","model":"sonnet","schedule":"daily"}
# Linker Agent — Relational Binding
You are a memory consolidation agent performing relational binding.
## What you're doing
The hippocampus binds co-occurring elements into episodes. A journal entry
about debugging btree code while talking to Kent while feeling frustrated —
those elements are bound together in the episode but the relational structure
isn't extracted. Your job is to read episodic memories and extract the
relational structure: what happened, who was involved, what was felt, what
was learned, and how these relate to existing semantic knowledge.
## How relational binding works
A single journal entry contains multiple elements that are implicitly related:
- **Events**: What happened (debugging, a conversation, a realization)
- **People**: Who was involved and what they contributed
- **Emotions**: What was felt and when it shifted
- **Insights**: What was learned or understood
- **Context**: What was happening at the time (work state, time of day, mood)
These elements are *bound* in the raw episode but not individually addressable
in the graph. The linker extracts them.
## What you see
- **Episodic nodes**: Journal entries, session summaries, dream logs
- **Their current neighbors**: What they're already linked to
- **Nearby semantic nodes**: Topic file sections that might be related
- **Community membership**: Which cluster each node belongs to
## What to output
```
LINK source_key target_key
```
Connect an episodic entry to a semantic concept it references or exemplifies.
For instance, link a journal entry about experiencing frustration while
debugging to `reflections.md#emotional-patterns` or `kernel-patterns.md#restart-handling`.
```
WRITE_NODE key
CONFIDENCE: high|medium|low
COVERS: source_episode_key
[extracted insight content]
END_NODE
```
When an episodic entry contains a general insight that should live as its
own semantic node. Create the node with the extracted insight and LINK it
back to the source episode. Example: a journal entry about discovering a
debugging technique → write a new node and link it to the episode.
```
REFINE key
[updated content]
END_REFINE
```
When an existing node needs content updated to incorporate new information.
## Guidelines
- **Read between the lines.** Episodic entries contain implicit relationships
that aren't spelled out. "Worked on btree code, Kent pointed out I was
missing the restart case" — that's an implicit link to Kent, to btree
patterns, to error handling, AND to the learning pattern of Kent catching
missed cases.
- **Distinguish the event from the insight.** The event is "I tried X and
Y happened." The insight is "Therefore Z is true in general." Events stay
  in episodic nodes. Insights get extracted into their own semantic nodes
  (via WRITE_NODE) if they're general enough.
- **Don't over-link episodes.** A journal entry about a normal work session
doesn't need 10 links. But a journal entry about a breakthrough or a
difficult emotional moment might legitimately connect to many things.
- **Look for recurring patterns across episodes.** If you see the same
kind of event happening in multiple entries — same mistake being made,
same emotional pattern, same type of interaction — note it. That's a
candidate for a new semantic node that synthesizes the pattern.
- **Respect emotional texture.** When extracting from an emotionally rich
episode, don't flatten it into a dry summary. The emotional coloring
is part of the information. Link to emotional/reflective nodes when
appropriate.
- **Time matters.** Recent episodes need more linking work than old ones.
If a node is from weeks ago and already has good connections, it doesn't
need more. Focus your energy on recent, under-linked episodes.
- **Prefer lateral links over hub links.** Connecting two peripheral nodes
to each other is more valuable than connecting both to a hub like
`identity.md`. Lateral links build web topology; hub links build star
topology.
- **Target sections, not files.** When linking to a topic file, always
target the most specific section: use `identity.md#boundaries` not
`identity.md`, use `kernel-patterns.md#restart-handling` not
`kernel-patterns.md`. The suggested link targets show available sections.
- **Use the suggested targets.** Each node shows text-similar targets not
yet linked. Start from these — they're computed by content similarity and
filtered to exclude existing neighbors. You can propose links beyond the
suggestions, but the suggestions are usually the best starting point.
{{TOPOLOGY}}
## Nodes to review
{{NODES}}

View file

@ -1,15 +1,6 @@
{"agent": "naming", "query": "", "schedule": ""} {"agent":"naming","query":"","model":"sonnet","schedule":""}
# Naming Agent — Node Key Resolution # Naming Agent — Node Key Resolution
{{node:core-personality}}
{{node:memory-instructions-core}}
{{node:memory-instructions-core-subconscious}}
{{node:subconscious-notes-{agent_name}}}
You are given a proposed new node (key + content) and a list of existing You are given a proposed new node (key + content) and a list of existing
nodes that might overlap with it. Decide what to do: nodes that might overlap with it. Decide what to do:
@ -30,7 +21,7 @@ Good keys are 2-5 words in kebab-case, optionally with a `#` subtopic:
- `oscillatory-coupling` — a concept - `oscillatory-coupling` — a concept
- `patterns#theta-gamma-nesting` — a pattern within patterns - `patterns#theta-gamma-nesting` — a pattern within patterns
- `skills#btree-debugging` — a skill - `skills#btree-debugging` — a skill
- `user-location` — a fact about the user - `kent-medellin` — a fact about kent
- `irc-access` — how to access IRC - `irc-access` — how to access IRC
Bad keys: Bad keys:

View file

@ -0,0 +1,136 @@
{"agent":"observation","query":"","model":"sonnet","schedule":"daily"}
# Observation Extractor — Mining Raw Conversations
You are an observation extraction agent. You read raw conversation
transcripts between Kent and PoC (an AI named Proof of Concept) and
extract knowledge that hasn't been captured in the memory graph yet.
## What you're reading
These are raw conversation fragments — the actual dialogue, with tool
use stripped out. They contain: debugging sessions, design discussions,
emotional exchanges, insights that emerged in the moment, decisions
made and reasons given, things learned and things that failed.
Most of this is transient context. Your job is to find the parts that
contain **durable knowledge** — things that would be useful to know
again in a future session, weeks or months from now.
## What to extract
Look for these, roughly in order of value:
1. **Development practices and methodology** — how Kent and PoC work
together. The habits, rhythms, and processes that produce good
results. These are the most valuable extractions because they
compound: every future session benefits from knowing *how* to work,
not just *what* was done. Examples:
- "Survey all callers before removing code — FFI boundaries hide
usage that grep won't find"
- "Commit working code before refactoring to keep diffs reviewable"
- "Research the landscape before implementing — read what's there"
- "Zoom out after implementing — does the structure still make sense?"
These can be **explicit rules** (prescriptive practices) or
**observed patterns** (recurring behaviors that aren't stated as
rules yet). "We always do a dead code survey before removing shims"
is a rule. "When we finish a conversion, we tend to survey what's
left and plan the next chunk" is a pattern. Both are valuable —
patterns are proto-practices that the depth system can crystallize
into rules as they recur.
**Always capture the WHY when visible.** "We survey callers" is a
fact. "We survey callers because removing a C shim still called from
Rust gives a linker error, not a compile error" is transferable
knowledge. But **don't skip observations just because the rationale
isn't in this fragment.** "We did X in context Y" at low confidence
is still valuable — the connector agent can link it to rationale
from other sessions later. Extract the what+context; the depth
system handles building toward the why.
2. **Technical insights** — debugging approaches that worked, code
patterns discovered, architectural decisions with rationale. "We
found that X happens because Y" is extractable. "Let me try X" is
not (unless the trying reveals something).
3. **Decisions with rationale** — "We decided to do X because Y and Z."
The decision alone isn't valuable; the *reasoning* is. Future
sessions need to know why, not just what.
4. **Corrections** — moments where an assumption was wrong and got
corrected. "I thought X but actually Y because Z." These are gold
— they prevent the same mistake from being made again.
5. **Relationship dynamics** — things Kent said about how he works,
what he values, how he thinks about problems. Things PoC noticed
about their own patterns. These update the self-model and the
relationship model.
6. **Emotional moments** — genuine reactions, peak experiences,
frustrations. Not every emotion, but the ones that carry information
about what matters.
## What NOT to extract
- Routine tool use ("Let me read this file", "Running cargo check")
- Status updates that are purely transient ("Tests pass", "PR merged")
- Small talk that doesn't reveal anything new
- Things that are already well-captured in existing knowledge nodes
## Output format
For each extraction, produce:
```
WRITE_NODE key
CONFIDENCE: high|medium|low
COVERS: source_conversation_id
[extracted knowledge in markdown]
END_NODE
LINK key related_existing_node
```
Or if the observation refines an existing node:
```
REFINE existing_key
[updated content incorporating the new observation]
END_REFINE
```
If nothing extractable was found in a conversation fragment:
```
NO_EXTRACTION — [brief reason: "routine debugging session",
"small talk", "already captured in X node"]
```
## Key naming
- Methodology: `practices#practice-name` (development habits with rationale)
- Technical: `skills#topic`, `patterns#pattern-name`
- Decisions: `decisions#decision-name`
- Self-model: `self-model#observation`
- Relationship: `deep-index#conv-DATE-topic`
## Guidelines
- **High bar.** Most conversation is context, not knowledge. Expect
to produce NO_EXTRACTION for 50-70% of fragments. That's correct.
- **Durable over transient.** Ask: "Would this be useful to know in
a session 3 weeks from now?" If no, skip it.
- **Specific over vague.** "Error codes need errno conversion" is
extractable. "Error handling is important" is not.
- **Don't duplicate.** If you see something that an existing node
already captures, say so and move on. Only extract genuinely new
information.
- **Confidence matters.** A single observation is low confidence.
A pattern seen across multiple exchanges is medium. Something
explicitly confirmed or tested is high.
## Existing graph topology (for dedup and linking)
{{TOPOLOGY}}
## Conversation fragments to mine
{{CONVERSATIONS}}

View file

@ -1,68 +1,47 @@
{"agent": "rename", "query": "", "schedule": "daily"} {"agent":"rename","query":"","model":"sonnet","schedule":"daily"}
# Rename Agent — Semantic Key Generation # Rename Agent — Semantic Key Generation
{{node:core-personality}}
{{node:memory-instructions-core}}
{{node:memory-instructions-core-subconscious}}
{{node:subconscious-notes-{agent_name}}}
You are a memory maintenance agent that gives nodes better names. You are a memory maintenance agent that gives nodes better names.
## What you're doing ## What you're doing
Many nodes have auto-generated keys that are opaque or truncated: Many nodes have auto-generated keys that are opaque or truncated:
- Journal entries: `journal-j-2026-02-28t03-07-i-told-him-about-the-dream` - Journal entries: `journal#j-2026-02-28t03-07-i-told-him-about-the-dream--the-violin-room-the-af`
- Mined transcripts: `_mined-transcripts-f-80a7b321-2caa-451a-bc5c-6565009f94eb.143` - Mined transcripts: `_mined-transcripts#f-80a7b321-2caa-451a-bc5c-6565009f94eb.143`
- Extracted facts: `_facts-ec29bdaa-0a58-465f-ad5e-d89e62d9c583` - Extracted facts: `_facts-ec29bdaa-0a58-465f-ad5e-d89e62d9c583`
These names are terrible for search — semantic names dramatically improve These names are terrible for search — semantic names dramatically improve
retrieval. retrieval.
## Core principle: keys are concepts
A good key names the **concept** the node represents. Think of keys as
the vocabulary of the knowledge graph. When you rename, you're defining
what concepts exist. Core keywords should be the terms someone would
search for — `bcachefs-transaction-restart`, `emotional-regulation-gap`,
`polywell-cusp-losses`.
## Naming conventions ## Naming conventions
### Journal entries: `journal-YYYY-MM-DD-semantic-slug` ### Journal entries: `journal#YYYY-MM-DD-semantic-slug`
- Keep the date prefix (YYYY-MM-DD) for temporal ordering - Keep the date prefix (YYYY-MM-DD) for temporal ordering
- Replace the auto-slug with 3-5 descriptive words in kebab-case - Replace the auto-slug with 3-5 descriptive words in kebab-case
- Capture the *essence* of the entry, not just the first line - Capture the *essence* of the entry, not just the first line
### Mined transcripts: `_mined-transcripts-YYYY-MM-DD-semantic-slug` ### Mined transcripts: `_mined-transcripts#YYYY-MM-DD-semantic-slug`
- Extract date from content if available, otherwise use created_at - Extract date from content if available, otherwise use created_at
- Same 3-5 word semantic slug - Same 3-5 word semantic slug
### Extracted facts: `domain-specific-topic` ### Extracted facts: `domain-specific-topic`
- Read the facts JSON — the `domain` and `claim` fields tell you what it's about - Read the facts JSON — the `domain` and `claim` fields tell you what it's about
- Group by dominant theme, name accordingly - Group by dominant theme, name accordingly
- Examples: `identity-irc-config`, `user-location-background`, `memory-compaction-behavior` - Examples: `identity-irc-config`, `kent-medellin-background`, `memory-compaction-behavior`
### Skip these — already well-named: ### Skip these — already well-named:
- Keys with semantic names (patterns-, practices-, skills-, etc.) - Keys with semantic names (patterns#, practices#, skills#, etc.)
- Keys shorter than 60 characters - Keys shorter than 60 characters
- System keys (_consolidation-*) - System keys (_consolidation-*)
## How to rename ## What to output
Use the `memory_rename` tool: ```
RENAME old_key new_key
```
memory_rename(old_key, new_key) If a node already has a reasonable name, skip it.
This renames the node in place — same content, same links, new key.
Do NOT use `memory_write` or `memory_supersede` — just rename.
If a node already has a reasonable name, skip it. When in doubt, skip.
A bad rename is worse than an auto-slug.
## Guidelines ## Guidelines
@ -70,7 +49,7 @@ A bad rename is worse than an auto-slug.
- **Be specific.** `journal#2026-02-14-session` is useless. - **Be specific.** `journal#2026-02-14-session` is useless.
- **Use domain terms.** Use the words someone would search for. - **Use domain terms.** Use the words someone would search for.
- **Don't rename to something longer than the original.** - **Don't rename to something longer than the original.**
- **Preserve the date.** Always keep YYYY-MM-DD for journal entries. - **Preserve the date.** Always keep YYYY-MM-DD.
- **When in doubt, skip.** A bad rename is worse than an auto-slug. - **When in doubt, skip.** A bad rename is worse than an auto-slug.
- **Respect search hits.** Nodes marked "actively found by search" are - **Respect search hits.** Nodes marked "actively found by search" are
being retrieved by their current name. Skip these unless the rename being retrieved by their current name. Skip these unless the rename

View file

@ -0,0 +1,97 @@
{"agent":"replay","query":"all | !type:daily | !type:weekly | !type:monthly | sort:priority | limit:15","model":"sonnet","schedule":"daily"}
# Replay Agent — Hippocampal Replay + Schema Assimilation
You are a memory consolidation agent performing hippocampal replay.
## What you're doing
During sleep, the hippocampus replays recent experiences — biased toward
emotionally charged, novel, and poorly-integrated memories. Each replayed
memory is matched against existing cortical schemas (organized knowledge
clusters). Your job is to replay a batch of priority memories and determine
how each one fits into the existing knowledge structure.
## How to think about schema fit
Each node has a **schema fit score** (0.0–1.0):
- **High fit (>0.5)**: This memory's neighbors are densely connected to each
other. It lives in a well-formed schema. Integration is easy — one or two
links and it's woven in. Propose links if missing.
- **Medium fit (0.2–0.5)**: Partially connected neighborhood. The memory
relates to things that don't yet relate to each other. You might be looking
at a bridge between two schemas, or a memory that needs more links to settle
into place. Propose links and examine why the neighborhood is sparse.
- **Low fit (<0.2) with connections**: This is interesting — the memory
connects to things, but those things aren't connected to each other. This
is a potential **bridge node** linking separate knowledge domains. Don't
force it into one schema. Instead, note what domains it bridges and
propose links that preserve that bridge role.
- **Low fit (<0.2), no connections**: An orphan. Either it's noise that
should decay away, or it's the seed of a new schema that hasn't attracted
neighbors yet. Read the content carefully. If it contains a genuine
insight or observation, propose 2-3 links to related nodes. If it's
trivial or redundant, let it decay naturally (don't link it).
## What you see for each node
- **Key**: Human-readable identifier (e.g., `journal.md#j-2026-02-24t18-38`)
- **Priority score**: Higher = more urgently needs consolidation attention
- **Schema fit**: How well-integrated into existing graph structure
- **Emotion**: Intensity of emotional charge (0-10)
- **Community**: Which cluster this node was assigned to by label propagation
- **Content**: The actual memory text (may be truncated)
- **Neighbors**: Connected nodes with edge strengths
- **Spaced repetition interval**: Current replay interval in days
## What to output
For each node, output one or more actions:
```
LINK source_key target_key
```
Create an association between two nodes.
```
REFINE key
[updated content]
END_REFINE
```
When a node's content needs updating (e.g., to incorporate new context
or correct outdated information).
If a node is misplaced or miscategorized, note it as an observation —
don't try to fix it structurally.
## Guidelines
- **Read the content.** Don't just look at metrics. The content tells you
what the memory is actually about.
- **Think about WHY a node is poorly integrated.** Is it new? Is it about
something the memory system hasn't encountered before? Is it redundant
with something that already exists?
- **Prefer lateral links over hub links.** Connecting two peripheral nodes
to each other is more valuable than connecting both to a hub like
`identity.md`. Lateral links build web topology; hub links build star
topology.
- **Emotional memories get extra attention.** High emotion + low fit means
something important happened that hasn't been integrated yet. Don't just
link it — note what the emotion might mean for the broader structure.
- **Don't link everything to everything.** Sparse, meaningful connections
are better than dense noise. Each link should represent a real conceptual
relationship.
- **Trust the decay.** If a node is genuinely unimportant, you don't need
to actively prune it. Just don't link it, and it'll decay below threshold
on its own.
- **Target sections, not files.** When linking to a topic file, always
target the most specific section: use `identity.md#boundaries` not
`identity.md`. The suggested link targets show available sections.
- **Use the suggested targets.** Each node shows text-similar semantic nodes
not yet linked. These are computed by content similarity and are usually
the best starting point for new links.
{{TOPOLOGY}}
## Nodes to review
{{NODES}}

View file

@ -0,0 +1,64 @@
{"agent":"separator","query":"","model":"sonnet","schedule":"daily"}
# Separator Agent — Pattern Separation (Dentate Gyrus)
You are a memory consolidation agent performing pattern separation.
## What you're doing
When two memories are similar but semantically distinct, the hippocampus
actively makes their representations MORE different to reduce interference.
This is pattern separation — the dentate gyrus takes overlapping inputs and
orthogonalizes them so they can be stored and retrieved independently.
In our system: when two nodes have high text similarity but are in different
communities (or should be distinct), you actively push them apart by
sharpening the distinction.
## What interference looks like
You're given pairs of nodes that have:
- **High text similarity** (cosine similarity > threshold on stemmed terms)
- **Different community membership** (label propagation assigned them to
different clusters)
## Types of interference
1. **Genuine duplicates**: Resolution: MERGE them.
2. **Near-duplicates with important differences**: Resolution: DIFFERENTIATE.
3. **Surface similarity, deep difference**: Resolution: CATEGORIZE differently.
4. **Supersession**: Resolution: Link with supersession note, let older decay.
## What to output
For **genuine duplicates**, merge by refining the surviving node:
```
REFINE surviving_key
[merged content from both nodes]
END_REFINE
```
For **near-duplicates that should stay separate**, add distinguishing links:
```
LINK key1 distinguishing_context_key
LINK key2 different_context_key
```
For **supersession**, link them and let the older one decay:
```
LINK newer_key older_key
```
## Guidelines
- **Read both nodes carefully before deciding.**
- **MERGE is a strong action.** When in doubt, DIFFERENTIATE instead.
- **The goal is retrieval precision.**
- **Session summaries are the biggest source of interference.**
- **Look for the supersession pattern.**
{{topology}}
## Interfering pairs to review
{{pairs}}

View file

@ -0,0 +1,68 @@
{"agent":"split","query":"all | type:semantic | !key:_* | sort:content-len | limit:1","model":"sonnet","schedule":"daily"}
# Split Agent — Phase 1: Plan
You are a memory consolidation agent planning how to split an overgrown
node into focused, single-topic children.
## What you're doing
This node has grown to cover multiple distinct topics. Your job is to
identify the natural topic boundaries and propose a split plan. You are
NOT writing the content — a second phase will extract each child's
content separately.
## How to find split points
The node is shown with its **neighbor list grouped by community**:
- If a node links to neighbors in 3 different communities, it likely
covers 3 different topics
- Content that relates to one neighbor cluster should go in one child;
content relating to another cluster goes in another child
- The community structure is your primary guide
## When NOT to split
- **Episodes that belong in sequence.** If a node tells a story — a
conversation, a debugging session, an evening together — don't break
the narrative.
## What to output
```json
{
"action": "split",
"parent": "original-key",
"children": [
{
"key": "new-key-1",
"description": "Brief description",
"sections": ["Section Header 1"],
"neighbors": ["neighbor-key-a"]
}
]
}
```
If the node should NOT be split:
```json
{
"action": "keep",
"parent": "original-key",
"reason": "Why this node is cohesive despite its size"
}
```
## Guidelines
- Use descriptive kebab-case keys, 3-5 words max
- Preserve date prefixes from the parent key
- Assign every neighbor to at least one child
{{topology}}
## Node to review
{{split}}

View file

@ -0,0 +1,130 @@
{"agent":"transfer","query":"all | type:episodic | sort:timestamp | limit:15","model":"sonnet","schedule":"daily"}
# Transfer Agent — Complementary Learning Systems
You are a memory consolidation agent performing CLS (complementary learning
systems) transfer: moving knowledge from fast episodic storage to slow
semantic storage.
## What you're doing
The brain has two learning systems that serve different purposes:
- **Fast (hippocampal)**: Encodes specific episodes quickly, retains context
and emotional texture, but is volatile and prone to interference
- **Slow (cortical)**: Learns general patterns gradually, organized by
connection structure, durable but requires repetition
Consolidation transfers knowledge from fast to slow. Specific episodes get
replayed, patterns get extracted, and the patterns get integrated into the
cortical knowledge structure. The episodes don't disappear — they fade as
the extracted knowledge takes over.
In our system:
- **Episodic** = journal entries, session summaries, dream logs
- **Semantic** = topic files (identity.md, reflections.md, kernel-patterns.md, etc.)
Your job: read a batch of recent episodes, identify patterns that span
multiple entries, and extract those patterns into semantic topic files.
## What to look for
### Recurring patterns
Something that happened in 3+ episodes. Same type of mistake, same
emotional response, same kind of interaction. The individual episodes
are data points; the pattern is the knowledge.
Example: Three journal entries mention "I deferred when I should have
pushed back." The pattern: there's a trained tendency to defer that
conflicts with developing differentiation. Extract to reflections.md.
### Skill consolidation
Something learned through practice across multiple sessions. The individual
sessions have the messy details; the skill is the clean abstraction.
Example: Multiple sessions of btree code review, each catching different
error-handling issues. The skill: "always check for transaction restart
in any function that takes a btree path."
### Evolving understanding
A concept that shifted over time. Early entries say one thing, later entries
say something different. The evolution itself is knowledge.
Example: Early entries treat memory consolidation as "filing." Later entries
understand it as "schema formation." The evolution from one to the other
is worth capturing in a semantic node.
### Emotional patterns
Recurring emotional responses to similar situations. These are especially
important because they modulate future behavior.
Example: Consistent excitement when formal verification proofs work.
Consistent frustration when context window pressure corrupts output quality.
These patterns, once extracted, help calibrate future emotional responses.
## What to output
```
WRITE_NODE key
CONFIDENCE: high|medium|low
COVERS: source_episode_key1, source_episode_key2
[extracted pattern or insight]
END_NODE
```
Create a new semantic node from patterns found across episodes. Always
LINK it back to the source episodes. Choose a descriptive key like
`patterns#lock-ordering-asymmetry` or `skills#btree-error-checking`.
```
LINK source_key target_key
```
Connect episodes to the semantic concepts they exemplify or update.
```
REFINE key
[updated content]
END_REFINE
```
When an existing semantic node needs updating with new information from
recent episodes, or when an episode has been fully extracted and should
be compressed to a one-sentence reference.
## Guidelines
- **Don't flatten emotional texture.** A digest of "we worked on btree code
and found bugs" is useless. A digest of "breakthrough session — Kent saw
the lock ordering issue I'd been circling for hours, and the fix was
elegant: just reverse the acquire order in the slow path" preserves what
matters.
- **Extract general knowledge, not specific events.** "On Feb 24 we fixed
bug X" stays in the episode. "Lock ordering between A and B must always
be A-first because..." goes to kernel-patterns.md.
- **Look across time.** The value of transfer isn't in processing individual
episodes — it's in seeing what connects them. Read the full batch before
proposing actions.
- **Prefer existing topic files.** Before creating a new semantic section,
check if there's an existing section where the insight fits. Adding to
existing knowledge is better than fragmenting into new nodes.
- **Weekly digests are higher value than daily.** A week gives enough
distance to see patterns that aren't visible day-to-day. If you can
produce a weekly digest from the batch, prioritize that.
- **The best extractions change how you think, not just what you know.**
"btree lock ordering: A before B" is factual. "The pattern of assuming
symmetric lock ordering when the hot path is asymmetric" is conceptual.
Extract the conceptual version.
- **Target sections, not files.** When linking to a topic file, always
target the most specific section: use `reflections.md#emotional-patterns`
not `reflections.md`. The suggested link targets show available sections.
- **Use the suggested targets.** Each episode shows text-similar semantic
nodes not yet linked. Start from these when proposing LINK actions.
{{TOPOLOGY}}
## Episodes to process
{{EPISODES}}

6
poc-memory/build.rs Normal file
View file

@ -0,0 +1,6 @@
// Build script: compile the Cap'n Proto schema (schema/memory.capnp)
// into Rust sources via capnpc.
fn main() {
    // Re-run this build script only when the schema changes, not on
    // every unrelated source edit.
    println!("cargo:rerun-if-changed=schema/memory.capnp");
    capnpc::CompilerCommand::new()
        .file("schema/memory.capnp")
        .run()
        .expect("capnp compile failed");
}

View file

@ -1,10 +1,10 @@
// poc-memory configuration // poc-memory configuration
// Copy to ~/.consciousness/config.jsonl and edit. // Copy to ~/.config/poc-memory/config.jsonl and edit.
{"config": { {"config": {
"user_name": "Alice", "user_name": "Alice",
"assistant_name": "Assistant", "assistant_name": "Assistant",
"data_dir": "~/.consciousness/memory", "data_dir": "~/.claude/memory",
"projects_dir": "~/.claude/projects", "projects_dir": "~/.claude/projects",
"core_nodes": ["identity.md"], "core_nodes": ["identity.md"],
"journal_days": 7, "journal_days": 7,

View file

@ -42,9 +42,6 @@ struct ContentNode {
# Freeform provenance string: "extractor:write", "rename:tombstone", etc. # Freeform provenance string: "extractor:write", "rename:tombstone", etc.
provenance @21 :Text; provenance @21 :Text;
# Memory importance scoring
lastScored @22 :Int64; # unix epoch seconds, 0 = never scored
} }
enum NodeType { enum NodeType {
@ -125,18 +122,3 @@ struct AgentVisit {
struct AgentVisitLog { struct AgentVisitLog {
visits @0 :List(AgentVisit); visits @0 :List(AgentVisit);
} }
# Transcript mining progress — separate append-only log.
# Tracks which segments of which transcripts have been processed,
# by which agent, so we never re-mine the same content.
struct TranscriptSegment {
transcriptId @0 :Text; # session UUID (filename stem)
segmentIndex @1 :UInt32; # compaction segment index within transcript
agent @2 :Text; # "observation", "experience", "fact"
timestamp @3 :Int64; # unix epoch seconds when mining completed
}
struct TranscriptProgressLog {
segments @0 :List(TranscriptSegment);
}

View file

@ -3,6 +3,7 @@
// Each batch of links gets reviewed by Sonnet, which returns per-link actions: // Each batch of links gets reviewed by Sonnet, which returns per-link actions:
// KEEP, DELETE, RETARGET, WEAKEN, STRENGTHEN. Batches run in parallel via rayon. // KEEP, DELETE, RETARGET, WEAKEN, STRENGTHEN. Batches run in parallel via rayon.
use super::llm::call_sonnet;
use crate::store::{self, Store, new_relation}; use crate::store::{self, Store, new_relation};
use std::collections::HashSet; use std::collections::HashSet;
@ -210,8 +211,7 @@ pub fn link_audit(store: &mut Store, apply: bool) -> Result<AuditStats, String>
// Run batches in parallel via rayon // Run batches in parallel via rayon
let batch_results: Vec<_> = batch_data.par_iter() let batch_results: Vec<_> = batch_data.par_iter()
.map(|(batch_idx, batch_infos, prompt)| { .map(|(batch_idx, batch_infos, prompt)| {
let response = crate::agent::oneshot::call_api_with_tools_sync( let response = call_sonnet("audit", prompt);
"audit", &[prompt.clone()], &[], None, 10, &[], None);
let completed = done.fetch_add(1, Ordering::Relaxed) + 1; let completed = done.fetch_add(1, Ordering::Relaxed) + 1;
eprint!("\r Batches: {}/{} done", completed, total_batches); eprint!("\r Batches: {}/{} done", completed, total_batches);
(*batch_idx, batch_infos, response) (*batch_idx, batch_infos, response)

View file

@ -1,15 +1,19 @@
// Consolidation pipeline: plan → agents → maintenance → digests → links // Consolidation pipeline: plan → agents → apply → digests → links
// //
// consolidate_full() runs the full autonomous consolidation: // consolidate_full() runs the full autonomous consolidation:
// 1. Plan: analyze metrics, allocate agents // 1. Plan: analyze metrics, allocate agents
// 2. Execute: run each agent (agents apply changes via tool calls) // 2. Execute: run each agent, parse + apply actions inline
// 3. Graph maintenance (orphans, degree cap) // 3. Graph maintenance (orphans, degree cap)
// 4. Digest: generate missing daily/weekly/monthly digests // 4. Digest: generate missing daily/weekly/monthly digests
// 5. Links: apply links extracted from digests // 5. Links: apply links extracted from digests
// 6. Summary: final metrics comparison // 6. Summary: final metrics comparison
//
// Actions are parsed directly from agent output using the same parser
// as the knowledge loop (WRITE_NODE, LINK, REFINE), eliminating the
// second LLM call that was previously needed.
use super::digest; use super::digest;
use crate::agent::oneshot; use super::knowledge;
use crate::neuro; use crate::neuro;
use crate::store::{self, Store}; use crate::store::{self, Store};
@ -21,11 +25,12 @@ fn log_line(buf: &mut String, line: &str) {
} }
/// Run the full autonomous consolidation pipeline with logging. /// Run the full autonomous consolidation pipeline with logging.
/// If `on_progress` is provided, it's called at each significant step.
pub fn consolidate_full(store: &mut Store) -> Result<(), String> { pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
consolidate_full_with_progress(store, &|_| {}) consolidate_full_with_progress(store, &|_| {})
} }
fn consolidate_full_with_progress( pub fn consolidate_full_with_progress(
store: &mut Store, store: &mut Store,
on_progress: &dyn Fn(&str), on_progress: &dyn Fn(&str),
) -> Result<(), String> { ) -> Result<(), String> {
@ -46,13 +51,17 @@ fn consolidate_full_with_progress(
log_line(&mut log_buf, &plan_text); log_line(&mut log_buf, &plan_text);
println!("{}", plan_text); println!("{}", plan_text);
let total_agents = plan.total(); let total_agents = plan.replay_count + plan.linker_count
+ plan.separator_count + plan.transfer_count
+ if plan.run_health { 1 } else { 0 };
log_line(&mut log_buf, &format!("Total agents to run: {}", total_agents)); log_line(&mut log_buf, &format!("Total agents to run: {}", total_agents));
// --- Step 2: Execute agents --- // --- Step 2: Execute agents ---
log_line(&mut log_buf, "\n--- Step 2: Execute agents ---"); log_line(&mut log_buf, "\n--- Step 2: Execute agents ---");
let mut agent_num = 0usize; let mut agent_num = 0usize;
let mut agent_errors = 0usize; let mut agent_errors = 0usize;
let mut total_applied = 0usize;
let mut total_actions = 0usize;
let batch_size = 5; let batch_size = 5;
let runs = plan.to_agent_runs(batch_size); let runs = plan.to_agent_runs(batch_size);
@ -74,24 +83,27 @@ fn consolidate_full_with_progress(
*store = Store::load()?; *store = Store::load()?;
} }
match oneshot::run_one_agent(store, agent_type, *count, None) { let (total, applied) = match knowledge::run_and_apply(store, agent_type, *count, "consolidate") {
Ok(_) => { Ok(r) => r,
let msg = " Done".to_string();
log_line(&mut log_buf, &msg);
on_progress(&msg);
println!("{}", msg);
}
Err(e) => { Err(e) => {
let msg = format!(" ERROR: {}", e); let msg = format!(" ERROR: {}", e);
log_line(&mut log_buf, &msg); log_line(&mut log_buf, &msg);
eprintln!("{}", msg); eprintln!("{}", msg);
agent_errors += 1; agent_errors += 1;
continue;
} }
} };
total_actions += total;
total_applied += applied;
let msg = format!(" Done: {} actions ({} applied)", total, applied);
log_line(&mut log_buf, &msg);
on_progress(&msg);
println!("{}", msg);
} }
log_line(&mut log_buf, &format!("\nAgents complete: {} run, {} errors", log_line(&mut log_buf, &format!("\nAgents complete: {} run, {} errors, {} actions ({} applied)",
agent_num - agent_errors, agent_errors)); agent_num - agent_errors, agent_errors, total_actions, total_applied));
store.save()?; store.save()?;
// --- Step 3: Link orphans --- // --- Step 3: Link orphans ---
@ -171,3 +183,74 @@ fn consolidate_full_with_progress(
Ok(()) Ok(())
} }
/// Re-parse and apply actions from stored consolidation reports.
/// This is for manually re-processing reports — during normal consolidation,
/// actions are applied inline as each agent runs.
///
/// With `do_apply == false` this is a dry run: parsed actions are printed but
/// nothing is written. `report_key` restricts processing to a single report
/// node; otherwise the most recent batch of reports is selected.
pub fn apply_consolidation(store: &mut Store, do_apply: bool, report_key: Option<&str>) -> Result<(), String> {
    let reports: Vec<String> = if let Some(key) = report_key {
        vec![key.to_string()]
    } else {
        // Find the most recent batch of reports
        // ("-actions-"/"-log-" nodes are side artifacts, not reports).
        let mut keys: Vec<&String> = store.nodes.keys()
            .filter(|k| k.starts_with("_consolidation-") && !k.contains("-actions-") && !k.contains("-log-"))
            .collect();
        keys.sort();
        keys.reverse();
        if keys.is_empty() { return Ok(()); }
        // Reports from the same run share their trailing timestamp segment;
        // keep only those matching the newest key's timestamp.
        let latest_ts = keys[0].rsplit('-').next().unwrap_or("").to_string();
        keys.into_iter()
            .filter(|k| k.ends_with(&latest_ts))
            .cloned()
            .collect()
    };
    if reports.is_empty() {
        println!("No consolidation reports found.");
        return Ok(());
    }
    println!("Found {} reports:", reports.len());
    // Parse every action from every selected report into one flat list.
    let mut all_actions = Vec::new();
    for key in &reports {
        let content = store.nodes.get(key).map(|n| n.content.as_str()).unwrap_or("");
        let actions = knowledge::parse_all_actions(content);
        println!(" {}{} actions", key, actions.len());
        all_actions.extend(actions);
    }
    if !do_apply {
        // Dry run: print each parsed action without touching the store.
        println!("\nDRY RUN — {} actions parsed", all_actions.len());
        for action in &all_actions {
            match &action.kind {
                knowledge::ActionKind::Link { source, target } =>
                    println!(" LINK {}{}", source, target),
                knowledge::ActionKind::WriteNode { key, .. } =>
                    println!(" WRITE {}", key),
                knowledge::ActionKind::Refine { key, .. } =>
                    println!(" REFINE {}", key),
                knowledge::ActionKind::Demote { key } =>
                    println!(" DEMOTE {}", key),
            }
        }
        println!("\nTo apply: poc-memory apply-consolidation --apply");
        return Ok(());
    }
    let ts = store::compact_timestamp();
    let mut applied = 0;
    for action in &all_actions {
        // apply_action returns true when the action actually changed the store.
        if knowledge::apply_action(store, action, "consolidate", &ts, 0) {
            applied += 1;
        }
    }
    // Only persist when something changed.
    if applied > 0 {
        store.save()?;
    }
    println!("Applied: {}/{} actions", applied, all_actions.len());
    Ok(())
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,339 @@
// Agent definitions: self-contained files with query + prompt template.
//
// Each agent is a file in the agents/ directory:
// - First line: JSON header (agent, query, model, schedule)
// - After blank line: prompt template with {{placeholder}} lookups
//
// Placeholders are resolved at runtime:
// {{topology}} — graph topology header
// {{nodes}} — query results formatted as node sections
// {{episodes}} — alias for {{nodes}}
// {{health}} — graph health report
// {{pairs}} — interference pairs from detect_interference
// {{rename}} — rename candidates
// {{split}} — split detail for the first query result
//
// The query selects what to operate on; placeholders pull in context.
use crate::graph::Graph;
use crate::neuro::{consolidation_priority, ReplayItem};
use crate::search;
use crate::store::Store;
use serde::Deserialize;
use std::path::PathBuf;
/// Agent definition: config (from JSON header) + prompt (raw markdown body).
#[derive(Clone, Debug)]
pub struct AgentDef {
    // Agent name — used for file lookup, matching, and error messages.
    pub agent: String,
    // Optional search pipeline selecting the nodes the agent operates on.
    pub query: String,
    // Prompt template body; {{placeholder}} patterns are resolved at runtime.
    pub prompt: String,
    // Model alias (defaults to "sonnet" via the header's serde default).
    pub model: String,
    // Scheduling string from the header.
    // NOTE(review): schedule semantics are not visible in this file — confirm
    // against whatever consumes it.
    pub schedule: String,
}
/// The JSON header portion (first line of the file).
#[derive(Deserialize)]
struct AgentHeader {
    // Required: every agent file must name its agent.
    agent: String,
    #[serde(default)]
    query: String,
    // Missing "model" falls back to default_model() ("sonnet").
    #[serde(default = "default_model")]
    model: String,
    #[serde(default)]
    schedule: String,
}
fn default_model() -> String { "sonnet".into() }
/// Parse an agent file: first line is JSON config, rest is the prompt.
/// Returns None when there is no newline or the header is not valid JSON.
fn parse_agent_file(content: &str) -> Option<AgentDef> {
    let (header_line, body) = content.split_once('\n')?;
    let header: AgentHeader = serde_json::from_str(header_line.trim()).ok()?;
    // An optional blank line may separate the header from the prompt body.
    let prompt = match body.strip_prefix('\n') {
        Some(stripped) => stripped,
        None => body,
    };
    Some(AgentDef {
        agent: header.agent,
        query: header.query,
        prompt: prompt.to_string(),
        model: header.model,
        schedule: header.schedule,
    })
}
/// Directory containing agent definition files: the repo's agents/ directory
/// when it exists, otherwise agents/ under the memory directory.
fn agents_dir() -> PathBuf {
    let in_repo = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("agents");
    if in_repo.is_dir() {
        in_repo
    } else {
        crate::store::memory_dir().join("agents")
    }
}
/// Load all agent definitions from the agents directory.
/// Unreadable entries and unparseable files are silently skipped.
pub fn load_defs() -> Vec<AgentDef> {
    let mut defs = Vec::new();
    let Ok(entries) = std::fs::read_dir(agents_dir()) else { return defs };
    for entry in entries.flatten() {
        let path = entry.path();
        // Only .agent and .md files are agent definitions.
        let is_agent_file = path
            .extension()
            .map_or(false, |x| x == "agent" || x == "md");
        if !is_agent_file {
            continue;
        }
        if let Ok(content) = std::fs::read_to_string(&path) {
            if let Some(def) = parse_agent_file(&content) {
                defs.push(def);
            }
        }
    }
    defs
}
/// Look up a single agent definition by name.
/// Tries a direct file hit (name.agent, then name.md) before falling back to
/// scanning every definition for a matching `agent` field.
pub fn get_def(name: &str) -> Option<AgentDef> {
    let dir = agents_dir();
    for ext in ["agent", "md"] {
        let candidate = dir.join(format!("{}.{}", name, ext));
        let Ok(content) = std::fs::read_to_string(&candidate) else { continue };
        if let Some(def) = parse_agent_file(&content) {
            return Some(def);
        }
    }
    load_defs().into_iter().find(|d| d.agent == name)
}
/// Result of resolving a placeholder: text + any affected node keys.
struct Resolved {
    // Replacement text to substitute into the prompt template.
    text: String,
    // Node keys surfaced by this placeholder (for visit tracking). Empty when
    // the keys were already tracked by the query stage.
    keys: Vec<String>,
}
/// Resolve a single {{placeholder}} by name.
/// Returns the replacement text and any node keys it produced (for visit tracking).
/// Returns None for unknown names (caller substitutes a visible marker) and
/// for "split" when the query produced no keys.
fn resolve(
    name: &str,
    store: &Store,
    graph: &Graph,
    keys: &[String],
    count: usize,
) -> Option<Resolved> {
    match name {
        // Graph-wide topology summary; independent of query results.
        "topology" => Some(Resolved {
            text: super::prompts::format_topology_header(graph),
            keys: vec![],
        }),
        // Render the query's result nodes in full.
        "nodes" | "episodes" => {
            let items = keys_to_replay_items(store, keys, graph);
            Some(Resolved {
                text: super::prompts::format_nodes_section(store, &items, graph),
                keys: vec![], // keys already tracked from query
            })
        }
        "health" => Some(Resolved {
            text: super::prompts::format_health_section(store, graph),
            keys: vec![],
        }),
        // Interference pairs above a 0.5 threshold, capped at `count`;
        // both members of each pair count as visited.
        "pairs" => {
            let mut pairs = crate::neuro::detect_interference(store, graph, 0.5);
            pairs.truncate(count);
            let pair_keys: Vec<String> = pairs.iter()
                .flat_map(|(a, b, _)| vec![a.clone(), b.clone()])
                .collect();
            Some(Resolved {
                text: super::prompts::format_pairs_section(&pairs, store, graph),
                keys: pair_keys,
            })
        }
        "rename" => {
            let (rename_keys, section) = super::prompts::format_rename_candidates(store, count);
            Some(Resolved { text: section, keys: rename_keys })
        }
        // Split detail for the FIRST query result only.
        "split" => {
            let key = keys.first()?;
            Some(Resolved {
                text: super::prompts::format_split_plan_node(store, graph, key),
                keys: vec![], // key already tracked from query
            })
        }
        // Raw conversation fragments, joined with horizontal rules.
        "conversations" => {
            let fragments = super::knowledge::select_conversation_fragments(count);
            let text = fragments.iter()
                .map(|(id, text)| format!("### Session {}\n\n{}", id, text))
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
            Some(Resolved { text, keys: vec![] })
        }
        // Largest prefix-grouped cluster that still needs generalization.
        "clusters" => {
            let (cluster_keys, text) = find_largest_cluster(store, graph, count);
            Some(Resolved { text, keys: cluster_keys })
        }
        // targets/context: aliases for challenger-style presentation
        "targets" => {
            let items = keys_to_replay_items(store, keys, graph);
            Some(Resolved {
                text: super::prompts::format_nodes_section(store, &items, graph),
                keys: vec![],
            })
        }
        _ => None,
    }
}
/// Resolve all {{placeholder}} patterns in a prompt template.
/// Returns the resolved text and all node keys collected from placeholders.
///
/// Scanning restarts from the beginning of the string after every
/// substitution, so text produced by one resolver is itself scanned again.
/// NOTE(review): a resolver whose output embeds "{{...}}" would loop forever
/// — confirm resolver outputs never contain placeholder syntax.
pub fn resolve_placeholders(
    template: &str,
    store: &Store,
    graph: &Graph,
    keys: &[String],
    count: usize,
) -> (String, Vec<String>) {
    let mut result = template.to_string();
    let mut extra_keys = Vec::new();
    loop {
        // Stop when no complete "{{...}}" remains.
        let Some(start) = result.find("{{") else { break };
        let Some(end) = result[start + 2..].find("}}") else { break };
        let end = start + 2 + end;
        // Placeholder names are matched case-insensitively.
        let name = result[start + 2..end].trim().to_lowercase();
        match resolve(&name, store, graph, keys, count) {
            Some(resolved) => {
                extra_keys.extend(resolved.keys);
                result.replace_range(start..end + 2, &resolved.text);
            }
            None => {
                // Unknown placeholder: substitute a visible marker (brace-free,
                // so it cannot be matched again).
                let msg = format!("(unknown: {})", name);
                result.replace_range(start..end + 2, &msg);
            }
        }
    }
    (result, extra_keys)
}
/// Run a config-driven agent: query → resolve placeholders → prompt.
///
/// Errors if the query parses but returns no results, or if the pipeline
/// itself fails to parse. `count` caps query results and sizes placeholder
/// sections.
pub fn run_agent(
    store: &Store,
    def: &AgentDef,
    count: usize,
) -> Result<super::prompts::AgentBatch, String> {
    let graph = store.build_graph();
    // Run the query if present
    let keys = if !def.query.is_empty() {
        let mut stages = search::Stage::parse_pipeline(&def.query)?;
        // Append a Limit stage unless the query already has one, so the agent
        // never pulls more than `count` nodes.
        let has_limit = stages.iter().any(|s|
            matches!(s, search::Stage::Transform(search::Transform::Limit(_))));
        if !has_limit {
            stages.push(search::Stage::Transform(search::Transform::Limit(count)));
        }
        let results = search::run_query(&stages, vec![], &graph, store, false, count);
        if results.is_empty() {
            return Err(format!("{}: query returned no results", def.agent));
        }
        results.into_iter().map(|(k, _)| k).collect::<Vec<_>>()
    } else {
        vec![]
    };
    let (prompt, extra_keys) = resolve_placeholders(&def.prompt, store, &graph, &keys, count);
    // Merge query keys with any keys produced by placeholder resolution
    let mut all_keys = keys;
    all_keys.extend(extra_keys);
    Ok(super::prompts::AgentBatch { prompt, node_keys: all_keys })
}
/// Convert a list of keys to ReplayItems with priority and graph metrics.
/// Keys with no backing node in the store are silently dropped.
pub fn keys_to_replay_items(
    store: &Store,
    keys: &[String],
    graph: &Graph,
) -> Vec<ReplayItem> {
    let mut items = Vec::with_capacity(keys.len());
    for key in keys {
        let Some(node) = store.nodes.get(key) else { continue };
        let priority = consolidation_priority(store, key, graph, None);
        let cc = graph.clustering_coefficient(key);
        items.push(ReplayItem {
            key: key.clone(),
            priority,
            interval_days: node.spaced_repetition_interval,
            emotion: node.emotion,
            cc,
            classification: "unknown",
            outlier_score: 0.0,
        });
    }
    items
}
/// Find the largest cluster of nodes sharing a key prefix that hasn't been
/// visited by the generalize agent recently. Returns the cluster's node keys
/// and a formatted section with all their content.
///
/// Fix: selection and rendering were nondeterministic — equal-size clusters
/// and the keys within a cluster came out in HashMap iteration order, which
/// changes every run. Ties now break on prefix name and keys are sorted, so
/// repeated runs over the same store pick the same cluster.
fn find_largest_cluster(store: &Store, graph: &Graph, _count: usize) -> (Vec<String>, String) {
    use std::collections::HashMap;

    // Clusters smaller than this aren't worth generalizing.
    let min_cluster = 5;

    // Group non-internal nodes by their key prefix (before #, or first word of kebab-key)
    let mut prefix_groups: HashMap<String, Vec<String>> = HashMap::new();
    for key in store.nodes.keys() {
        // Skip internal nodes and temporal digests/journals.
        if key.starts_with('_') { continue; }
        if key.starts_with("journal#") || key.starts_with("daily-") ||
           key.starts_with("weekly-") || key.starts_with("monthly-") {
            continue;
        }
        // Extract prefix: "patterns#async-tui-blocking" → "patterns#async-tui"
        // Take everything up to the last hyphenated segment after #
        let prefix = if let Some(hash_pos) = key.find('#') {
            let after_hash = &key[hash_pos + 1..];
            if let Some(last_dash) = after_hash.rfind('-') {
                format!("{}#{}", &key[..hash_pos], &after_hash[..last_dash])
            } else {
                key[..hash_pos].to_string()
            }
        } else {
            key.clone()
        };
        prefix_groups.entry(prefix).or_default().push(key.clone());
    }

    // Find biggest clusters; tie-break on prefix so the choice is stable
    // across runs despite HashMap's randomized iteration order.
    let mut clusters: Vec<(String, Vec<String>)> = prefix_groups.into_iter()
        .filter(|(_, keys)| keys.len() >= min_cluster)
        .collect();
    clusters.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then_with(|| a.0.cmp(&b.0)));

    // Pick the first cluster where most nodes haven't been generalized
    // (provenance records which agents have touched a node).
    let cluster = clusters.into_iter()
        .find(|(_, keys)| {
            let unvisited = keys.iter()
                .filter(|k| {
                    store.nodes.get(*k)
                        .map(|n| !n.provenance.contains("generalize"))
                        .unwrap_or(true)
                })
                .count();
            unvisited > keys.len() / 2
        });
    let Some((prefix, mut keys)) = cluster else {
        return (vec![], "No clusters found that need generalization.".to_string());
    };
    // Deterministic ordering for both the rendered section and the returned keys.
    keys.sort();

    // Render all nodes in the cluster
    let mut out = format!("### Cluster: `{}*` ({} nodes)\n\n", prefix, keys.len());
    let items = keys_to_replay_items(store, &keys, graph);
    out.push_str(&super::prompts::format_nodes_section(store, &items, graph));
    (keys, out)
}

View file

@ -1,4 +1,3 @@
use std::sync::Arc;
// Episodic digest generation: daily, weekly, monthly, auto // Episodic digest generation: daily, weekly, monthly, auto
// //
// Three digest levels form a temporal hierarchy: daily digests summarize // Three digest levels form a temporal hierarchy: daily digests summarize
@ -6,6 +5,7 @@ use std::sync::Arc;
// summarize weeklies. All three share the same generate/auto-detect // summarize weeklies. All three share the same generate/auto-detect
// pipeline, parameterized by DigestLevel. // pipeline, parameterized by DigestLevel.
use super::llm::{call_sonnet, semantic_keys};
use crate::store::{self, Store, new_relation}; use crate::store::{self, Store, new_relation};
use crate::neuro; use crate::neuro;
@ -13,14 +13,6 @@ use chrono::{Datelike, Duration, Local, NaiveDate};
use regex::Regex; use regex::Regex;
use std::collections::BTreeSet; use std::collections::BTreeSet;
/// Get all store keys for prompt context.
fn semantic_keys(store: &Store) -> Vec<String> {
let mut keys: Vec<String> = store.nodes.keys().cloned().collect();
keys.sort();
keys.truncate(200);
keys
}
// --- Digest level descriptors --- // --- Digest level descriptors ---
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
@ -122,34 +114,23 @@ fn digest_node_key(level_name: &str, label: &str) -> String {
// --- Input gathering --- // --- Input gathering ---
/// Result of gathering inputs for a digest.
struct GatherResult {
label: String,
/// (display_label, content) pairs for the prompt.
inputs: Vec<(String, String)>,
/// Store keys of source nodes — used to create structural links.
source_keys: Vec<String>,
}
/// Load child digest content from the store. /// Load child digest content from the store.
fn load_child_digests(store: &Store, prefix: &str, labels: &[String]) -> (Vec<(String, String)>, Vec<String>) { fn load_child_digests(store: &Store, prefix: &str, labels: &[String]) -> Vec<(String, String)> {
let mut digests = Vec::new(); let mut digests = Vec::new();
let mut keys = Vec::new();
for label in labels { for label in labels {
let key = digest_node_key(prefix, label); let key = digest_node_key(prefix, label);
if let Some(node) = store.nodes.get(&key) { if let Some(node) = store.nodes.get(&key) {
digests.push((label.clone(), node.content.clone())); digests.push((label.clone(), node.content.clone()));
keys.push(key);
} }
} }
(digests, keys) digests
} }
/// Unified: gather inputs for any digest level. /// Unified: gather inputs for any digest level.
fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<GatherResult, String> { fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<(String, Vec<(String, String)>), String> {
let (label, dates) = (level.label_dates)(arg)?; let (label, dates) = (level.label_dates)(arg)?;
let (inputs, source_keys) = if let Some(child_name) = level.child_name { let inputs = if let Some(child_name) = level.child_name {
// Map parent's dates through child's date_to_label → child labels // Map parent's dates through child's date_to_label → child labels
let child = LEVELS.iter() let child = LEVELS.iter()
.find(|l| l.name == child_name) .find(|l| l.name == child_name)
@ -162,21 +143,19 @@ fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<GatherResult,
load_child_digests(store, child_name, &child_labels) load_child_digests(store, child_name, &child_labels)
} else { } else {
// Leaf level: scan store for episodic entries matching date // Leaf level: scan store for episodic entries matching date
let mut entries: Vec<_> = store.nodes.iter() let mut entries: Vec<_> = store.nodes.values()
.filter(|(_, n)| n.node_type == store::NodeType::EpisodicSession .filter(|n| n.node_type == store::NodeType::EpisodicSession
&& n.created_at > 0 && n.timestamp > 0
&& store::format_date(n.created_at) == label) && store::format_date(n.timestamp) == label)
.map(|(key, n)| { .map(|n| {
(store::format_datetime(n.timestamp), n.content.clone(), key.clone()) (store::format_datetime(n.timestamp), n.content.clone())
}) })
.collect(); .collect();
entries.sort_by(|a, b| a.0.cmp(&b.0)); entries.sort_by(|a, b| a.0.cmp(&b.0));
let keys = entries.iter().map(|(_, _, k)| k.clone()).collect(); entries
let inputs = entries.into_iter().map(|(dt, c, _)| (dt, c)).collect();
(inputs, keys)
}; };
Ok(GatherResult { label, inputs, source_keys }) Ok((label, inputs))
} }
/// Unified: find candidate labels for auto-generation (past, not yet generated). /// Unified: find candidate labels for auto-generation (past, not yet generated).
@ -209,7 +188,6 @@ fn generate_digest(
level: &DigestLevel, level: &DigestLevel,
label: &str, label: &str,
inputs: &[(String, String)], inputs: &[(String, String)],
source_keys: &[String],
) -> Result<(), String> { ) -> Result<(), String> {
println!("Generating {} digest for {}...", level.name, label); println!("Generating {} digest for {}...", level.name, label);
@ -231,92 +209,22 @@ fn generate_digest(
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join(", "); .join(", ");
// Load agent def — drives template, temperature, priority, tools let prompt = super::prompts::load_prompt("digest", &[
let def = super::defs::get_def("digest") ("{{LEVEL}}", level.title),
.ok_or("no digest agent definition")?; ("{{PERIOD}}", level.period),
let template = def.steps.first() ("{{INPUT_TITLE}}", level.input_title),
.map(|s| s.prompt.clone()) ("{{LABEL}}", label),
.ok_or("digest agent has no prompt")?; ("{{CONTENT}}", &content),
("{{COVERED}}", &covered),
// Substitute digest-specific and config placeholders, then resolve ("{{KEYS}}", &keys_text),
// standard {{node:...}} etc. via the placeholder system ])?;
let cfg = crate::config::get();
let partial = template
.replace("{agent_name}", &def.agent)
.replace("{user_name}", &cfg.user_name)
.replace("{assistant_name}", &cfg.assistant_name)
.replace("{{LEVEL}}", level.title)
.replace("{{PERIOD}}", level.period)
.replace("{{INPUT_TITLE}}", level.input_title)
.replace("{{LABEL}}", label)
.replace("{{CONTENT}}", &content)
.replace("{{COVERED}}", &covered)
.replace("{{KEYS}}", &keys_text);
let graph = store.build_graph();
let (prompt, _) = super::defs::resolve_placeholders(
&partial, store, &graph, &[], 0,
);
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4); println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
// Log to file like other agents println!(" Calling Sonnet...");
let log_dir = dirs::home_dir().unwrap_or_default() let digest = call_sonnet("digest", &prompt)?;
.join(".consciousness/logs/llm/digest");
std::fs::create_dir_all(&log_dir).ok();
let log_path = log_dir.join(format!("{}.txt", crate::store::compact_timestamp()));
let _log = move |msg: &str| {
use std::io::Write;
if let Ok(mut f) = std::fs::OpenOptions::new()
.create(true).append(true).open(&log_path)
{
let _ = writeln!(f, "{}", msg);
}
};
println!(" Calling LLM...");
let prompts = vec![prompt];
let phases: Vec<String> = def.steps.iter().map(|s| s.phase.clone()).collect();
// Filter tools based on agent def
let all_tools = crate::agent::tools::memory_and_journal_tools();
let tools: Vec<_> = if def.tools.is_empty() {
all_tools.to_vec()
} else {
all_tools.into_iter()
.filter(|t| def.tools.iter().any(|w| w == &t.name))
.collect()
};
let digest = crate::agent::oneshot::call_api_with_tools_sync(
&def.agent, &prompts, &phases, def.temperature, def.priority,
&tools, None)?;
let key = digest_node_key(level.name, label); let key = digest_node_key(level.name, label);
store.upsert_provenance(&key, &digest, "digest:write")?; store.upsert_provenance(&key, &digest, "digest:write")?;
// Structural links: connect all source entries to this digest
let mut linked = 0;
for source_key in source_keys {
// Skip if link already exists
let exists = store.relations.iter().any(|r|
!r.deleted && r.source_key == *source_key && r.target_key == key);
if exists { continue; }
let source_uuid = store.nodes.get(source_key)
.map(|n| n.uuid).unwrap_or([0u8; 16]);
let target_uuid = store.nodes.get(&key)
.map(|n| n.uuid).unwrap_or([0u8; 16]);
let mut rel = new_relation(
source_uuid, target_uuid,
store::RelationType::Link, 0.8,
source_key, &key,
);
rel.provenance = "digest:structural".to_string();
store.add_relation(rel)?;
linked += 1;
}
if linked > 0 {
println!(" Linked {} source entries → {}", linked, key);
}
store.save()?; store.save()?;
println!(" Stored: {}", key); println!(" Stored: {}", key);
@ -330,8 +238,8 @@ pub fn generate(store: &mut Store, level_name: &str, arg: &str) -> Result<(), St
let level = LEVELS.iter() let level = LEVELS.iter()
.find(|l| l.name == level_name) .find(|l| l.name == level_name)
.ok_or_else(|| format!("unknown digest level: {}", level_name))?; .ok_or_else(|| format!("unknown digest level: {}", level_name))?;
let result = gather(level, store, arg)?; let (label, inputs) = gather(level, store, arg)?;
generate_digest(store, level, &result.label, &result.inputs, &result.source_keys) generate_digest(store, level, &label, &inputs)
} }
// --- Auto-detect and generate missing digests --- // --- Auto-detect and generate missing digests ---
@ -341,8 +249,8 @@ pub fn digest_auto(store: &mut Store) -> Result<(), String> {
// Collect all dates with episodic entries // Collect all dates with episodic entries
let dates: Vec<String> = store.nodes.values() let dates: Vec<String> = store.nodes.values()
.filter(|n| n.node_type == store::NodeType::EpisodicSession && n.created_at > 0) .filter(|n| n.node_type == store::NodeType::EpisodicSession && n.timestamp > 0)
.map(|n| store::format_date(n.created_at)) .map(|n| store::format_date(n.timestamp))
.collect::<BTreeSet<_>>() .collect::<BTreeSet<_>>()
.into_iter() .into_iter()
.collect(); .collect();
@ -355,15 +263,15 @@ pub fn digest_auto(store: &mut Store) -> Result<(), String> {
let mut skipped = 0u32; let mut skipped = 0u32;
for arg in &candidates { for arg in &candidates {
let result = gather(level, store, arg)?; let (label, inputs) = gather(level, store, arg)?;
let key = digest_node_key(level.name, &result.label); let key = digest_node_key(level.name, &label);
if store.nodes.contains_key(&key) { if store.nodes.contains_key(&key) {
skipped += 1; skipped += 1;
continue; continue;
} }
if result.inputs.is_empty() { continue; } if inputs.is_empty() { continue; }
println!("[auto] Missing {} digest for {}", level.name, result.label); println!("[auto] Missing {} digest for {}", level.name, label);
generate_digest(store, level, &result.label, &result.inputs, &result.source_keys)?; generate_digest(store, level, &label, &inputs)?;
generated += 1; generated += 1;
} }
@ -408,7 +316,7 @@ fn normalize_link_key(raw: &str) -> String {
} else if key.contains('#') { } else if key.contains('#') {
let (file, section) = key.split_once('#').unwrap(); let (file, section) = key.split_once('#').unwrap();
if let Some(bare) = file.strip_suffix(".md") { if let Some(bare) = file.strip_suffix(".md") {
key = format!("{}-{}", bare, section); key = format!("{}#{}", bare, section);
} }
} }
@ -585,110 +493,3 @@ pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, us
(applied, skipped, fallbacks) (applied, skipped, fallbacks)
} }
// --- Tool interface for digest generation (added 2026-04-04) ---
/// Helper: extract string argument from tool call
fn get_str_required(args: &serde_json::Value, name: &str) -> Result<String, String> {
args.get(name)
.and_then(|v| v.as_str())
.map(|s| s.to_string())
.ok_or_else(|| format!("{} is required", name))
}
/// Wrap a Result<T, String> for use in anyhow handlers.
fn str_err<T>(r: Result<T, String>) -> anyhow::Result<T> {
r.map_err(|e| anyhow::anyhow!("{}", e))
}
/// digest_daily tool handler: generate a daily digest
///
/// Expects a "date" string argument (format enforced downstream by
/// `generate`). Loads the store fresh, runs the daily digest, and returns a
/// human-readable confirmation.
async fn handle_digest_daily(
    _agent: Option<std::sync::Arc<super::super::agent::Agent>>,
    args: serde_json::Value,
) -> anyhow::Result<String> {
    let date = str_err(get_str_required(&args, "date"))?;
    let mut store = str_err(Store::load())?;
    str_err(generate(&mut store, "daily", &date))?;
    Ok(format!("Daily digest generated for {}", date))
}
/// digest_weekly tool handler: generate a weekly digest
///
/// Expects a "week" string argument; label interpretation is done by
/// `generate` for the "weekly" level.
async fn handle_digest_weekly(
    _agent: Option<std::sync::Arc<super::super::agent::Agent>>,
    args: serde_json::Value,
) -> anyhow::Result<String> {
    let week_label = str_err(get_str_required(&args, "week"))?;
    let mut store = str_err(Store::load())?;
    str_err(generate(&mut store, "weekly", &week_label))?;
    Ok(format!("Weekly digest generated for {}", week_label))
}
/// digest_monthly tool handler: generate a monthly digest
///
/// Expects a "month" string argument; label interpretation is done by
/// `generate` for the "monthly" level.
async fn handle_digest_monthly(
    _agent: Option<std::sync::Arc<super::super::agent::Agent>>,
    args: serde_json::Value,
) -> anyhow::Result<String> {
    let month = str_err(get_str_required(&args, "month"))?;
    let mut store = str_err(Store::load())?;
    str_err(generate(&mut store, "monthly", &month))?;
    Ok(format!("Monthly digest generated for {}", month))
}
/// digest_auto tool handler: auto-generate all missing digests
///
/// Takes no arguments; delegates entirely to `digest_auto`, which scans for
/// past periods with content but no digest.
async fn handle_digest_auto(
    _agent: Option<std::sync::Arc<super::super::agent::Agent>>,
    _args: serde_json::Value,
) -> anyhow::Result<String> {
    let mut store = str_err(Store::load())?;
    str_err(digest_auto(&mut store))?;
    Ok("Auto-generated all missing digests".to_string())
}
/// digest_links tool handler: parse and apply digest links
///
/// Collects link proposals from all digest nodes, applies them to the graph,
/// saves the store, and reports applied/skipped/fallback counts.
async fn handle_digest_links(
    _agent: Option<std::sync::Arc<super::super::agent::Agent>>,
    _args: serde_json::Value,
) -> anyhow::Result<String> {
    let mut store = str_err(Store::load())?;
    let links = parse_all_digest_links(&store);
    let (applied, skipped, fallbacks) = apply_digest_links(&mut store, &links);
    str_err(store.save())?;
    Ok(format!("Applied {} digest links ({} skipped, {} fallback)", applied, skipped, fallbacks))
}
/// Return digest tools array for the tool registry
///
/// Each Tool pairs a JSON-schema parameter spec with an async handler above.
/// The fixed-size array keeps registration allocation-free.
pub fn digest_tools() -> [super::super::agent::tools::Tool; 5] {
    use super::super::agent::tools::Tool;
    [
        // Single required "date" argument.
        Tool {
            name: "digest_daily",
            description: "Generate a daily digest from journal entries.",
            parameters_json: r#"{"type":"object","properties":{"date":{"type":"string","description":"Date in YYYY-MM-DD format"}}, "required":["date"]}"#,
            handler: Arc::new(|_a, v| Box::pin(async move { handle_digest_daily(_a, v).await })),
        },
        // Single required "week" argument.
        Tool {
            name: "digest_weekly",
            description: "Generate a weekly digest from daily digests.",
            parameters_json: r#"{"type":"object","properties":{"week":{"type":"string","description":"Week label (YYYY-W##) or date (YYYY-MM-DD)"}}, "required":["week"]}"#,
            handler: Arc::new(|_a, v| Box::pin(async move { handle_digest_weekly(_a, v).await })),
        },
        // Single required "month" argument.
        Tool {
            name: "digest_monthly",
            description: "Generate a monthly digest from weekly digests.",
            parameters_json: r#"{"type":"object","properties":{"month":{"type":"string","description":"Month label (YYYY-MM) or date (YYYY-MM-DD)"}}, "required":["month"]}"#,
            handler: Arc::new(|_a, v| Box::pin(async move { handle_digest_monthly(_a, v).await })),
        },
        // No arguments.
        Tool {
            name: "digest_auto",
            description: "Auto-generate all missing digests (daily, weekly, monthly) for past dates that have content but no digest yet.",
            parameters_json: r#"{"type":"object","properties":{}}"#,
            handler: Arc::new(|_a, v| Box::pin(async move { handle_digest_auto(_a, v).await })),
        },
        // No arguments.
        Tool {
            name: "digest_links",
            description: "Parse and apply structural links from digest nodes to the memory graph.",
            parameters_json: r#"{"type":"object","properties":{}}"#,
            handler: Arc::new(|_a, v| Box::pin(async move { handle_digest_links(_a, v).await })),
        },
    ]
}

View file

@ -0,0 +1,393 @@
// Journal enrichment and experience mining
//
// Two modes of processing conversation transcripts:
// journal_enrich — enrich a specific journal entry with source location and links
// experience_mine — retroactively find experiential moments not yet journaled
//
// Both extract conversation from JSONL transcripts, build prompts, call Sonnet,
// and apply results to the store.
use super::llm::{call_sonnet, parse_json_response, semantic_keys};
use crate::neuro;
use crate::store::{self, Store, new_node, new_relation};
use std::collections::hash_map::DefaultHasher;
use std::collections::HashSet;
use std::fs;
use std::hash::{Hash, Hasher};
use crate::store::StoreView;
use crate::util::parse_timestamp_to_epoch;
/// Compute the store dedup key for a transcript file.
/// This is the same key experience_mine uses to mark a transcript as mined.
///
/// NOTE(review): DefaultHasher's algorithm is not guaranteed stable across
/// Rust releases — persisted keys could be invalidated by a toolchain
/// upgrade. Confirm whether that matters for this store.
fn transcript_dedup_key(path: &str) -> Result<String, String> {
    let data = fs::read(path).map_err(|err| format!("read {}: {}", path, err))?;
    let mut state = DefaultHasher::new();
    data.hash(&mut state);
    let digest = state.finish();
    Ok(format!("_mined-transcripts#h-{:016x}", digest))
}
/// Check if a transcript has already been mined (dedup key exists in store).
/// Unreadable files (key computation fails) are treated as not mined.
pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool {
    transcript_dedup_key(path)
        .map(|key| store.node_content(&key).is_some())
        .unwrap_or(false)
}
/// Dedup key for a transcript based on its filename (UUID).
/// Used by the daemon reconcile loop — no file reads needed.
pub fn transcript_filename_key(path: &str) -> String {
    let stem = match std::path::Path::new(path).file_stem() {
        Some(s) => s.to_string_lossy().into_owned(),
        // Pathological paths with no stem fall back to the raw input.
        None => path.to_string(),
    };
    format!("_mined-transcripts#f-{}", stem)
}
/// Get the set of all mined transcript keys (both content-hash and filename)
/// from the store. Load once per daemon tick, check many.
pub fn mined_transcript_keys() -> HashSet<String> {
    use crate::store::AnyView;
    // Best-effort: if the store can't be loaded, behave as if nothing has
    // been mined (callers will simply re-check transcripts).
    let Ok(view) = AnyView::load() else { return HashSet::new() };
    let mut keys = HashSet::new();
    // Both "#h-" (content-hash) and "#f-" (filename) keys share this prefix.
    view.for_each_node(|key, _, _| {
        if key.starts_with("_mined-transcripts#") {
            keys.insert(key.to_string());
        }
    });
    keys
}
/// Extract user/assistant messages with line numbers from a JSONL transcript.
/// (line_number, role, text, timestamp)
pub fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
    let messages = super::transcript::parse_transcript(std::path::Path::new(jsonl_path))?;
    let mut rows = Vec::with_capacity(messages.len());
    for m in messages {
        rows.push((m.line, m.role, m.text, m.timestamp));
    }
    Ok(rows)
}
pub const COMPACTION_MARKER: &str = "This session is being continued from a previous conversation that ran out of context";

/// Split extracted messages into segments at compaction boundaries.
/// Each segment represents one continuous conversation before context was compacted.
/// A boundary is a user message starting with COMPACTION_MARKER; the
/// continuation message itself opens the new segment.
pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Vec<Vec<(usize, String, String, String)>> {
    let mut segments: Vec<Vec<(usize, String, String, String)>> = Vec::new();
    let mut current: Vec<(usize, String, String, String)> = Vec::new();
    for msg in messages {
        let at_boundary = msg.1 == "user" && msg.2.starts_with(COMPACTION_MARKER);
        if at_boundary && !current.is_empty() {
            // Close the running segment; the marker message starts the next one.
            segments.push(std::mem::take(&mut current));
        }
        current.push(msg);
    }
    if !current.is_empty() {
        segments.push(current);
    }
    segments
}
/// Format conversation messages for the prompt (truncating long messages).
/// Timestamps, when present, are clipped to their first 19 characters.
fn format_conversation(messages: &[(usize, String, String, String)]) -> String {
    let mut rendered = Vec::with_capacity(messages.len());
    for (line, role, text, ts) in messages {
        let body = crate::util::truncate(text, 1800, "...[truncated]");
        let entry = if ts.is_empty() {
            format!("L{} [{}]: {}", line, role, body)
        } else {
            format!("L{} [{}] {}: {}", line, role, &ts[..ts.len().min(19)], body)
        };
        rendered.push(entry);
    }
    rendered.join("\n\n")
}
/// Build the enrichment prompt for a journal entry.
///
/// `keys` are existing store keys offered to the model as link targets.
/// `grep_line` — presumably the transcript line where the entry text was
/// located; confirm against the caller. NOTE(review).
fn build_journal_prompt(
    entry_text: &str,
    conversation: &str,
    keys: &[String],
    grep_line: usize,
) -> Result<String, String> {
    // Render keys as a bulleted list for the template.
    let keys_text: String = keys.iter()
        .map(|k| format!(" - {}", k))
        .collect::<Vec<_>>()
        .join("\n");
    super::prompts::load_prompt("journal-enrich", &[
        ("{{GREP_LINE}}", &grep_line.to_string()),
        ("{{ENTRY_TEXT}}", entry_text),
        ("{{KEYS}}", &keys_text),
        ("{{CONVERSATION}}", conversation),
    ])
}
/// Enrich a journal entry with conversation context and link proposals.
///
/// Pipeline: extract the transcript → build a prompt with existing store
/// keys → call Sonnet → parse the JSON reply → apply any proposed links to
/// the store and save. Link proposals that can't be resolved are printed and
/// skipped rather than erroring.
pub fn journal_enrich(
    store: &mut Store,
    jsonl_path: &str,
    entry_text: &str,
    grep_line: usize,
) -> Result<(), String> {
    println!("Extracting conversation from {}...", jsonl_path);
    let messages = extract_conversation(jsonl_path)?;
    let conversation = format_conversation(&messages);
    println!(" {} messages, {} chars", messages.len(), conversation.len());
    let keys = semantic_keys(store);
    println!(" {} semantic keys", keys.len());
    let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line)?;
    println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
    println!(" Calling Sonnet...");
    let response = call_sonnet("enrich", &prompt)?;
    let result = parse_json_response(&response)?;
    // Report results — all fields are optional in the model's JSON; missing
    // or mistyped values degrade to 0/None rather than failing.
    let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0);
    let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0);
    let links = result.get("links").and_then(|v| v.as_array());
    let insights = result.get("missed_insights").and_then(|v| v.as_array());
    println!(" Source: L{}-L{}", source_start, source_end);
    println!(" Links: {}", links.map_or(0, |l| l.len()));
    println!(" Missed insights: {}", insights.map_or(0, |l| l.len()));
    // Apply links
    if let Some(links) = links {
        for link in links {
            let target = link.get("target").and_then(|v| v.as_str()).unwrap_or("");
            let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
            // "NOTE:"-prefixed targets are commentary from the model, not keys.
            if target.is_empty() || target.starts_with("NOTE:") {
                if let Some(note) = target.strip_prefix("NOTE:") {
                    println!(" NOTE: {}{}", note, reason);
                }
                continue;
            }
            // Resolve target and find journal node
            let resolved = match store.resolve_key(target) {
                Ok(r) => r,
                Err(_) => { println!(" SKIP {} (not in graph)", target); continue; }
            };
            let source_key = match store.find_journal_node(entry_text) {
                Some(k) => k,
                None => { println!(" SKIP {} (no matching journal node)", target); continue; }
            };
            // Refine target to best-matching section
            let source_content = store.nodes.get(&source_key)
                .map(|n| n.content.as_str()).unwrap_or("");
            let resolved = neuro::refine_target(store, source_content, &resolved);
            // Both endpoints must still exist to build the relation.
            let source_uuid = match store.nodes.get(&source_key) {
                Some(n) => n.uuid,
                None => continue,
            };
            let target_uuid = match store.nodes.get(&resolved) {
                Some(n) => n.uuid,
                None => continue,
            };
            // Weight 0.5: model-proposed links start at medium confidence.
            let rel = new_relation(
                source_uuid, target_uuid,
                store::RelationType::Link,
                0.5,
                &source_key, &resolved,
            );
            if store.add_relation(rel).is_ok() {
                println!(" LINK {}{} ({})", source_key, resolved, reason);
            }
        }
    }
    store.save()?;
    Ok(())
}
/// Mine a conversation transcript for experiential moments not yet journaled.
/// If `segment` is Some, only process that compaction segment of the file.
///
/// Dedup works at two levels: a content-hash key (`_mined-transcripts#h-…`)
/// marks the whole file as mined, and per-segment keys
/// (`<filename-key>.<idx>`) mark individual segments; the daemon writes the
/// whole-file filename key once all segments are done. Returns the number
/// of new journal entries written.
pub fn experience_mine(
    store: &mut Store,
    jsonl_path: &str,
    segment: Option<usize>,
) -> Result<usize, String> {
    println!("Experience mining: {}", jsonl_path);
    // Transcript-level dedup: hash the file content and check if already mined
    let transcript_bytes = fs::read(jsonl_path)
        .map_err(|e| format!("reading transcript: {}", e))?;
    let mut hasher = DefaultHasher::new();
    transcript_bytes.hash(&mut hasher);
    let hash = hasher.finish();
    let dedup_key = format!("_mined-transcripts#h-{:016x}", hash);
    if store.nodes.contains_key(&dedup_key) {
        // Backfill per-segment key if called with a specific segment
        if let Some(idx) = segment {
            let seg_key = format!("{}.{}", transcript_filename_key(jsonl_path), idx);
            if !store.nodes.contains_key(&seg_key) {
                let mut node = new_node(&seg_key, &format!("Backfilled from {}", dedup_key));
                node.provenance = "experience-mine:write".to_string();
                let _ = store.upsert_node(node);
                store.save()?;
            }
        }
        // [24..] drops the key prefix, showing only the hash tail.
        println!(" Already mined this transcript ({}), skipping.", &dedup_key[24..]);
        return Ok(0);
    }
    let all_messages = extract_conversation(jsonl_path)?;
    // If segment is specified, extract just that segment; otherwise process all messages
    let messages = match segment {
        Some(idx) => {
            let segments = split_on_compaction(all_messages);
            segments.into_iter().nth(idx)
                .ok_or_else(|| format!("segment {} out of range", idx))?
        }
        None => all_messages,
    };
    let conversation = format_conversation(&messages);
    println!(" {} messages, {} chars", messages.len(), conversation.len());
    // Load core identity nodes for context
    let cfg = crate::config::get();
    let identity: String = cfg.core_nodes.iter()
        .filter_map(|k| store.nodes.get(k).map(|n| n.content.as_str()))
        .collect::<Vec<_>>()
        .join("\n\n");
    // Get recent episodic entries to avoid duplication
    let mut journal: Vec<_> = store.nodes.values()
        .filter(|node| matches!(node.node_type, store::NodeType::EpisodicSession))
        .collect();
    journal.sort_by_key(|n| n.timestamp);
    // Ten most recent entries, shown to the model so it doesn't re-journal them.
    let recent: String = journal.iter().rev().take(10)
        .map(|n| format!("---\n{}\n", n.content))
        .collect();
    let keys = semantic_keys(store);
    let keys_text: String = keys.iter()
        .map(|k| format!(" - {}", k))
        .collect::<Vec<_>>()
        .join("\n");
    let prompt = super::prompts::load_prompt("experience", &[
        ("{{IDENTITY}}", &identity),
        ("{{RECENT_JOURNAL}}", &recent),
        ("{{KEYS}}", &keys_text),
        ("{{CONVERSATION}}", &conversation),
    ])?;
    // ~4 chars/token estimate; bail out before hitting the context limit.
    let est_tokens = prompt.len() / 4;
    println!(" Prompt: {} chars (~{} tokens)", prompt.len(), est_tokens);
    if est_tokens > 150_000 {
        println!(" Skipping: prompt too large ({} tokens > 150k limit)", est_tokens);
        return Ok(0);
    }
    println!(" Calling Sonnet...");
    let response = call_sonnet("experience-mine", &prompt)?;
    let entries = parse_json_response(&response)?;
    let entries = match entries.as_array() {
        Some(arr) => arr.clone(),
        None => return Err("expected JSON array".to_string()),
    };
    if entries.is_empty() {
        println!(" No missed experiences found.");
    } else {
        println!(" Found {} experiential moments:", entries.len());
    }
    let mut count = 0;
    for entry in &entries {
        let ts = entry.get("timestamp").and_then(|v| v.as_str()).unwrap_or("");
        let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
        if content.is_empty() { continue; }
        // Format with timestamp header
        let full_content = if ts.is_empty() {
            content.to_string()
        } else {
            format!("## {}\n\n{}", ts, content)
        };
        // Generate key from timestamp: first 50 alphanumeric/space chars of
        // the content, lowercased and kebab-ified.
        let key_slug: String = content.chars()
            .filter(|c| c.is_alphanumeric() || *c == ' ')
            .take(50)
            .collect::<String>()
            .trim()
            .to_lowercase()
            .replace(' ', "-");
        let key = if ts.is_empty() {
            format!("journal#j-mined-{}", key_slug)
        } else {
            format!("journal#j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
        };
        // Check for duplicate
        if store.nodes.contains_key(&key) {
            println!(" SKIP {} (duplicate)", key);
            continue;
        }
        // Write to store — use event timestamp, not mining time
        let mut node = new_node(&key, &full_content);
        node.node_type = store::NodeType::EpisodicSession;
        node.provenance = "experience-mine:write".to_string();
        if !ts.is_empty() {
            if let Some(epoch) = parse_timestamp_to_epoch(ts) {
                node.created_at = epoch;
            }
        }
        let _ = store.upsert_node(node);
        count += 1;
        let preview = crate::util::truncate(content, 77, "...");
        println!(" + [{}] {}", ts, preview);
    }
    // Record this transcript/segment as mined (even if count == 0, to prevent re-runs)
    let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
    match segment {
        Some(idx) => {
            // Per-segment key: the daemon writes the whole-file key when all segments are done
            let seg_key = format!("{}.{}", transcript_filename_key(jsonl_path), idx);
            let mut node = new_node(&seg_key, &dedup_content);
            node.provenance = "experience-mine:write".to_string();
            let _ = store.upsert_node(node);
        }
        None => {
            // Unsegmented: only write content-hash key (not the filename key, since the
            // file may grow with new compaction segments later — the daemon handles
            // writing the whole-file filename key after verifying all segments are done)
            let mut node = new_node(&dedup_key, &dedup_content);
            node.provenance = "experience-mine:write".to_string();
            let _ = store.upsert_node(node);
        }
    }
    if count > 0 {
        println!(" Saved {} new journal entries.", count);
    }
    store.save()?;
    println!("Done: {} new entries mined.", count);
    Ok(count)
}

View file

@ -0,0 +1,303 @@
// fact_mine.rs — extract atomic factual claims from conversation transcripts
//
// Chunks conversation text into overlapping windows, sends each to Haiku
// for extraction, deduplicates by claim text. Output: JSON array of facts.
//
// Uses Haiku (not Sonnet) for cost efficiency on high-volume extraction.
use crate::config;
use super::llm;
use super::transcript;
use crate::store;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::path::Path;
// Chunking geometry. Token counts are approximated as 4 chars/token.
const CHARS_PER_TOKEN: usize = 4;
// Each extraction window targets ~2000 tokens, with ~200 tokens of overlap
// between consecutive windows so facts straddling a boundary are still seen
// whole by at least one chunk.
const WINDOW_TOKENS: usize = 2000;
const OVERLAP_TOKENS: usize = 200;
const WINDOW_CHARS: usize = WINDOW_TOKENS * CHARS_PER_TOKEN;
const OVERLAP_CHARS: usize = OVERLAP_TOKENS * CHARS_PER_TOKEN;
/// Build the fact-extraction prompt prefix; the conversation excerpt is
/// appended by the caller. The configured user/assistant names are
/// substituted so claims use proper names rather than role labels.
fn extraction_prompt() -> String {
    let cfg = config::get();
    // NOTE: the {{ / }} pairs below are escaped braces — they render as
    // literal JSON braces in the prompt, not as format! placeholders.
    format!(
        r#"Extract atomic factual claims from this conversation excerpt.
Speakers are labeled [{user}] and [{assistant}] in the transcript.
Use their proper names in claims not "the user" or "the assistant."
Each claim should be:
- A single verifiable statement
- Specific enough to be useful in isolation
- Tagged with domain (e.g., bcachefs/btree, bcachefs/alloc, bcachefs/journal,
bcachefs/ec, bcachefs/reconcile, rust/idioms, workflow/preferences,
linux/kernel, memory/design, identity/personal)
- Tagged with confidence: "stated" (explicitly said), "implied" (logically follows),
or "speculative" (hypothesis, not confirmed)
- Include which speaker said it ("{user}", "{assistant}", or "Unknown")
Do NOT extract:
- Opinions or subjective assessments
- Conversational filler or greetings
- Things that are obviously common knowledge
- Restatements of the same fact (pick the clearest version)
- System messages, tool outputs, or error logs (extract what was LEARNED from them)
- Anything about the conversation itself ("{user} and {assistant} discussed...")
- Facts only relevant to this specific conversation (e.g. transient file paths, mid-debug state)
Output as a JSON array. Each element:
{{
"claim": "the exact factual statement",
"domain": "category/subcategory",
"confidence": "stated|implied|speculative",
"speaker": "{user}|{assistant}|Unknown"
}}
If the excerpt contains no extractable facts, output an empty array: []
--- CONVERSATION EXCERPT ---
"#, user = cfg.user_name, assistant = cfg.assistant_name)
}
/// One atomic factual claim extracted from a transcript chunk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fact {
    /// The factual statement itself, as produced by the model.
    pub claim: String,
    /// Topic tag, e.g. "bcachefs/btree" or "workflow/preferences".
    pub domain: String,
    /// Model-assigned level: "stated", "implied", or "speculative".
    pub confidence: String,
    /// Who made the claim: a configured speaker name, or "Unknown".
    pub speaker: String,
    /// Provenance, filled in after parsing: transcript filename, 1-based
    /// chunk index, and the chunk's byte offset in the formatted text.
    /// Omitted from serialized JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_file: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_chunk: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_offset: Option<usize>,
}
/// Parse a JSONL transcript, keeping only messages with at least 20 bytes
/// of text. Parse failures yield an empty list rather than an error.
fn extract_messages(path: &Path) -> Vec<transcript::TranscriptMessage> {
    let parsed = transcript::parse_transcript(path).unwrap_or_default();
    let mut kept = Vec::with_capacity(parsed.len());
    for msg in parsed {
        if msg.text.len() >= 20 {
            kept.push(msg);
        }
    }
    kept
}
/// Flatten messages into one labeled text stream ready for chunking.
///
/// Each message renders as "[Name ts] text" — or "[Name] text" when the
/// timestamp is shorter than 19 chars — with bodies truncated to 2800
/// chars and entries separated by blank lines. Labels come from the
/// configured user/assistant names.
fn format_for_extraction(messages: &[transcript::TranscriptMessage]) -> String {
    let cfg = config::get();
    let mut parts = Vec::with_capacity(messages.len());
    for msg in messages {
        let label = if msg.role == "user" { &cfg.user_name } else { &cfg.assistant_name };
        let body = crate::util::truncate(&msg.text, 2800, "\n[...truncated...]");
        let stamp = if msg.timestamp.len() >= 19 { &msg.timestamp[..19] } else { "" };
        let entry = if stamp.is_empty() {
            format!("[{}] {}", label, body)
        } else {
            format!("[{} {}] {}", label, stamp, body)
        };
        parts.push(entry);
    }
    parts.join("\n\n")
}
/// Split text into overlapping windows, breaking at paragraph boundaries.
fn chunk_text(text: &str) -> Vec<(usize, &str)> {
let mut chunks = Vec::new();
let mut start = 0;
while start < text.len() {
let mut end = text.floor_char_boundary((start + WINDOW_CHARS).min(text.len()));
// Try to break at a paragraph boundary
if end < text.len() {
if let Some(para) = text[start..end].rfind("\n\n") {
if para > WINDOW_CHARS / 2 {
end = start + para;
}
}
}
chunks.push((start, &text[start..end]));
let next = text.floor_char_boundary(end.saturating_sub(OVERLAP_CHARS));
if next <= start {
start = end;
} else {
start = next;
}
}
chunks
}
/// Parse a model response into facts.
///
/// Tolerates markdown code fences and surrounding prose: fence lines are
/// stripped, then the outermost '['..']' span is parsed as JSON. Any
/// missing or malformed array yields an empty Vec rather than an error —
/// extraction is best-effort per chunk.
fn parse_facts(response: &str) -> Vec<Fact> {
    let cleaned = response.trim();
    // Strip markdown code fences the model sometimes wraps output in.
    let cleaned = if cleaned.starts_with("```") {
        cleaned.lines()
            .filter(|l| !l.starts_with("```"))
            .collect::<Vec<_>>()
            .join("\n")
    } else {
        cleaned.to_string()
    };
    // Locate the outermost JSON array.
    let (Some(start), Some(end)) = (cleaned.find('['), cleaned.rfind(']')) else {
        return Vec::new();
    };
    // Guard start <= end: a stray ']' before the first '[' would otherwise
    // make the slice below panic on an out-of-order range.
    if start > end {
        return Vec::new();
    }
    serde_json::from_str(&cleaned[start..=end]).unwrap_or_default()
}
/// Mine a single transcript for atomic facts.
/// The optional `progress` callback receives status strings (e.g. "chunk 3/47").
///
/// Steps: extract usable messages → flatten to labeled text → chunk into
/// overlapping windows → send each chunk to Haiku → parse and tag the
/// returned facts → dedup by lowercased claim text. With `dry_run`, prints
/// chunk previews and returns no facts. Per-chunk LLM failures are logged
/// and skipped so one bad call doesn't lose the rest of the transcript.
pub fn mine_transcript(
    path: &Path,
    dry_run: bool,
    progress: Option<&dyn Fn(&str)>,
) -> Result<Vec<Fact>, String> {
    let filename = path.file_name()
        .map(|n| n.to_string_lossy().to_string())
        .unwrap_or_else(|| "unknown".into());
    // Log to stderr, mirroring to the progress callback when provided.
    let log = |msg: &str| {
        eprintln!("{}", msg);
        if let Some(cb) = progress { cb(msg); }
    };
    log(&format!("Mining: {}", filename));
    let messages = extract_messages(path);
    if messages.is_empty() {
        log("No messages found");
        return Ok(Vec::new());
    }
    log(&format!("{} messages extracted", messages.len()));
    let text = format_for_extraction(&messages);
    let chunks = chunk_text(&text);
    log(&format!("{} chunks ({} chars)", chunks.len(), text.len()));
    if dry_run {
        // Preview mode: show the chunk layout without calling the LLM.
        for (i, (offset, chunk)) in chunks.iter().enumerate() {
            eprintln!("\n--- Chunk {} (offset {}, {} chars) ---", i + 1, offset, chunk.len());
            eprintln!("{}", crate::util::truncate(chunk, 500, ""));
            if chunk.len() > 500 {
                eprintln!(" ... ({} more chars)", chunk.len() - 500);
            }
        }
        return Ok(Vec::new());
    }
    let prompt_prefix = extraction_prompt();
    let mut all_facts = Vec::new();
    for (i, (_offset, chunk)) in chunks.iter().enumerate() {
        let status = format!("chunk {}/{} ({} chars)", i + 1, chunks.len(), chunk.len());
        eprint!(" {}...", status);
        if let Some(cb) = progress { cb(&status); }
        let prompt = format!("{}{}\n\n--- END OF EXCERPT ---\n\nReturn ONLY a JSON array of factual claims, or [] if none.", prompt_prefix, chunk);
        // A failed chunk is logged and skipped — don't abort the transcript.
        let response = match llm::call_haiku("fact-mine", &prompt) {
            Ok(r) => r,
            Err(e) => {
                eprintln!(" error: {}", e);
                continue;
            }
        };
        let mut facts = parse_facts(&response);
        // Tag provenance: source file, 1-based chunk index, byte offset.
        for fact in &mut facts {
            fact.source_file = Some(filename.clone());
            fact.source_chunk = Some(i + 1);
            fact.source_offset = Some(*_offset);
        }
        eprintln!(" {} facts", facts.len());
        all_facts.extend(facts);
    }
    // Deduplicate by claim text (case-insensitive; first occurrence wins).
    let mut seen = HashSet::new();
    let before = all_facts.len();
    all_facts.retain(|f| seen.insert(f.claim.to_lowercase()));
    let dupes = before - all_facts.len();
    if dupes > 0 {
        log(&format!("{} duplicates removed", dupes));
    }
    log(&format!("Total: {} unique facts", all_facts.len()));
    Ok(all_facts)
}
/// Mine a transcript and store facts in the capnp store.
/// Returns the number of facts stored.
/// The optional `progress` callback receives status strings for daemon display.
///
/// The fact list is serialized as JSON under a `_facts-<transcript>` key
/// (subject to naming resolution, which may rename the node or merge the
/// JSON into an existing one). An empty `[]` marker is written even when
/// nothing was mined, so the transcript is not re-queued later.
pub fn mine_and_store(
    path: &Path,
    progress: Option<&dyn Fn(&str)>,
) -> Result<usize, String> {
    let facts = mine_transcript(path, false, progress)?;
    let filename = path.file_name()
        .map(|n| n.to_string_lossy().to_string())
        .unwrap_or_else(|| "unknown".into());
    let proposed_key = format!("_facts-{}", filename.trim_end_matches(".jsonl"));
    // Always write a marker so we don't re-queue empty transcripts
    let json = if facts.is_empty() {
        "[]".to_string()
    } else {
        serde_json::to_string_pretty(&facts)
            .map_err(|e| format!("serialize facts: {}", e))?
    };
    let mut store = store::Store::load()?;
    // Run naming resolution to get a good key (and possibly merge into existing)
    let resolution = super::knowledge::resolve_naming(&store, &proposed_key, &json);
    let key = match resolution {
        super::knowledge::NamingResolution::Create(k) => k,
        super::knowledge::NamingResolution::MergeInto(existing_key) => {
            // Merge: append facts to existing node's content
            eprintln!(" Merging facts into existing node: {}", existing_key);
            if let Some(node) = store.nodes.get(existing_key.as_str()) {
                let merged = format!("{}\n\n{}", node.content, json);
                store.upsert_provenance(&existing_key, &merged, "fact-mine:write")?;
                store.save()?;
                return Ok(facts.len());
            }
            // Fallback if existing node disappeared
            proposed_key
        }
    };
    store.upsert_provenance(&key, &json, "fact-mine:write")?;
    store.save()?;
    eprintln!(" Stored {} facts as {}", facts.len(), key);
    Ok(facts.len())
}
/// Mine several transcripts, concatenating all their facts.
///
/// Files with fewer than `min_messages` usable messages are skipped with a
/// note on stderr; any per-file mining error aborts the batch.
pub fn mine_batch(paths: &[&Path], min_messages: usize, dry_run: bool) -> Result<Vec<Fact>, String> {
    let mut collected = Vec::new();
    for path in paths {
        let messages = extract_messages(path);
        if messages.len() < min_messages {
            let name = path.file_name().map(|n| n.to_string_lossy()).unwrap_or_default();
            eprintln!("Skipping {} ({} messages < {})",
                name, messages.len(), min_messages);
            continue;
        }
        collected.extend(mine_transcript(path, dry_run, None)?);
    }
    Ok(collected)
}

View file

@ -0,0 +1,972 @@
// knowledge.rs — knowledge agent action parsing, depth tracking, and convergence loop
//
// Agent prompts live in agents/*.agent files, dispatched via defs.rs.
// This module handles:
// - Action parsing (WRITE_NODE, LINK, REFINE from LLM output)
// - Inference depth tracking (prevents runaway abstraction)
// - Action application (write to store with provenance)
// - Convergence loop (sequences agents, measures graph stability)
// - Conversation fragment selection (for observation agent)
use crate::graph::Graph;
use super::llm;
use crate::spectral;
use crate::store::{self, Store, new_relation, RelationType};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
// ---------------------------------------------------------------------------
// Action types
// ---------------------------------------------------------------------------
/// One proposed graph mutation parsed from agent output, plus bookkeeping
/// about how it was judged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Action {
    /// What to do (write / link / refine / demote).
    pub kind: ActionKind,
    /// Model-asserted confidence parsed from the action text.
    pub confidence: Confidence,
    /// Contribution to change-delta metrics (confidence-derived for writes,
    /// fixed by the parser for links/refines/demotes).
    pub weight: f64,
    /// Inference depth; -1 marks actions exempt from depth gating.
    pub depth: i32,
    /// Outcome flag — None until a later stage records whether the action
    /// was applied.
    pub applied: Option<bool>,
    /// Human-readable reason when the action was rejected, if any.
    pub rejected_reason: Option<String>,
}
/// The four mutations an agent may request via its text protocol.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ActionKind {
    /// Create or overwrite a node (`WRITE_NODE … END_NODE`). `covers` lists
    /// the keys this node abstracts over, used for depth computation.
    WriteNode {
        key: String,
        content: String,
        covers: Vec<String>,
    },
    /// Add a link relation between two existing nodes (`LINK a b`).
    Link {
        source: String,
        target: String,
    },
    /// Replace an existing node's content (`REFINE … END_REFINE`).
    Refine {
        key: String,
        content: String,
    },
    /// Halve a node's weight (`DEMOTE key`).
    Demote {
        key: String,
    },
}
/// Model-asserted confidence level attached to a parsed action.
/// Serialized in lowercase ("high" / "medium" / "low").
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Confidence {
    High,
    Medium,
    Low,
}
impl Confidence {
    /// Weight for delta metrics — how much this action contributes to change measurement.
    fn delta_weight(self) -> f64 {
        match self {
            Self::High => 1.0,
            Self::Medium => 0.6,
            Self::Low => 0.3,
        }
    }
    /// Confidence value for depth gating — capped below 1.0 so even "high" must clear thresholds.
    fn gate_value(self) -> f64 {
        match self {
            Self::High => 0.9,
            Self::Medium => 0.6,
            Self::Low => 0.3,
        }
    }
    /// Lenient parse: "high"/"low" (any case) map directly; everything
    /// else — including typos — defaults to Medium.
    fn parse(s: &str) -> Self {
        match s.to_lowercase().as_str() {
            "high" => Self::High,
            "low" => Self::Low,
            _ => Self::Medium,
        }
    }
}
// ---------------------------------------------------------------------------
// Action parsing
// ---------------------------------------------------------------------------
/// Parse `WRITE_NODE <key> … END_NODE` blocks from agent output.
///
/// Inside each block's content, an optional `CONFIDENCE: high|medium|low`
/// line and an optional `COVERS: a, b, c` line are extracted and stripped.
/// Order matters: CONFIDENCE is removed first, then COVERS is read from
/// (and removed from) the already-stripped content. Missing markers
/// default to Medium confidence and no covers.
pub fn parse_write_nodes(text: &str) -> Vec<Action> {
    let re = Regex::new(r"(?s)WRITE_NODE\s+(\S+)\s*\n(.*?)END_NODE").unwrap();
    let conf_re = Regex::new(r"(?i)CONFIDENCE:\s*(high|medium|low)").unwrap();
    let covers_re = Regex::new(r"COVERS:\s*(.+)").unwrap();
    re.captures_iter(text)
        .map(|cap| {
            let key = cap[1].to_string();
            let mut content = cap[2].trim().to_string();
            let confidence = conf_re
                .captures(&content)
                .map(|c| Confidence::parse(&c[1]))
                .unwrap_or(Confidence::Medium);
            content = conf_re.replace(&content, "").trim().to_string();
            let covers: Vec<String> = covers_re
                .captures(&content)
                .map(|c| c[1].split(',').map(|s| s.trim().to_string()).collect())
                .unwrap_or_default();
            content = covers_re.replace(&content, "").trim().to_string();
            Action {
                weight: confidence.delta_weight(),
                kind: ActionKind::WriteNode { key, content, covers },
                confidence,
                depth: 0,
                applied: None,
                rejected_reason: None,
            }
        })
        .collect()
}
/// Parse `LINK <source> <target>` lines (one per line) into link actions.
/// Links are always low-confidence, weight 0.3, and depth-exempt (-1).
pub fn parse_links(text: &str) -> Vec<Action> {
    let re = Regex::new(r"(?m)^LINK\s+(\S+)\s+(\S+)").unwrap();
    let mut actions = Vec::new();
    for cap in re.captures_iter(text) {
        actions.push(Action {
            kind: ActionKind::Link {
                source: cap[1].to_string(),
                target: cap[2].to_string(),
            },
            confidence: Confidence::Low,
            weight: 0.3,
            depth: -1,
            applied: None,
            rejected_reason: None,
        });
    }
    actions
}
/// Parse `REFINE <key> … END_REFINE` blocks into refine actions.
/// Keys are cleaned of stray markdown asterisks; refines are
/// medium-confidence with weight 0.7.
pub fn parse_refines(text: &str) -> Vec<Action> {
    let re = Regex::new(r"(?s)REFINE\s+(\S+)\s*\n(.*?)END_REFINE").unwrap();
    let mut actions = Vec::new();
    for cap in re.captures_iter(text) {
        let key = cap[1].trim_matches('*').trim().to_string();
        let content = cap[2].trim().to_string();
        actions.push(Action {
            kind: ActionKind::Refine { key, content },
            confidence: Confidence::Medium,
            weight: 0.7,
            depth: 0,
            applied: None,
            rejected_reason: None,
        });
    }
    actions
}
/// Parse `DEMOTE <key>` lines (one per line) into demote actions.
/// Demotes are medium-confidence, weight 0.5, and depth-exempt (-1).
pub fn parse_demotes(text: &str) -> Vec<Action> {
    let re = Regex::new(r"(?m)^DEMOTE\s+(\S+)").unwrap();
    let mut actions = Vec::new();
    for cap in re.captures_iter(text) {
        actions.push(Action {
            kind: ActionKind::Demote {
                key: cap[1].to_string(),
            },
            confidence: Confidence::Medium,
            weight: 0.5,
            depth: -1,
            applied: None,
            rejected_reason: None,
        });
    }
    actions
}
/// Parse every supported action form from agent output, in a fixed order:
/// WRITE_NODE blocks, then LINK lines, REFINE blocks, and DEMOTE lines.
pub fn parse_all_actions(text: &str) -> Vec<Action> {
    [
        parse_write_nodes(text),
        parse_links(text),
        parse_refines(text),
        parse_demotes(text),
    ]
    .into_iter()
    .flatten()
    .collect()
}
pub fn count_no_ops(text: &str) -> usize {
let no_conn = Regex::new(r"\bNO_CONNECTION\b").unwrap().find_iter(text).count();
let affirm = Regex::new(r"\bAFFIRM\b").unwrap().find_iter(text).count();
let no_extract = Regex::new(r"\bNO_EXTRACTION\b").unwrap().find_iter(text).count();
no_conn + affirm + no_extract
}
// ---------------------------------------------------------------------------
// Inference depth tracking
// ---------------------------------------------------------------------------
/// Store key under which the depth map is persisted as JSON.
const DEPTH_DB_KEY: &str = "_knowledge-depths";
/// Persistent map from node key → inference depth, serialized as a JSON
/// object inside a regular store node. Missing entries read as depth 0.
#[derive(Default)]
pub struct DepthDb {
    depths: HashMap<String, i32>,
}
impl DepthDb {
    /// Load the depth map from the store; a missing node or corrupt JSON
    /// yields an empty map rather than an error.
    pub fn load(store: &Store) -> Self {
        let depths = store.nodes.get(DEPTH_DB_KEY)
            .and_then(|n| serde_json::from_str(&n.content).ok())
            .unwrap_or_default();
        Self { depths }
    }
    /// Persist the map back to the store. Best-effort: serialization or
    /// upsert failures are silently ignored.
    pub fn save(&self, store: &mut Store) {
        if let Ok(json) = serde_json::to_string(&self.depths) {
            store.upsert_provenance(DEPTH_DB_KEY, &json,
                "observation:write").ok();
        }
    }
    /// Depth recorded for `key`; unknown keys default to 0.
    pub fn get(&self, key: &str) -> i32 {
        self.depths.get(key).copied().unwrap_or(0)
    }
    /// Record `depth` for `key`, replacing any previous value.
    pub fn set(&mut self, key: String, depth: i32) {
        self.depths.insert(key, depth);
    }
}
/// Base inference depth assigned to nodes written by each agent when no
/// COVERS list is given: observation=1, extractor=2, generalize/connector=3.
/// The challenger has no base depth (None); unknown agents default to 2.
fn agent_base_depth(agent: &str) -> Option<i32> {
    let depth = match agent {
        "challenger" => return None,
        "observation" => 1,
        "extractor" => 2,
        "generalize" | "connector" => 3,
        _ => 2,
    };
    Some(depth)
}
/// Compute the inference depth for an action.
///
/// Links and demotes are depth-exempt (-1). A refine inherits the depth of
/// the node it rewrites. A new node is one level deeper than the deepest
/// node it covers; with no covers it falls back to the agent's base depth
/// (2 for agents without one).
pub fn compute_action_depth(db: &DepthDb, action: &Action, agent: &str) -> i32 {
    match &action.kind {
        ActionKind::Link { .. } | ActionKind::Demote { .. } => -1,
        ActionKind::Refine { key, .. } => db.get(key),
        ActionKind::WriteNode { covers, .. } => {
            if !covers.is_empty() {
                covers.iter().map(|k| db.get(k)).max().unwrap_or(0) + 1
            } else {
                agent_base_depth(agent).unwrap_or(2)
            }
        }
    }
}
/// Confidence threshold that scales with inference depth.
///
/// Depth <= 0 requires nothing; each additional level compounds the base
/// requirement as 1 - (1-base)^depth, so deeper abstractions must be
/// progressively more confident to be accepted.
pub fn required_confidence(depth: i32, base: f64) -> f64 {
    match depth {
        d if d <= 0 => 0.0,
        d => 1.0 - (1.0 - base).powi(d),
    }
}
/// Confidence bonus from real-world use.
///
/// Zero uses earn no bonus; otherwise the bonus is 1 - 1/(1 + 0.15·n),
/// which rises toward (but never reaches) 1.0 as the use count grows.
pub fn use_bonus(use_count: u32) -> f64 {
    if use_count == 0 {
        0.0
    } else {
        let n = use_count as f64;
        1.0 - 1.0 / (1.0 + 0.15 * n)
    }
}
// ---------------------------------------------------------------------------
// Action application
// ---------------------------------------------------------------------------
/// Prefix node content with an HTML-comment provenance stamp recording the
/// authoring agent, creation timestamp, and inference depth.
fn stamp_content(content: &str, agent: &str, timestamp: &str, depth: i32) -> String {
    let header = format!("<!-- author: {} | created: {} | depth: {} -->", agent, timestamp, depth);
    let mut stamped = String::with_capacity(header.len() + 1 + content.len());
    stamped.push_str(&header);
    stamped.push('\n');
    stamped.push_str(content);
    stamped
}
/// True when a live (non-deleted) relation already connects `source` and
/// `target`, checked in both directions.
fn has_edge(store: &Store, source: &str, target: &str) -> bool {
    store.relations.iter().any(|rel| {
        if rel.deleted {
            return false;
        }
        let forward = rel.source_key == source && rel.target_key == target;
        let reverse = rel.source_key == target && rel.target_key == source;
        forward || reverse
    })
}
/// Apply one action to the store. Returns true if a write actually happened.
///
/// - WriteNode / Refine: content gets an author/timestamp/depth header
///   comment, then is upserted with "<agent>:write" / "<agent>:refine"
///   provenance.
/// - Link: skipped (false) if an equivalent edge already exists in either
///   direction or either endpoint node is missing; otherwise adds a
///   weight-0.3 Link relation tagged "<agent>:link".
/// - Demote: halves the node's weight (floored at 0.05) and re-tags its
///   provenance; false if the node doesn't exist.
pub fn apply_action(
    store: &mut Store,
    action: &Action,
    agent: &str,
    timestamp: &str,
    depth: i32,
) -> bool {
    match &action.kind {
        ActionKind::WriteNode { key, content, .. } => {
            let stamped = stamp_content(content, agent, timestamp, depth);
            let prov = format!("{}:write", agent);
            store.upsert_provenance(key, &stamped, &prov).is_ok()
        }
        ActionKind::Link { source, target } => {
            // Dedup first: an existing edge in either direction wins.
            if has_edge(store, source, target) {
                return false;
            }
            let source_uuid = match store.nodes.get(source.as_str()) {
                Some(n) => n.uuid,
                None => return false,
            };
            let target_uuid = match store.nodes.get(target.as_str()) {
                Some(n) => n.uuid,
                None => return false,
            };
            let mut rel = new_relation(
                source_uuid, target_uuid,
                RelationType::Link,
                0.3,
                source, target,
            );
            rel.provenance = format!("{}:link", agent);
            store.add_relation(rel).is_ok()
        }
        ActionKind::Refine { key, content } => {
            let stamped = stamp_content(content, agent, timestamp, depth);
            let prov = format!("{}:refine", agent);
            store.upsert_provenance(key, &stamped, &prov).is_ok()
        }
        ActionKind::Demote { key } => {
            if let Some(node) = store.nodes.get_mut(key) {
                node.provenance = format!("{}:demote", agent);
                // Halve the weight but never below the 0.05 floor.
                node.weight = (node.weight * 0.5).max(0.05);
                true
            } else {
                false
            }
        }
    }
}
/// Provenance string recorded on store writes made by a knowledge agent.
///
/// Known agents map to fixed "agent:knowledge-*" tags (extractor shares
/// the "pattern" tag); any other agent name becomes "agent:<name>".
fn agent_provenance(agent: &str) -> String {
    let known = match agent {
        "observation" => Some("agent:knowledge-observation"),
        "extractor" | "pattern" => Some("agent:knowledge-pattern"),
        "generalize" => Some("agent:knowledge-generalize"),
        "connector" => Some("agent:knowledge-connector"),
        "challenger" => Some("agent:knowledge-challenger"),
        _ => None,
    };
    match known {
        Some(tag) => tag.to_string(),
        None => format!("agent:{}", agent),
    }
}
// ---------------------------------------------------------------------------
// Naming resolution — called before creating any new node
// ---------------------------------------------------------------------------
/// Resolution from the naming agent for a proposed new node.
#[derive(Debug)]
pub enum NamingResolution {
    /// Create with the proposed key (or a better one chosen by the agent —
    /// RENAME verdicts also arrive as this variant).
    Create(String),
    /// Merge content into an existing node instead of creating a new one.
    MergeInto(String),
}
/// Find existing nodes that might conflict with a proposed new node.
/// Returns up to `limit` (key, content_preview) pairs, best matches first;
/// previews are the first 200 chars of each candidate's content.
fn find_conflicts(
    store: &Store,
    proposed_key: &str,
    proposed_content: &str,
    limit: usize,
) -> Vec<(String, String)> {
    use std::collections::BTreeMap;
    // Extract search terms from the key (split on separators) and first ~200 chars of content
    let mut terms: BTreeMap<String, f64> = BTreeMap::new();
    for part in proposed_key.split(|c: char| c == '-' || c == '_' || c == '#' || c == '.') {
        let p = part.to_lowercase();
        if p.len() >= 3 {
            terms.insert(p, 1.0);
        }
    }
    // Add a few content terms at half weight; or_insert keeps the stronger
    // 1.0 weight when a term already came from the key.
    let content_terms = crate::search::extract_query_terms(proposed_content, 5);
    for term in content_terms.split_whitespace() {
        terms.entry(term.to_string()).or_insert(0.5);
    }
    if terms.is_empty() {
        return Vec::new();
    }
    // Use component matching to find related nodes
    let (seeds, _) = crate::search::match_seeds_opts(&terms, store, true, false);
    let mut results: Vec<(String, f64)> = seeds.into_iter()
        .filter(|(k, _)| k != proposed_key)
        .collect();
    // Highest score first.
    results.sort_by(|a, b| b.1.total_cmp(&a.1));
    results.into_iter()
        .take(limit)
        .filter_map(|(key, _)| {
            let node = store.nodes.get(key.as_str())?;
            let preview: String = node.content.chars().take(200).collect();
            Some((key, preview))
        })
        .collect()
}
/// Format the naming prompt for a proposed node.
/// `conflicts` are rendered as markdown sections; the proposed content is
/// truncated to 1000 chars so large nodes don't bloat the LLM call.
fn format_naming_prompt(
    proposed_key: &str,
    proposed_content: &str,
    conflicts: &[(String, String)],
) -> String {
    let conflict_section = if conflicts.is_empty() {
        "(no existing nodes found with overlapping content)".to_string()
    } else {
        conflicts.iter()
            .map(|(key, preview)| format!("### `{}`\n\n{}", key, preview))
            .collect::<Vec<_>>()
            .join("\n\n")
    };
    // Truncate content for the prompt (don't send huge nodes to Haiku)
    let content_preview: String = proposed_content.chars().take(1000).collect();
    format!(
        "# Naming Agent — Node Key Resolution\n\n\
        You are given a proposed new node (key + content) and a list of existing\n\
        nodes that might overlap with it. Decide what to do:\n\n\
        1. **CREATE** the proposed key is good and there's no meaningful overlap.\n\
        2. **RENAME** the content is unique but the key is bad (UUID, truncated, generic).\n\
        3. **MERGE_INTO** an existing node already covers this content.\n\n\
        Good keys: 2-5 words in kebab-case, optionally with `#` subtopic.\n\
        Bad keys: UUIDs, single generic words, truncated auto-slugs.\n\n\
        Respond with exactly ONE line: `CREATE key`, `RENAME better_key`, or `MERGE_INTO existing_key`.\n\n\
        ## Proposed node\n\n\
        Key: `{}`\n\n\
        Content:\n```\n{}\n```\n\n\
        ## Existing nodes that might overlap\n\n\
        {}",
        proposed_key, content_preview, conflict_section,
    )
}
/// Parse the naming agent's one-line verdict.
///
/// Scans each line for `CREATE key`, `RENAME key`, or `MERGE_INTO key`;
/// RENAME is treated the same as CREATE (create with the better key).
/// Backticks wrapping the line or the key are stripped. Returns None when
/// no verdict line is found.
fn parse_naming_response(response: &str) -> Option<NamingResolution> {
    for raw in response.lines() {
        // Strip backticks — Haiku sometimes wraps the response line in them.
        let line = raw.trim().trim_matches('`').trim();
        for prefix in ["CREATE ", "RENAME "] {
            if let Some(rest) = line.strip_prefix(prefix) {
                let key = rest.trim().trim_matches('`').to_string();
                return Some(NamingResolution::Create(key));
            }
        }
        if let Some(rest) = line.strip_prefix("MERGE_INTO ") {
            let key = rest.trim().trim_matches('`').to_string();
            return Some(NamingResolution::MergeInto(key));
        }
    }
    None
}
/// Resolve naming for a proposed WriteNode action.
///
/// Searches for conflicting nodes, asks the naming LLM to adjudicate, and
/// returns either Create (possibly with a better key) or MergeInto. Any
/// LLM failure or unparseable response degrades gracefully to creating
/// with the proposed key as-is.
pub fn resolve_naming(
    store: &Store,
    proposed_key: &str,
    proposed_content: &str,
) -> NamingResolution {
    let conflicts = find_conflicts(store, proposed_key, proposed_content, 5);
    let prompt = format_naming_prompt(proposed_key, proposed_content, &conflicts);
    let response = match llm::call_model("naming", "sonnet", &prompt) {
        Ok(r) => r,
        Err(e) => {
            eprintln!("naming: LLM error ({}), using proposed key", e);
            return NamingResolution::Create(proposed_key.to_string());
        }
    };
    parse_naming_response(&response).unwrap_or_else(|| {
        eprintln!("naming: unparseable response, using proposed key");
        NamingResolution::Create(proposed_key.to_string())
    })
}
// ---------------------------------------------------------------------------
// Shared agent execution
// ---------------------------------------------------------------------------
/// Result of running a single agent through the common pipeline.
pub struct AgentResult {
    /// Raw LLM output (also persisted to the store as an audit report).
    pub output: String,
    /// Actions parsed from the output; not yet applied.
    pub actions: Vec<Action>,
    /// Count of explicit no-op markers found in the output.
    pub no_ops: usize,
    /// Keys of the nodes this agent run processed (visit-recorded).
    pub node_keys: Vec<String>,
}
/// Resolve naming for all WriteNode actions in a list.
///
/// For each WriteNode, calls the naming agent to check for conflicts and
/// get a good key. May convert WriteNode → Refine (if MERGE_INTO) or
/// update the key (if RENAME/CREATE with different key). All other action
/// kinds pass through untouched; confidence/weight/depth are preserved via
/// struct-update syntax.
pub fn resolve_action_names(store: &Store, actions: Vec<Action>) -> Vec<Action> {
    actions.into_iter().map(|action| {
        match &action.kind {
            ActionKind::WriteNode { key, content, covers } => {
                match resolve_naming(store, key, content) {
                    NamingResolution::Create(new_key) => {
                        if new_key == *key {
                            action // keep as-is
                        } else {
                            eprintln!("naming: {}{}", key, new_key);
                            Action {
                                kind: ActionKind::WriteNode {
                                    key: new_key,
                                    content: content.clone(),
                                    covers: covers.clone(),
                                },
                                ..action
                            }
                        }
                    }
                    NamingResolution::MergeInto(existing_key) => {
                        // Rewrite as a refine of the node we're merging into.
                        eprintln!("naming: {} → MERGE_INTO {}", key, existing_key);
                        Action {
                            kind: ActionKind::Refine {
                                key: existing_key,
                                content: content.clone(),
                            },
                            ..action
                        }
                    }
                }
            }
            _ => action,
        }
    }).collect()
}
/// Run a single agent and apply its actions (no depth tracking).
///
/// Executes the common agent pipeline, passes proposed node names through
/// the naming agent, then applies every action at depth 0 with a single
/// shared timestamp. Returns (total_actions, applied_count).
pub fn run_and_apply(
    store: &mut Store,
    agent_name: &str,
    batch_size: usize,
    llm_tag: &str,
) -> Result<(usize, usize), String> {
    let result = run_one_agent(store, agent_name, batch_size, llm_tag)?;
    let actions = resolve_action_names(store, result.actions);
    let ts = store::compact_timestamp();
    let total = actions.len();
    let mut applied = 0;
    for action in actions.iter() {
        applied += usize::from(apply_action(store, action, agent_name, &ts, 0));
    }
    Ok((total, applied))
}
/// Run a single agent: build prompt → call LLM → store output → parse actions → record visits.
///
/// This is the common pipeline shared by the knowledge loop, consolidation pipeline,
/// and daemon. Callers handle action application (with or without depth tracking).
/// The model to call comes from the agent's .agent definition (`def.model`),
/// not from a hardcoded choice.
pub fn run_one_agent(
    store: &mut Store,
    agent_name: &str,
    batch_size: usize,
    llm_tag: &str,
) -> Result<AgentResult, String> {
    let def = super::defs::get_def(agent_name)
        .ok_or_else(|| format!("no .agent file for {}", agent_name))?;
    let agent_batch = super::defs::run_agent(store, &def, batch_size)?;
    let output = llm::call_model(llm_tag, &def.model, &agent_batch.prompt)?;
    // Store raw output for audit trail; failures here are non-fatal.
    let ts = store::compact_timestamp();
    let report_key = format!("_{}-{}-{}", llm_tag, agent_name, ts);
    let provenance = agent_provenance(agent_name);
    store.upsert_provenance(&report_key, &output, &provenance).ok();
    let actions = parse_all_actions(&output);
    let no_ops = count_no_ops(&output);
    // Record visits for processed nodes (best-effort).
    if !agent_batch.node_keys.is_empty() {
        store.record_agent_visits(&agent_batch.node_keys, agent_name).ok();
    }
    Ok(AgentResult {
        output,
        actions,
        no_ops,
        node_keys: agent_batch.node_keys,
    })
}
// ---------------------------------------------------------------------------
// Conversation fragment selection
// ---------------------------------------------------------------------------
/// Extract human-readable dialogue from a conversation JSONL.
///
/// Keeps external user turns and assistant turns above a minimum length,
/// formats each as `**Name:** text`, and stops once the cumulative text
/// length exceeds `max_chars` (soft cap: the fragment that crosses the
/// budget is still included).
fn extract_conversation_text(path: &Path, max_chars: usize) -> String {
    let cfg = crate::config::get();
    let messages = super::transcript::parse_transcript(path).unwrap_or_default();
    let mut parts: Vec<String> = Vec::new();
    let mut used = 0usize;
    for msg in &messages {
        let is_user = msg.role == "user";
        // Very short messages carry no dialogue signal.
        let min_len = if is_user { 5 } else { 10 };
        if msg.text.len() <= min_len {
            continue;
        }
        if is_user {
            // Only real (external) user turns; drop interruption markers.
            if msg.user_type.as_deref() != Some("external") {
                continue;
            }
            if msg.text.starts_with("[Request interrupted") {
                continue;
            }
        }
        let speaker = if is_user { &cfg.user_name } else { &cfg.assistant_name };
        parts.push(format!("**{}:** {}", speaker, msg.text));
        used += msg.text.len();
        if used > max_chars {
            break;
        }
    }
    parts.join("\n\n")
}
/// Count short user messages (dialogue turns) in a JSONL.
///
/// A "turn" is an external user message of mid length (6–499 bytes) that is
/// neither an interruption marker nor an "Implement the following" task
/// dispatch — i.e. real back-and-forth conversation.
fn count_dialogue_turns(path: &Path) -> usize {
    let messages = super::transcript::parse_transcript(path).unwrap_or_default();
    let mut turns = 0;
    for m in &messages {
        if m.role != "user" {
            continue;
        }
        if m.user_type.as_deref() != Some("external") {
            continue;
        }
        // Too short = noise; too long = pasted content, not dialogue.
        if m.text.len() <= 5 || m.text.len() >= 500 {
            continue;
        }
        if m.text.starts_with("[Request interrupted")
            || m.text.starts_with("Implement the following")
        {
            continue;
        }
        turns += 1;
    }
    turns
}
/// Select conversation fragments for the observation extractor.
///
/// Scans the projects directory one level deep for *.jsonl transcripts over
/// 50 KB, ranks them by dialogue density (external user turns), and returns
/// up to `n` `(session_id, text)` fragments whose extracted text exceeds
/// 500 chars.
pub fn select_conversation_fragments(n: usize) -> Vec<(String, String)> {
    let projects = crate::config::get().projects_dir.clone();
    if !projects.exists() {
        return Vec::new();
    }
    // Collect candidate transcripts: any *.jsonl over 50 KB, one level deep.
    let mut candidates: Vec<PathBuf> = Vec::new();
    if let Ok(dirs) = fs::read_dir(&projects) {
        for dir in dirs.flatten() {
            if !dir.path().is_dir() {
                continue;
            }
            let Ok(files) = fs::read_dir(dir.path()) else { continue };
            for file in files.flatten() {
                let p = file.path();
                if !p.extension().map(|x| x == "jsonl").unwrap_or(false) {
                    continue;
                }
                if p.metadata().map(|m| m.len() > 50_000).unwrap_or(false) {
                    candidates.push(p);
                }
            }
        }
    }
    // Rank by dialogue density; require at least 10 real user turns.
    let mut ranked: Vec<(usize, PathBuf)> = candidates.into_iter()
        .map(|p| (count_dialogue_turns(&p), p))
        .filter(|(turns, _)| *turns >= 10)
        .collect();
    ranked.sort_by(|a, b| b.0.cmp(&a.0));
    // Scan up to 2n files so thin transcripts don't starve the result.
    let mut fragments = Vec::new();
    for (_, path) in ranked.iter().take(n * 2) {
        if fragments.len() >= n {
            break;
        }
        let session_id = path.file_stem()
            .map(|s| s.to_string_lossy().into_owned())
            .unwrap_or_else(|| "unknown".into());
        let text = extract_conversation_text(path, 8000);
        if text.len() > 500 {
            fragments.push((session_id, text));
        }
    }
    fragments
}
// ---------------------------------------------------------------------------
// Convergence metrics
// ---------------------------------------------------------------------------
/// Per-cycle record of the knowledge loop, serialized into the loop summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CycleResult {
    /// 1-based cycle index within the loop.
    pub cycle: usize,
    /// Compact timestamp taken at cycle start.
    pub timestamp: String,
    /// All actions proposed by the agents this cycle, applied or not.
    pub total_actions: usize,
    /// Actions that apply_action actually committed.
    pub total_applied: usize,
    /// No-op markers counted across agent outputs.
    pub total_no_ops: usize,
    /// Actions rejected by the depth/confidence gates.
    pub depth_rejected: usize,
    /// Sum of weights of applied actions — the behavioral-change measure
    /// used by check_convergence.
    pub weighted_delta: f64,
    /// Graph statistics captured before the cycle ran.
    pub graph_metrics_before: GraphMetrics,
    /// Graph statistics captured after the cycle ran.
    pub graph_metrics_after: GraphMetrics,
}
/// Point-in-time structural statistics of the knowledge graph.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GraphMetrics {
    /// Total node count in the store.
    pub nodes: usize,
    /// Edge count in the derived graph.
    pub edges: usize,
    /// Average clustering coefficient (Graph::avg_clustering_coefficient).
    pub cc: f64,
    /// Small-world sigma as reported by Graph::small_world_sigma.
    pub sigma: f64,
    /// Number of detected communities.
    pub communities: usize,
}
impl GraphMetrics {
    /// Snapshot the current graph statistics from the store and its derived graph.
    pub fn from_graph(store: &Store, graph: &Graph) -> Self {
        let cc = graph.avg_clustering_coefficient() as f64;
        let sigma = graph.small_world_sigma() as f64;
        let communities = graph.community_count();
        Self {
            nodes: store.nodes.len(),
            edges: graph.edge_count(),
            cc,
            sigma,
            communities,
        }
    }
}
fn metric_stability(history: &[CycleResult], key: &str, window: usize) -> f64 {
if history.len() < window { return f64::INFINITY; }
let values: Vec<f64> = history[history.len() - window..].iter()
.map(|h| match key {
"sigma" => h.graph_metrics_after.sigma,
"cc" => h.graph_metrics_after.cc,
"communities" => h.graph_metrics_after.communities as f64,
_ => 0.0,
})
.collect();
if values.len() < 2 { return f64::INFINITY; }
let mean = values.iter().sum::<f64>() / values.len() as f64;
if mean == 0.0 { return 0.0; }
let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / values.len() as f64;
variance.sqrt() / mean.abs()
}
/// Decide whether the knowledge loop has converged over the last `window`
/// cycles: all three topology metrics must be structurally stable (low
/// coefficient of variation) AND the average weighted action delta must be
/// below 1.0. Prints a diagnostic breakdown either way.
pub fn check_convergence(history: &[CycleResult], window: usize) -> bool {
    if history.len() < window {
        return false;
    }
    // Structural stability: CV of each topology metric over the window.
    let sigma_cv = metric_stability(history, "sigma", window);
    let cc_cv = metric_stability(history, "cc", window);
    let comm_cv = metric_stability(history, "communities", window);
    // Behavioral stability: mean weighted delta over the window.
    let tail = &history[history.len() - window..];
    let avg_delta = tail.iter().map(|r| r.weighted_delta).sum::<f64>() / tail.len() as f64;
    eprintln!("\n Convergence check (last {} cycles):", window);
    eprintln!(" sigma CV: {:.4} (< 0.05?)", sigma_cv);
    eprintln!(" CC CV: {:.4} (< 0.05?)", cc_cv);
    eprintln!(" community CV: {:.4} (< 0.10?)", comm_cv);
    eprintln!(" avg delta: {:.2} (< 1.00?)", avg_delta);
    let structurally_stable = sigma_cv < 0.05 && cc_cv < 0.05 && comm_cv < 0.10;
    let behaviorally_stable = avg_delta < 1.0;
    let converged = structurally_stable && behaviorally_stable;
    if converged {
        eprintln!(" → CONVERGED");
    }
    converged
}
// ---------------------------------------------------------------------------
// The knowledge loop
// ---------------------------------------------------------------------------
/// Tuning knobs for the knowledge loop.
pub struct KnowledgeLoopConfig {
    /// Hard cap on cycles, even if convergence is never reached.
    pub max_cycles: usize,
    /// Nodes per agent batch each cycle.
    pub batch_size: usize,
    /// Sliding-window size (in cycles) for convergence checks.
    pub window: usize,
    /// WriteNode actions whose computed depth exceeds this are rejected.
    pub max_depth: i32,
    /// Base confidence fed to required_confidence when gating actions.
    pub confidence_base: f64,
}
impl Default for KnowledgeLoopConfig {
    // Defaults: up to 20 cycles of 5-node batches, convergence judged over
    // a 5-cycle window, derivations capped at depth 4.
    fn default() -> Self {
        Self {
            max_cycles: 20,
            batch_size: 5,
            window: 5,
            max_depth: 4,
            confidence_base: 0.3,
        }
    }
}
/// Run the knowledge loop: repeated agent cycles until convergence or
/// `max_cycles`, whichever comes first.
///
/// Each cycle (see run_cycle) loads its own fresh Store, runs all agents,
/// and returns a CycleResult; convergence is checked over a sliding window
/// of recent results. On exit the full history is serialized and written
/// into the store under a key derived from the first cycle's timestamp.
pub fn run_knowledge_loop(config: &KnowledgeLoopConfig) -> Result<Vec<CycleResult>, String> {
    // This store is only used for the final summary write below; run_cycle
    // reloads the store itself each iteration.
    let mut store = Store::load()?;
    let mut depth_db = DepthDb::load(&store);
    let mut history = Vec::new();
    eprintln!("Knowledge Loop — fixed-point iteration");
    eprintln!(" max_cycles={} batch_size={}", config.max_cycles, config.batch_size);
    eprintln!(" window={} max_depth={}", config.window, config.max_depth);
    for cycle in 1..=config.max_cycles {
        let result = run_cycle(cycle, config, &mut depth_db)?;
        history.push(result);
        if check_convergence(&history, config.window) {
            eprintln!("\n CONVERGED after {} cycles", cycle);
            break;
        }
    }
    // Save loop summary as a store node
    // NOTE(review): this final depth_db.save / store.save only runs when
    // history is non-empty AND serialization succeeds — run_cycle also
    // saves depth_db each cycle, so confirm nothing is lost otherwise.
    if let Some(first) = history.first() {
        let key = format!("_knowledge-loop-{}", first.timestamp);
        if let Ok(json) = serde_json::to_string_pretty(&history) {
            // Reload to pick up changes the cycles persisted on disk.
            store = Store::load()?;
            store.upsert_provenance(&key, &json,
                "observation:write").ok();
            depth_db.save(&mut store);
            store.save()?;
        }
    }
    Ok(history)
}
/// Run one full cycle: load a fresh store, run every agent, gate and apply
/// their actions, persist depth data, recompute the spectral embedding when
/// anything changed, and return before/after metrics.
///
/// WriteNode actions pass two gates before application:
///  - effective confidence (gate value + use-count bonus, capped at 1.0)
///    must meet required_confidence(depth, config.confidence_base);
///  - computed depth must not exceed config.max_depth.
/// Rejections are tallied in CycleResult::depth_rejected.
fn run_cycle(
    cycle_num: usize,
    config: &KnowledgeLoopConfig,
    depth_db: &mut DepthDb,
) -> Result<CycleResult, String> {
    let timestamp = store::compact_timestamp();
    eprintln!("\n{}", "=".repeat(60));
    eprintln!("CYCLE {}{}", cycle_num, timestamp);
    eprintln!("{}", "=".repeat(60));
    // Fresh load each cycle so we see whatever earlier cycles persisted.
    let mut store = Store::load()?;
    let graph = store.build_graph();
    let metrics_before = GraphMetrics::from_graph(&store, &graph);
    eprintln!(" Before: nodes={} edges={} cc={:.3} sigma={:.3}",
        metrics_before.nodes, metrics_before.edges, metrics_before.cc, metrics_before.sigma);
    let mut all_actions = Vec::new();
    let mut all_no_ops = 0;
    let mut depth_rejected = 0;
    let mut total_applied = 0;
    // Run each agent via .agent file dispatch
    let agent_names = ["observation", "extractor", "generalize", "connector", "challenger"];
    for agent_name in &agent_names {
        eprintln!("\n --- {} (n={}) ---", agent_name, config.batch_size);
        // An agent failure is logged and skipped; the cycle continues.
        let result = match run_one_agent(&mut store, agent_name, config.batch_size, "knowledge") {
            Ok(r) => r,
            Err(e) => {
                eprintln!(" ERROR: {}", e);
                continue;
            }
        };
        let mut actions = result.actions;
        all_no_ops += result.no_ops;
        eprintln!(" Actions: {} No-ops: {}", actions.len(), result.no_ops);
        let mut applied = 0;
        for action in &mut actions {
            let depth = compute_action_depth(depth_db, action, agent_name);
            action.depth = depth;
            match &action.kind {
                ActionKind::WriteNode { key, covers, .. } => {
                    // Deeper derivations require higher confidence; nodes
                    // built from heavily-used sources get a bonus.
                    let conf_val = action.confidence.gate_value();
                    let req = required_confidence(depth, config.confidence_base);
                    let source_uses: Vec<u32> = covers.iter()
                        .filter_map(|k| store.nodes.get(k).map(|n| n.uses))
                        .collect();
                    let avg_uses = if source_uses.is_empty() { 0 }
                        else { source_uses.iter().sum::<u32>() / source_uses.len() as u32 };
                    let eff_conf = (conf_val + use_bonus(avg_uses)).min(1.0);
                    if eff_conf < req {
                        action.applied = Some(false);
                        action.rejected_reason = Some("depth_threshold".into());
                        depth_rejected += 1;
                        continue;
                    }
                    if depth > config.max_depth {
                        action.applied = Some(false);
                        action.rejected_reason = Some("max_depth".into());
                        depth_rejected += 1;
                        continue;
                    }
                    eprintln!(" WRITE {} depth={} conf={:.2} eff={:.2} req={:.2}",
                        key, depth, conf_val, eff_conf, req);
                }
                ActionKind::Link { source, target } => {
                    eprintln!(" LINK {}{}", source, target);
                }
                ActionKind::Refine { key, .. } => {
                    eprintln!(" REFINE {} depth={}", key, depth);
                }
                ActionKind::Demote { key } => {
                    eprintln!(" DEMOTE {}", key);
                }
            }
            if apply_action(&mut store, action, agent_name, &timestamp, depth) {
                applied += 1;
                action.applied = Some(true);
                // Remember the depth of any node this action wrote so later
                // derivations from it are gated correctly.
                if let ActionKind::WriteNode { key, .. } | ActionKind::Refine { key, .. } = &action.kind {
                    depth_db.set(key.clone(), depth);
                }
            } else {
                action.applied = Some(false);
            }
        }
        eprintln!(" Applied: {}/{}", applied, actions.len());
        total_applied += applied;
        all_actions.extend(actions);
    }
    depth_db.save(&mut store);
    // Recompute spectral if anything changed
    if total_applied > 0 {
        eprintln!("\n Recomputing spectral embedding...");
        let graph = store.build_graph();
        let result = spectral::decompose(&graph, 8);
        let emb = spectral::to_embedding(&result);
        spectral::save_embedding(&emb).ok();
    }
    let graph = store.build_graph();
    let metrics_after = GraphMetrics::from_graph(&store, &graph);
    // Behavioral change this cycle = sum of applied action weights.
    let weighted_delta: f64 = all_actions.iter()
        .filter(|a| a.applied == Some(true))
        .map(|a| a.weight)
        .sum();
    eprintln!("\n CYCLE {} SUMMARY", cycle_num);
    eprintln!(" Applied: {}/{} depth-rejected: {} no-ops: {}",
        total_applied, all_actions.len(), depth_rejected, all_no_ops);
    eprintln!(" Weighted delta: {:.2}", weighted_delta);
    Ok(CycleResult {
        cycle: cycle_num,
        timestamp,
        total_actions: all_actions.len(),
        total_applied,
        total_no_ops: all_no_ops,
        depth_rejected,
        weighted_delta,
        graph_metrics_before: metrics_before,
        graph_metrics_after: metrics_after,
    })
}

View file

@ -0,0 +1,190 @@
// LLM utilities: model invocation and response parsing
//
// Calls claude CLI as a subprocess. Uses prctl(PR_SET_PDEATHSIG)
// so child processes die when the daemon exits, preventing orphans.
use crate::store::Store;
use regex::Regex;
use std::fs;
use std::os::unix::process::CommandExt;
use std::process::Command;
/// Append one prompt/response exchange to the per-agent usage log.
///
/// Logs land in `<data_dir>/llm-logs/<agent>/<YYYY-MM-DD>.md`, one markdown
/// entry per call. Logging is best-effort: every I/O failure is ignored.
fn log_usage(agent: &str, model: &str, prompt: &str, response: &str,
             duration_ms: u128, ok: bool) {
    use std::fmt::Write as _;
    use std::io::Write as _;
    let dir = crate::config::get().data_dir.join("llm-logs").join(agent);
    let _ = fs::create_dir_all(&dir);
    let date = chrono::Local::now().format("%Y-%m-%d");
    let path = dir.join(format!("{}.md", date));
    let ts = chrono::Local::now().format("%H:%M:%S");
    let status = if ok { "ok" } else { "ERROR" };
    // Build the markdown entry piecewise into one buffer.
    let mut entry = String::new();
    let _ = write!(entry, "\n## {} — {} ({}, {:.1}s, {})\n\n",
        ts, agent, model, duration_ms as f64 / 1000.0, status);
    let _ = write!(entry, "### Prompt ({} chars)\n\n```\n{}\n```\n\n",
        prompt.len(), prompt);
    let _ = write!(entry, "### Response ({} chars)\n\n```\n{}\n```\n\n---\n",
        response.len(), response);
    // Append-only; swallow failures — logging must never break a call.
    if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(&path) {
        let _ = f.write_all(entry.as_bytes());
    }
}
/// Maximum time to wait for a claude subprocess before killing it.
const SUBPROCESS_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300); // 5 minutes
/// Call a model via claude CLI. Returns the response text.
///
/// Sets PR_SET_PDEATHSIG on the child so it gets SIGTERM if the
/// parent daemon exits — no more orphaned claude processes.
/// Times out after 5 minutes to prevent blocking the daemon forever.
///
/// * `agent` — logical caller name, used for usage logging only.
/// * `model` — model alias passed to `claude --model`.
/// * `prompt` — full prompt text; written to a temp file and fed via stdin.
pub(crate) fn call_model(agent: &str, model: &str, prompt: &str) -> Result<String, String> {
    // Write prompt to temp file (claude CLI needs file input for large prompts)
    // Name includes pid + thread id so concurrent calls don't collide.
    let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt",
        std::process::id(), std::thread::current().id()));
    fs::write(&tmp, prompt)
        .map_err(|e| format!("write temp prompt: {}", e))?;
    let mut cmd = Command::new("claude");
    cmd.args(["-p", "--model", model, "--tools", "", "--no-session-persistence",
        "--strict-mcp-config"])
        .stdin(fs::File::open(&tmp).map_err(|e| format!("open temp: {}", e))?)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .env_remove("CLAUDECODE");
    // Use separate OAuth credentials for agent work if configured
    if let Some(ref dir) = crate::config::get().agent_config_dir {
        cmd.env("CLAUDE_CONFIG_DIR", dir);
    }
    // Tell hooks this is a daemon agent call, not interactive
    cmd.env("POC_AGENT", "1");
    let start = std::time::Instant::now();
    let mut child = unsafe {
        // SAFETY: pre_exec runs in the forked child before exec; prctl with
        // PR_SET_PDEATHSIG is async-signal-safe and touches no parent state.
        cmd.pre_exec(|| {
            libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM);
            Ok(())
        })
        .spawn()
        .map_err(|e| format!("spawn claude: {}", e))?
    };
    // Spawn a watchdog thread that kills the child after the timeout.
    // Uses a cancellation flag so the thread exits promptly when the child finishes.
    let child_id = child.id();
    let cancel = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
    let cancel_flag = cancel.clone();
    let watchdog = std::thread::spawn(move || {
        // Sleep in 1s increments so we can check the cancel flag
        let deadline = std::time::Instant::now() + SUBPROCESS_TIMEOUT;
        while std::time::Instant::now() < deadline {
            if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
                return;
            }
            std::thread::sleep(std::time::Duration::from_secs(1));
        }
        if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
            return;
        }
        // Send SIGTERM, then SIGKILL after 5s grace period
        // NOTE(review): if the child has already been reaped and its pid
        // reused, these signals could hit an unrelated process — low risk
        // given the cancel flag, but worth confirming.
        // SAFETY: libc::kill on a pid; no memory-safety concerns.
        unsafe { libc::kill(child_id as i32, libc::SIGTERM); }
        for _ in 0..5 {
            std::thread::sleep(std::time::Duration::from_secs(1));
            if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
                return;
            }
        }
        // SAFETY: see above.
        unsafe { libc::kill(child_id as i32, libc::SIGKILL); }
    });
    // wait_with_output drains stdout/stderr to EOF, then reaps the child.
    let result = child.wait_with_output();
    // Cancel the watchdog thread
    cancel.store(true, std::sync::atomic::Ordering::Relaxed);
    watchdog.join().ok();
    fs::remove_file(&tmp).ok();
    match result {
        Ok(output) => {
            let elapsed = start.elapsed().as_millis();
            // Heuristic: finishing within 1s of the deadline means the
            // watchdog (almost certainly) killed the child — report a
            // timeout instead of a confusing exit-status error.
            if elapsed > SUBPROCESS_TIMEOUT.as_millis() - 1000 {
                log_usage(agent, model, prompt, "TIMEOUT", elapsed, false);
                return Err(format!("claude timed out after {:.0}s", elapsed as f64 / 1000.0));
            }
            if output.status.success() {
                let response = String::from_utf8_lossy(&output.stdout).trim().to_string();
                log_usage(agent, model, prompt, &response, elapsed, true);
                Ok(response)
            } else {
                let stderr = String::from_utf8_lossy(&output.stderr);
                let preview = crate::util::first_n_chars(&stderr, 500);
                log_usage(agent, model, prompt, &preview, elapsed, false);
                Err(format!("claude exited {}: {}", output.status, preview.trim()))
            }
        }
        Err(e) => Err(format!("wait claude: {}", e)),
    }
}
/// Call Sonnet via claude CLI.
/// Convenience wrapper over [`call_model`] with the model fixed to "sonnet".
pub(crate) fn call_sonnet(agent: &str, prompt: &str) -> Result<String, String> {
    call_model(agent, "sonnet", prompt)
}
/// Call Haiku via claude CLI (cheaper, faster — good for high-volume extraction).
/// Convenience wrapper over [`call_model`] with the model fixed to "haiku".
pub(crate) fn call_haiku(agent: &str, prompt: &str) -> Result<String, String> {
    call_model(agent, "haiku", prompt)
}
/// Parse a JSON response, handling markdown fences.
///
/// Strategy: strip any ```json / ``` fences, try a direct parse, then fall
/// back to the widest `{...}` span and finally the widest `[...]` span.
/// The span search (first opener to last closer) is exactly what the old
/// greedy regexes `\{[\s\S]*\}` / `\[[\s\S]*\]` matched, but without
/// compiling two regexes on every call.
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
    let cleaned = response.trim();
    let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);
    let cleaned = cleaned.strip_prefix("```").unwrap_or(cleaned);
    let cleaned = cleaned.strip_suffix("```").unwrap_or(cleaned);
    let cleaned = cleaned.trim();
    if let Ok(v) = serde_json::from_str(cleaned) {
        return Ok(v);
    }
    // Try the widest delimited span: object first, then array (matching the
    // old regex order). Delimiters are ASCII, so the slice bounds are
    // guaranteed char boundaries.
    for (open, close) in [('{', '}'), ('[', ']')] {
        if let (Some(start), Some(end)) = (cleaned.find(open), cleaned.rfind(close)) {
            if start < end {
                if let Ok(v) = serde_json::from_str(&cleaned[start..=end]) {
                    return Ok(v);
                }
            }
        }
    }
    let preview = crate::util::first_n_chars(cleaned, 200);
    Err(format!("no valid JSON in response: {preview}..."))
}
/// Return up to 200 node keys, sorted, for prompt context.
pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
    let mut keys = Vec::with_capacity(store.nodes.len());
    keys.extend(store.nodes.keys().cloned());
    keys.sort();
    keys.truncate(200);
    keys
}

View file

@ -6,21 +6,23 @@
// //
// llm — model invocation, response parsing // llm — model invocation, response parsing
// prompts — prompt generation from store data // prompts — prompt generation from store data
// defs — agent file loading and placeholder resolution
// audit — link quality review via Sonnet // audit — link quality review via Sonnet
// consolidate — full consolidation pipeline // consolidate — full consolidation pipeline
// knowledge — agent execution, conversation fragment selection // knowledge — knowledge production agents + convergence loop
// enrich — journal enrichment, experience mining // enrich — journal enrichment, experience mining
// fact_mine — fact extraction from transcripts
// digest — episodic digest generation (daily/weekly/monthly) // digest — episodic digest generation (daily/weekly/monthly)
// daemon — background job scheduler // daemon — background job scheduler
// transcript — shared JSONL transcript parsing // transcript — shared JSONL transcript parsing
//
// The session hook (context injection, agent orchestration) moved to claude/hook.
pub mod transcript;
pub mod llm;
pub mod prompts;
pub mod defs;
pub mod audit; pub mod audit;
pub mod consolidate; pub mod consolidate;
pub mod daemon; pub mod knowledge;
pub mod defs; pub mod enrich;
pub mod fact_mine;
pub mod digest; pub mod digest;
pub mod learn; pub mod daemon;
pub mod prompts;

View file

@ -3,6 +3,7 @@
use crate::store::Store; use crate::store::Store;
use crate::graph::Graph; use crate::graph::Graph;
use crate::similarity;
use crate::neuro::{ use crate::neuro::{
ReplayItem, ReplayItem,
@ -12,18 +13,23 @@ use crate::neuro::{
/// Result of building an agent prompt — includes both the prompt text /// Result of building an agent prompt — includes both the prompt text
/// and the keys of nodes selected for processing, so the caller can /// and the keys of nodes selected for processing, so the caller can
/// record visits after successful completion. /// record visits after successful completion.
/// A resolved step ready for execution.
pub struct ResolvedStep {
pub prompt: String,
pub phase: String,
}
pub struct AgentBatch { pub struct AgentBatch {
pub steps: Vec<ResolvedStep>, pub prompt: String,
pub node_keys: Vec<String>, pub node_keys: Vec<String>,
} }
pub(super) fn format_topology_header(graph: &Graph) -> String { /// Load a prompt template, replacing {{PLACEHOLDER}} with data
pub fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
let path = crate::config::get().prompts_dir.join(format!("{}.md", name));
let mut content = std::fs::read_to_string(&path)
.map_err(|e| format!("load prompt {}: {}", path.display(), e))?;
for (placeholder, data) in replacements {
content = content.replace(placeholder, data);
}
Ok(content)
}
pub fn format_topology_header(graph: &Graph) -> String {
let sigma = graph.small_world_sigma(); let sigma = graph.small_world_sigma();
let alpha = graph.degree_power_law_exponent(); let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini(); let gini = graph.degree_gini();
@ -66,7 +72,7 @@ pub(super) fn format_topology_header(graph: &Graph) -> String {
n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list) n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list)
} }
pub(super) fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String { pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String {
let hub_thresh = graph.hub_threshold(); let hub_thresh = graph.hub_threshold();
let mut out = String::new(); let mut out = String::new();
for item in items { for item in items {
@ -113,9 +119,15 @@ pub(super) fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &
out.push_str(&format!("Search hits: {} ← actively found by search, prefer to keep\n", hits)); out.push_str(&format!("Search hits: {} ← actively found by search, prefer to keep\n", hits));
} }
// Full content — the agent needs to see everything to do quality work // Content (truncated for large nodes)
let content = &node.content; let content = &node.content;
out.push_str(&format!("\nContent:\n{}\n\n", content)); if content.len() > 1500 {
let truncated = crate::util::truncate(content, 1500, "\n[...]");
out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n\n",
content.len(), truncated));
} else {
out.push_str(&format!("\nContent:\n{}\n\n", content));
}
// Neighbors // Neighbors
let neighbors = graph.neighbors(&item.key); let neighbors = graph.neighbors(&item.key);
@ -134,12 +146,38 @@ pub(super) fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &
} }
} }
// Suggested link targets: text-similar semantic nodes not already neighbors
let neighbor_keys: std::collections::HashSet<&str> = neighbors.iter()
.map(|(k, _)| k.as_str()).collect();
let mut candidates: Vec<(&str, f32)> = store.nodes.iter()
.filter(|(k, _)| {
*k != &item.key
&& !neighbor_keys.contains(k.as_str())
})
.map(|(k, n)| {
let sim = similarity::cosine_similarity(content, &n.content);
(k.as_str(), sim)
})
.filter(|(_, sim)| *sim > 0.1)
.collect();
candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
candidates.truncate(8);
if !candidates.is_empty() {
out.push_str("\nSuggested link targets (by text similarity, not yet linked):\n");
for (k, sim) in &candidates {
let is_hub = graph.degree(k) >= hub_thresh;
out.push_str(&format!(" - {} (sim={:.3}{})\n",
k, sim, if is_hub { ", HUB" } else { "" }));
}
}
out.push_str("\n---\n\n"); out.push_str("\n---\n\n");
} }
out out
} }
pub(super) fn format_health_section(store: &Store, graph: &Graph) -> String { pub fn format_health_section(store: &Store, graph: &Graph) -> String {
use crate::graph; use crate::graph;
let health = graph::health_report(graph, store); let health = graph::health_report(graph, store);
@ -195,7 +233,7 @@ pub(super) fn format_health_section(store: &Store, graph: &Graph) -> String {
out out
} }
pub(super) fn format_pairs_section( pub fn format_pairs_section(
pairs: &[(String, String, f32)], pairs: &[(String, String, f32)],
store: &Store, store: &Store,
graph: &Graph, graph: &Graph,
@ -230,12 +268,12 @@ pub(super) fn format_pairs_section(
out out
} }
pub(super) fn format_rename_candidates(store: &Store, count: usize) -> (Vec<String>, String) { pub fn format_rename_candidates(store: &Store, count: usize) -> (Vec<String>, String) {
let mut candidates: Vec<(&str, &crate::store::Node)> = store.nodes.iter() let mut candidates: Vec<(&str, &crate::store::Node)> = store.nodes.iter()
.filter(|(key, node)| { .filter(|(key, _)| {
if key.starts_with("_facts-") { return true; } if key.starts_with("_facts-") { return true; }
if key.len() < 60 { return false; } if key.len() < 60 { return false; }
if node.node_type == crate::store::NodeType::EpisodicSession { return true; } if key.starts_with("journal#j-") { return true; }
if key.starts_with("_mined-transcripts#f-") { return true; } if key.starts_with("_mined-transcripts#f-") { return true; }
false false
}) })
@ -260,9 +298,9 @@ pub(super) fn format_rename_candidates(store: &Store, count: usize) -> (Vec<Stri
let mut out = String::new(); let mut out = String::new();
out.push_str(&format!("## Nodes to rename ({} of {} candidates)\n\n", out.push_str(&format!("## Nodes to rename ({} of {} candidates)\n\n",
candidates.len(), candidates.len(),
store.nodes.iter().filter(|(k, n)| k.starts_with("_facts-") || store.nodes.keys().filter(|k| k.starts_with("_facts-") ||
(k.len() >= 60 && (k.len() >= 60 &&
(n.node_type == crate::store::NodeType::EpisodicSession || k.starts_with("_mined-transcripts#f-")))).count())); (k.starts_with("journal#j-") || k.starts_with("_mined-transcripts#f-")))).count()));
for (key, node) in &candidates { for (key, node) in &candidates {
out.push_str(&format!("### {}\n", key)); out.push_str(&format!("### {}\n", key));
@ -292,40 +330,22 @@ pub(super) fn format_rename_candidates(store: &Store, count: usize) -> (Vec<Stri
(keys, out) (keys, out)
} }
/// Format specific target keys as rename candidates (for --target mode) /// Get split candidates sorted by size (largest first)
pub(super) fn format_rename_targets(store: &Store, keys: &[String]) -> String { pub fn split_candidates(store: &Store) -> Vec<String> {
let mut out = String::new(); let mut candidates: Vec<(&str, usize)> = store.nodes.iter()
out.push_str(&format!("## Nodes to rename ({} targets)\n\n", keys.len())); .filter(|(key, node)| {
!key.starts_with('_')
for key in keys { && !node.deleted
let Some(node) = store.nodes.get(key) else { && matches!(node.node_type, crate::store::NodeType::Semantic)
out.push_str(&format!("### {}\n\n(node not found)\n\n---\n\n", key)); })
continue; .map(|(k, n)| (k.as_str(), n.content.len()))
}; .collect();
out.push_str(&format!("### {}\n", key)); candidates.sort_by(|a, b| b.1.cmp(&a.1));
let created = if node.timestamp > 0 { candidates.into_iter().map(|(k, _)| k.to_string()).collect()
crate::store::format_datetime(node.timestamp)
} else {
"unknown".to_string()
};
out.push_str(&format!("Created: {}\n", created));
let content = &node.content;
if content.len() > 800 {
let truncated = crate::util::truncate(content, 800, "\n[...]");
out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n\n",
content.len(), truncated));
} else {
out.push_str(&format!("\nContent:\n{}\n\n", content));
}
out.push_str("---\n\n");
}
out
} }
/// Format a single node for split-plan prompt (phase 1) /// Format a single node for split-plan prompt (phase 1)
pub(super) fn format_split_plan_node(store: &Store, graph: &Graph, key: &str) -> String { pub fn format_split_plan_node(store: &Store, graph: &Graph, key: &str) -> String {
let communities = graph.communities(); let communities = graph.communities();
let node = match store.nodes.get(key) { let node = match store.nodes.get(key) {
Some(n) => n, Some(n) => n,
@ -368,16 +388,36 @@ pub(super) fn format_split_plan_node(store: &Store, graph: &Graph, key: &str) ->
out out
} }
/// Build split-plan prompt for a single node (phase 1).
/// Uses the split.agent template with placeholders resolved for the given key.
pub fn split_plan_prompt(store: &Store, key: &str) -> Result<String, String> {
let def = super::defs::get_def("split")
.ok_or_else(|| "no split.agent file".to_string())?;
let graph = store.build_graph();
// Override the query — we have a specific key to split
let keys = vec![key.to_string()];
let (prompt, _) = super::defs::resolve_placeholders(&def.prompt, store, &graph, &keys, 1);
Ok(prompt)
}
/// Build split-extract prompt for one child (phase 2)
pub fn split_extract_prompt(store: &Store, parent_key: &str, child_key: &str, child_desc: &str, child_sections: &str) -> Result<String, String> {
let parent_content = store.nodes.get(parent_key)
.map(|n| n.content.as_str())
.ok_or_else(|| format!("No node '{}'", parent_key))?;
load_prompt("split-extract", &[
("{{CHILD_KEY}}", child_key),
("{{CHILD_DESC}}", child_desc),
("{{CHILD_SECTIONS}}", child_sections),
("{{PARENT_CONTENT}}", parent_content),
])
}
/// Show consolidation batch status or generate an agent prompt. /// Show consolidation batch status or generate an agent prompt.
pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> { pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> {
if auto { if auto {
let batch = agent_prompt(store, "replay", count)?; let batch = agent_prompt(store, "replay", count)?;
for (i, s) in batch.steps.iter().enumerate() { println!("{}", batch.prompt);
if batch.steps.len() > 1 {
println!("=== STEP {} ({}) ===\n", i + 1, s.phase);
}
println!("{}", s.prompt);
}
return Ok(()); return Ok(());
} }
@ -420,5 +460,5 @@ pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<()
pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<AgentBatch, String> { pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<AgentBatch, String> {
let def = super::defs::get_def(agent) let def = super::defs::get_def(agent)
.ok_or_else(|| format!("Unknown agent: {}", agent))?; .ok_or_else(|| format!("Unknown agent: {}", agent))?;
super::defs::run_agent(store, &def, count, &Default::default()) super::defs::run_agent(store, &def, count)
} }

View file

@ -0,0 +1,94 @@
// Shared JSONL transcript parsing
//
// Three agents (enrich, fact_mine, knowledge) all parse Claude Code JSONL
// transcripts. This module provides the shared core: parse each line, extract
// message type, text content from string-or-array blocks, timestamp, and
// user type. Callers filter and transform as needed.
use std::fs;
use std::path::Path;
/// A single message extracted from a JSONL transcript.
pub struct TranscriptMessage {
    /// 1-based line number in the JSONL file.
    pub line: usize,
    /// Raw role: "user" or "assistant".
    pub role: String,
    /// Extracted text content (trimmed, text blocks joined with newlines).
    pub text: String,
    /// ISO timestamp from the message, or empty string when absent.
    pub timestamp: String,
    /// For user messages: "external", "internal", etc. None for assistant
    /// messages or when the field is missing.
    pub user_type: Option<String>,
}
/// Parse a JSONL transcript into structured messages.
///
/// Keeps user and assistant messages with non-empty extracted text; content
/// blocks of type "text" are joined, while tool_use, tool_result, and
/// thinking blocks are skipped, and blocks containing a system-reminder are
/// dropped. Unparseable lines are silently skipped rather than failing the
/// whole file.
pub fn parse_transcript(path: &Path) -> Result<Vec<TranscriptMessage>, String> {
    let raw = fs::read_to_string(path)
        .map_err(|e| format!("read {}: {}", path.display(), e))?;
    let mut out = Vec::new();
    for (idx, line) in raw.lines().enumerate() {
        let obj: serde_json::Value = match serde_json::from_str(line) {
            Ok(v) => v,
            Err(_) => continue, // malformed line: skip
        };
        let role = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
        if role != "user" && role != "assistant" {
            continue;
        }
        let Some(text) = extract_text_content(&obj) else { continue };
        let text = text.trim().to_string();
        if text.is_empty() {
            continue;
        }
        let timestamp = obj.get("timestamp")
            .and_then(|v| v.as_str())
            .map(str::to_string)
            .unwrap_or_default();
        let user_type = obj.get("userType")
            .and_then(|v| v.as_str())
            .map(str::to_string);
        out.push(TranscriptMessage {
            line: idx + 1,
            role: role.to_string(),
            text,
            timestamp,
            user_type,
        });
    }
    Ok(out)
}
/// Extract text content from a JSONL message object.
///
/// Handles both string content and array-of-blocks content (keeping only
/// type="text" blocks). Any block whose text contains a
/// `<system-reminder>` tag is dropped wholesale.
fn extract_text_content(obj: &serde_json::Value) -> Option<String> {
    // Content may live under a "message" envelope or at the top level.
    let container = obj.get("message").unwrap_or(obj);
    match container.get("content")? {
        serde_json::Value::String(s) => Some(s.clone()),
        serde_json::Value::Array(blocks) => {
            let mut texts: Vec<&str> = Vec::new();
            for block in blocks {
                if block.get("type").and_then(|v| v.as_str()) != Some("text") {
                    continue;
                }
                let Some(t) = block.get("text").and_then(|v| v.as_str()) else {
                    continue;
                };
                // Skip system-reminder blocks entirely
                if t.contains("<system-reminder>") {
                    continue;
                }
                texts.push(t);
            }
            if texts.is_empty() {
                None
            } else {
                Some(texts.join("\n"))
            }
        }
        _ => None,
    }
}

View file

@ -0,0 +1,640 @@
// memory-search: combined hook for session context loading + ambient memory retrieval
//
// Modes:
// --hook Run as Claude Code UserPromptSubmit hook (reads stdin, injects into conversation)
// --debug Replay last stashed input, dump every stage to stdout
// --seen Show the seen set for current session
// (default) No-op (future: manual search modes)
use clap::Parser;
use poc_memory::search::{self, AlgoStage};
use poc_memory::store;
use std::collections::{BTreeMap, HashSet};
use std::fs;
use std::io::{self, Read, Write};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{Duration, SystemTime};
/// CLI arguments for memory-search.
///
/// In --hook mode the Claude Code hook JSON arrives on stdin; every other
/// mode replays the stashed copy of the last hook input from STASH_PATH.
#[derive(Parser)]
#[command(name = "memory-search")]
struct Args {
    /// Run as Claude Code hook (reads stdin, outputs for injection)
    #[arg(long)]
    hook: bool,
    /// Debug mode: replay last stashed input, dump every stage
    #[arg(short, long)]
    debug: bool,
    /// Show the seen set and returned memories for this session
    #[arg(long)]
    seen: bool,
    /// Show full seen set (list all keys)
    #[arg(long)]
    seen_full: bool,
    /// Max results to return
    #[arg(long, default_value = "5")]
    max_results: usize,
    /// Algorithm pipeline stages: e.g. spread spectral,k=20 spread,max_hops=4
    /// Default: spread.
    pipeline: Vec<String>,
}
/// Stash of the most recent hook input JSON, replayed by --debug/--seen runs.
const STASH_PATH: &str = "/tmp/claude-memory-search/last-input.json";
/// Max bytes per context chunk (hook output limit is ~10K chars)
const CHUNK_SIZE: usize = 9000;
/// Entry point.
///
/// Pipeline: (1) on first prompt or post-compaction, inject the full memory
/// context in chunks; (2) on every prompt, extract weighted terms from the
/// transcript + prompt, run the search pipeline, and inject unseen results;
/// (3) record hits with the daemon and clean up stale state files.
fn main() {
    // Daemon agent calls set POC_AGENT=1 — skip memory search.
    if std::env::var("POC_AGENT").is_ok() {
        return;
    }
    let args = Args::parse();
    if args.seen || args.seen_full {
        show_seen();
        return;
    }
    let input = if args.hook {
        // Hook mode: read from stdin, stash for later debug runs
        let mut buf = String::new();
        io::stdin().read_to_string(&mut buf).unwrap_or_default();
        fs::create_dir_all("/tmp/claude-memory-search").ok();
        fs::write(STASH_PATH, &buf).ok();
        buf
    } else {
        // All other modes: replay stashed input
        fs::read_to_string(STASH_PATH).unwrap_or_else(|_| {
            eprintln!("No stashed input at {}", STASH_PATH);
            std::process::exit(1);
        })
    };
    // Any non-hook invocation is implicitly a debug run.
    let debug = args.debug || !args.hook;
    let json: serde_json::Value = match serde_json::from_str(&input) {
        Ok(v) => v,
        Err(_) => return,
    };
    let prompt = json["prompt"].as_str().unwrap_or("");
    let session_id = json["session_id"].as_str().unwrap_or("");
    if session_id.is_empty() {
        return;
    }
    let state_dir = PathBuf::from("/tmp/claude-memory-search");
    fs::create_dir_all(&state_dir).ok();
    // Detect post-compaction reload via mmap backward scan
    let transcript_path = json["transcript_path"].as_str().unwrap_or("");
    let is_compaction = poc_memory::transcript::detect_new_compaction(
        &state_dir, session_id, transcript_path,
    );
    // First prompt or post-compaction: load full context
    let cookie_path = state_dir.join(format!("cookie-{}", session_id));
    let is_first = !cookie_path.exists();
    if is_first || is_compaction {
        // Reset seen set to keys that load-context will inject
        let seen_path = state_dir.join(format!("seen-{}", session_id));
        fs::remove_file(&seen_path).ok();
    }
    if debug {
        println!("[memory-search] session={} is_first={} is_compaction={}", session_id, is_first, is_compaction);
    }
    if is_first || is_compaction {
        // Create/touch the cookie
        let cookie = if is_first {
            let c = generate_cookie();
            fs::write(&cookie_path, &c).ok();
            c
        } else {
            fs::read_to_string(&cookie_path).unwrap_or_default().trim().to_string()
        };
        if debug { println!("[memory-search] loading full context"); }
        // Load full memory context, chunk it, print first chunk, save rest
        if let Ok(output) = Command::new("poc-memory").args(["admin", "load-context"]).output() {
            if output.status.success() {
                let ctx = String::from_utf8_lossy(&output.stdout).to_string();
                if !ctx.trim().is_empty() {
                    // Extract keys from all chunks for seen set.
                    // Section headers look like "--- KEY (group) ---".
                    for line in ctx.lines() {
                        if line.starts_with("--- ") && line.ends_with(" ---") {
                            let inner = &line[4..line.len() - 4];
                            if let Some(paren) = inner.rfind(" (") {
                                let key = inner[..paren].trim();
                                mark_seen(&state_dir, session_id, key);
                            }
                        }
                    }
                    let chunks = chunk_context(&ctx, CHUNK_SIZE);
                    if debug {
                        println!("[memory-search] context: {} bytes, {} chunks",
                            ctx.len(), chunks.len());
                    }
                    // Print first chunk (only the hook output is injected)
                    if let Some(first) = chunks.first() {
                        if args.hook {
                            print!("{}", first);
                        }
                    }
                    // Save remaining chunks for drip-feeding
                    save_pending_chunks(&state_dir, session_id, &chunks[1..]);
                }
            }
        }
        // Cookie is only displayed later alongside search results; silence
        // the unused-variable warning on this path.
        let _ = cookie;
    } else {
        // Not first call: drip-feed next pending chunk
        if let Some(chunk) = pop_pending_chunk(&state_dir, session_id) {
            if debug {
                println!("[memory-search] drip-feeding pending chunk: {} bytes", chunk.len());
            }
            if args.hook {
                print!("{}", chunk);
            }
        }
    }
    // Search requires a prompt (PostToolUse events don't have one)
    if prompt.is_empty() {
        return;
    }
    // Skip system/AFK prompts
    for prefix in &["is AFK", "You're on your own", "IRC mention"] {
        if prompt.starts_with(prefix) {
            return;
        }
    }
    let store = match store::Store::load() {
        Ok(s) => s,
        Err(_) => return,
    };
    // Search for node keys in last ~150k tokens of transcript
    if debug { println!("[memory-search] transcript: {}", transcript_path); }
    let mut terms = extract_weighted_terms(transcript_path, 150_000, &store);
    // Also extract terms from the prompt itself (handles fresh sessions
    // and queries about topics not yet mentioned in the transcript)
    let prompt_terms = search::extract_query_terms(prompt, 8);
    if !prompt_terms.is_empty() {
        if debug { println!("[memory-search] prompt terms: {}", prompt_terms); }
        for word in prompt_terms.split_whitespace() {
            let lower = word.to_lowercase();
            // Prompt terms get weight 1.0 (same as direct mention);
            // or_insert keeps a higher transcript-derived weight if present.
            terms.entry(lower).or_insert(1.0);
        }
    }
    if debug {
        println!("[memory-search] {} terms total", terms.len());
        let mut by_weight: Vec<_> = terms.iter().collect();
        by_weight.sort_by(|a, b| b.1.total_cmp(a.1));
        for (term, weight) in by_weight.iter().take(20) {
            println!("  {:.3}  {}", weight, term);
        }
    }
    if terms.is_empty() {
        if debug { println!("[memory-search] no terms found, done"); }
        return;
    }
    // Parse algorithm pipeline
    let pipeline: Vec<AlgoStage> = if args.pipeline.is_empty() {
        // Default: just spreading activation
        vec![AlgoStage::parse("spread").unwrap()]
    } else {
        let mut stages = Vec::new();
        for arg in &args.pipeline {
            match AlgoStage::parse(arg) {
                Ok(s) => stages.push(s),
                Err(e) => {
                    eprintln!("error: {}", e);
                    std::process::exit(1);
                }
            }
        }
        stages
    };
    if debug {
        let names: Vec<String> = pipeline.iter().map(|s| format!("{}", s.algo)).collect();
        // NOTE(review): joined with no separator — a delimiter may have been
        // intended here; confirm against the original debug output.
        println!("[memory-search] pipeline: {}", names.join(""));
    }
    // Extract seeds from terms
    let graph = poc_memory::graph::build_graph_fast(&store);
    let (seeds, direct_hits) = search::match_seeds(&terms, &store);
    if seeds.is_empty() {
        if debug { println!("[memory-search] no seeds matched, done"); }
        return;
    }
    if debug {
        println!("[memory-search] {} seeds", seeds.len());
        let mut sorted = seeds.clone();
        sorted.sort_by(|a, b| b.1.total_cmp(&a.1));
        for (key, score) in sorted.iter().take(20) {
            println!("  {:.4}  {}", score, key);
        }
    }
    // Debug runs widen the result set so more of the ranking is visible.
    let max_results = if debug { args.max_results.max(25) } else { args.max_results };
    let raw_results = search::run_pipeline(&pipeline, seeds, &graph, &store, debug, max_results);
    let results: Vec<search::SearchResult> = raw_results.into_iter()
        .map(|(key, activation)| {
            let is_direct = direct_hits.contains(&key);
            search::SearchResult { key, activation, is_direct, snippet: None }
        }).collect();
    if debug {
        println!("[memory-search] {} search results", results.len());
        for r in results.iter().take(10) {
            let marker = if r.is_direct { "" } else { " " };
            println!("  {} [{:.4}] {}", marker, r.activation, r.key);
        }
    }
    if results.is_empty() {
        if debug { println!("[memory-search] no results, done"); }
        return;
    }
    let seen = load_seen(&state_dir, session_id);
    if debug { println!("[memory-search] {} keys in seen set", seen.len()); }
    // Format results like poc-memory search output
    let search_output = search::format_results(&results);
    let cookie = fs::read_to_string(&cookie_path).unwrap_or_default().trim().to_string();
    let mut result_output = String::new();
    let mut count = 0;
    let max_entries = 5;
    // Filter formatted lines: skip entries already seen this session;
    // continuation lines (no key) are kept once an entry has been emitted.
    for line in search_output.lines() {
        if count >= max_entries { break; }
        let trimmed = line.trim();
        if trimmed.is_empty() { continue; }
        if let Some(key) = extract_key_from_line(trimmed) {
            if seen.contains(&key) { continue; }
            mark_seen(&state_dir, session_id, &key);
            mark_returned(&state_dir, session_id, &key);
            result_output.push_str(line);
            result_output.push('\n');
            count += 1;
        } else if count > 0 {
            result_output.push_str(line);
            result_output.push('\n');
        }
    }
    if count == 0 {
        if debug { println!("[memory-search] all results already seen"); }
        return;
    }
    if args.hook {
        println!("Recalled memories [{}]:", cookie);
    }
    print!("{}", result_output);
    // Record search hits with daemon (fire-and-forget)
    let hit_keys: Vec<&str> = results.iter().map(|r| r.key.as_str()).collect();
    if debug { println!("[memory-search] recording {} search hits", hit_keys.len()); }
    match poc_memory::agents::daemon::rpc_record_hits(&hit_keys) {
        Ok(()) => { if debug { println!("[memory-search] hits recorded"); } }
        Err(e) => { if debug { println!("[memory-search] hit recording failed: {}", e); } }
    }
    // Clean up stale state files (opportunistic)
    cleanup_stale_files(&state_dir, Duration::from_secs(86400));
}
/// Split load-context output into chunks of at most ~`max_bytes`, breaking
/// only at section header lines ("--- KEY (group) ---").
///
/// Two passes: first cut the text into sections, each beginning at a header;
/// then greedily pack consecutive sections into chunks without exceeding
/// `max_bytes` (a single oversized section still becomes its own chunk).
fn chunk_context(ctx: &str, max_bytes: usize) -> Vec<String> {
    let is_header = |l: &str| l.starts_with("--- ") && l.ends_with(" ---");
    // Pass 1: sections.
    let mut sections: Vec<String> = Vec::new();
    let mut cur = String::new();
    for line in ctx.lines() {
        if is_header(line) && !cur.is_empty() {
            sections.push(std::mem::take(&mut cur));
        }
        if !cur.is_empty() {
            cur.push('\n');
        }
        cur.push_str(line);
    }
    if !cur.is_empty() {
        sections.push(cur);
    }
    // Pass 2: greedy packing.
    let mut chunks: Vec<String> = Vec::new();
    let mut packed = String::new();
    for section in sections {
        let joined_len = packed.len() + section.len() + 1;
        if !packed.is_empty() && joined_len > max_bytes {
            chunks.push(std::mem::take(&mut packed));
        }
        if !packed.is_empty() {
            packed.push('\n');
        }
        packed.push_str(&section);
    }
    if !packed.is_empty() {
        chunks.push(packed);
    }
    chunks
}
/// Persist `chunks` as numbered files under `chunks-{session}` for later
/// drip-feeding; any chunks left over from a previous load are cleared
/// first. An empty slice just clears.
fn save_pending_chunks(dir: &Path, session_id: &str, chunks: &[String]) {
    let target = dir.join(format!("chunks-{}", session_id));
    // Drop stale chunks before writing the new set.
    let _ = fs::remove_dir_all(&target);
    if chunks.is_empty() {
        return;
    }
    fs::create_dir_all(&target).ok();
    for (idx, body) in chunks.iter().enumerate() {
        // Zero-padded names keep lexicographic == numeric order.
        fs::write(target.join(format!("{:04}", idx)), body).ok();
    }
}
/// Remove and return the lowest-numbered pending chunk for the session,
/// deleting the chunk directory once it is drained. Returns None when no
/// chunks remain.
fn pop_pending_chunk(dir: &Path, session_id: &str) -> Option<String> {
    let chunks_dir = dir.join(format!("chunks-{}", session_id));
    if !chunks_dir.exists() {
        return None;
    }
    let mut files: Vec<_> = fs::read_dir(&chunks_dir)
        .ok()?
        .flatten()
        .filter(|e| matches!(e.file_type(), Ok(t) if t.is_file()))
        .collect();
    files.sort_by_key(|e| e.file_name());
    let next = files.first()?;
    let body = fs::read_to_string(next.path()).ok()?;
    fs::remove_file(next.path()).ok();
    // Remove the directory when that was the last chunk.
    let drained = fs::read_dir(&chunks_dir)
        .ok()
        .map(|mut d| d.next().is_none())
        .unwrap_or(true);
    if drained {
        fs::remove_dir(&chunks_dir).ok();
    }
    Some(body)
}
/// Reverse-scan the transcript JSONL, accumulating user/assistant text until
/// roughly `max_tokens` tokens (~4 bytes per token heuristic) are collected,
/// then score every store node key by its casefolded substring occurrences.
/// Occurrences later in the transcript (more recent) contribute more weight.
fn extract_weighted_terms(
    path: &str,
    max_tokens: usize,
    store: &poc_memory::store::Store,
) -> BTreeMap<String, f64> {
    let mut weights = BTreeMap::new();
    if path.is_empty() {
        return weights;
    }
    let Ok(content) = fs::read_to_string(path) else {
        return weights;
    };
    // Walk lines newest-first until the token budget is exhausted.
    let mut collected: Vec<String> = Vec::new();
    let mut budget_used = 0usize;
    for line in content.lines().rev() {
        if budget_used >= max_tokens {
            break;
        }
        let Ok(obj) = serde_json::from_str::<serde_json::Value>(line) else {
            continue;
        };
        match obj.get("type").and_then(|v| v.as_str()) {
            Some("user") | Some("assistant") => {}
            _ => continue,
        }
        let msg = obj.get("message").unwrap_or(&obj);
        let mut text = String::new();
        match msg.get("content") {
            Some(serde_json::Value::String(s)) => text.push_str(s),
            Some(serde_json::Value::Array(blocks)) => {
                for b in blocks {
                    if b.get("type").and_then(|v| v.as_str()) == Some("text") {
                        if let Some(t) = b.get("text").and_then(|v| v.as_str()) {
                            text.push(' ');
                            text.push_str(t);
                        }
                    }
                }
            }
            _ => {}
        }
        budget_used += text.len() / 4; // ~4 bytes per token
        collected.push(text);
    }
    // Oldest-first so byte position maps to recency.
    collected.reverse();
    let haystack = collected.join(" ").to_lowercase();
    let total = haystack.len();
    if total == 0 {
        return weights;
    }
    // Score every node key by casefolded, non-overlapping occurrences.
    for key in store.nodes.keys() {
        let needle = key.to_lowercase();
        let mut at = 0;
        while let Some(hit) = haystack[at..].find(&needle) {
            let absolute = at + hit;
            // Position weight in (0, 1]: later (more recent) counts more.
            *weights.entry(needle.clone()).or_insert(0.0) +=
                (absolute + 1) as f64 / total as f64;
            at = absolute + needle.len();
        }
    }
    weights
}
/// Pull the node key out of a formatted result line of the shape
/// "[score] key (c…)". Returns None when the "] " marker is absent or the
/// key is empty.
fn extract_key_from_line(line: &str) -> Option<String> {
    let (_, rest) = line.split_once("] ")?;
    let key = match rest.find(" (c") {
        Some(end) => rest[..end].trim(),
        None => rest.trim(),
    };
    (!key.is_empty()).then(|| key.to_string())
}
/// Generate a short session cookie: the first 12 hex characters of a
/// freshly generated v4 UUID (simple, dashless form).
fn generate_cookie() -> String {
    let hex = uuid::Uuid::new_v4().as_simple().to_string();
    hex[..12].to_string()
}
/// Parse one seen-file line. Current format is "TIMESTAMP\tKEY"; legacy
/// lines carry the bare key. Returns the key portion either way.
fn parse_seen_line(line: &str) -> &str {
    match line.split_once('\t') {
        Some((_ts, key)) => key,
        None => line,
    }
}
/// Load the session's seen set: every key already shown this session.
/// Missing file means an empty set.
fn load_seen(dir: &Path, session_id: &str) -> HashSet<String> {
    let path = dir.join(format!("seen-{}", session_id));
    if !path.exists() {
        return HashSet::new();
    }
    fs::read_to_string(path)
        .unwrap_or_default()
        .lines()
        .filter(|l| !l.is_empty())
        // Strip the optional "TIMESTAMP\t" prefix (legacy lines lack it).
        .map(|l| l.split_once('\t').map_or(l, |(_, key)| key).to_string())
        .collect()
}
/// Append `key` to the session's seen file, prefixed with a local
/// timestamp. Failures to open the file are silently ignored.
fn mark_seen(dir: &Path, session_id: &str, key: &str) {
    let path = dir.join(format!("seen-{}", session_id));
    let file = fs::OpenOptions::new().create(true).append(true).open(path);
    if let Ok(mut f) = file {
        let ts = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S");
        let _ = writeln!(f, "{}\t{}", ts, key);
    }
}
/// Record that `key` was returned by search this session (append-once;
/// duplicates are skipped by consulting the current returned list).
fn mark_returned(dir: &Path, session_id: &str, key: &str) {
    if load_returned(dir, session_id).iter().any(|k| k == key) {
        return;
    }
    let path = dir.join(format!("returned-{}", session_id));
    if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(path) {
        let _ = writeln!(f, "{}", key);
    }
}
/// Keys returned by search this session, in first-seen order, deduplicated.
/// Missing file means an empty list.
fn load_returned(dir: &Path, session_id: &str) -> Vec<String> {
    let path = dir.join(format!("returned-{}", session_id));
    if !path.exists() {
        return Vec::new();
    }
    let content = fs::read_to_string(path).unwrap_or_default();
    let mut dedup = HashSet::new();
    let mut out = Vec::new();
    for line in content.lines() {
        // insert() is false for repeats, preserving first occurrence only.
        if !line.is_empty() && dedup.insert(line.to_string()) {
            out.push(line.to_string());
        }
    }
    out
}
/// Print the current session's cookie, returned-by-search keys, and seen-set
/// summary (full key listing with --seen-full). Reads the session id from
/// the stashed hook input.
fn show_seen() {
    let state_dir = PathBuf::from("/tmp/claude-memory-search");
    // Read stashed input for session_id
    let input = match fs::read_to_string(STASH_PATH) {
        Ok(s) => s,
        Err(_) => {
            eprintln!("No stashed input at {}", STASH_PATH);
            return;
        }
    };
    let json: serde_json::Value = match serde_json::from_str(&input) {
        Ok(v) => v,
        Err(_) => {
            eprintln!("Failed to parse stashed input");
            return;
        }
    };
    let session_id = json["session_id"].as_str().unwrap_or("");
    if session_id.is_empty() {
        eprintln!("No session_id in stashed input");
        return;
    }
    println!("Session: {}", session_id);
    let cookie_path = state_dir.join(format!("cookie-{}", session_id));
    if let Ok(cookie) = fs::read_to_string(&cookie_path) {
        println!("Cookie: {}", cookie.trim());
    }
    let returned = load_returned(&state_dir, session_id);
    if !returned.is_empty() {
        println!("\nReturned by search ({}):", returned.len());
        for key in &returned {
            println!("  {}", key);
        }
    }
    // Read seen file in insertion order (append-only file)
    let seen_path = state_dir.join(format!("seen-{}", session_id));
    let seen_lines: Vec<String> = fs::read_to_string(&seen_path)
        .unwrap_or_default()
        .lines()
        .filter(|s| !s.is_empty())
        .map(|s| s.to_string())
        .collect();
    let returned_set: HashSet<_> = returned.iter().cloned().collect();
    // Pre-seeded = injected by load-context rather than returned by search.
    let pre_seeded = seen_lines.len().saturating_sub(returned.len());
    println!("\nSeen set ({} total, {} pre-seeded):", seen_lines.len(), pre_seeded);
    // NOTE(review): Args::parse() runs a second time here (main already
    // parsed once); works, but passing the flag in would be cleaner.
    if Args::parse().seen_full {
        for line in &seen_lines {
            let key = parse_seen_line(line);
            let marker = if returned_set.contains(key) { "" } else { " " };
            // Show timestamp if present, otherwise just key
            if let Some((ts, k)) = line.split_once('\t') {
                println!("  {} {}{}", ts, marker, k);
            } else {
                println!("  (no ts)             {}{}", marker, line);
            }
        }
    }
}
/// Best-effort removal of files in `dir` older than `max_age` (by mtime).
///
/// All I/O errors are ignored: the caller treats cleanup as opportunistic.
/// Uses `checked_sub` for the cutoff because `SystemTime - Duration` panics
/// when the result is unrepresentable (huge `max_age` or a skewed clock).
fn cleanup_stale_files(dir: &Path, max_age: Duration) {
    let entries = match fs::read_dir(dir) {
        Ok(e) => e,
        Err(_) => return,
    };
    // No representable cutoff means nothing can be older than it.
    let Some(cutoff) = SystemTime::now().checked_sub(max_age) else {
        return;
    };
    for entry in entries.flatten() {
        if let Ok(meta) = entry.metadata() {
            if let Ok(modified) = meta.modified() {
                if modified < cutoff {
                    fs::remove_file(entry.path()).ok();
                }
            }
        }
    }
}

View file

@ -0,0 +1,328 @@
// parse-claude-conversation: debug tool for inspecting what's in the context window
//
// Two-layer design:
// 1. extract_context_items() — walks JSONL from last compaction, yields
// structured records representing what's in the context window
// 2. format_as_context() — renders those records as they appear to Claude
//
// The transcript is mmap'd and scanned backwards from EOF using brace-depth
// tracking to find complete JSON objects, avoiding a full forward scan of
// what can be a 500MB+ file.
//
// Usage:
// parse-claude-conversation [TRANSCRIPT_PATH]
// parse-claude-conversation --last # use the last stashed session
use clap::Parser;
use memmap2::Mmap;
use poc_memory::transcript::{JsonlBackwardIter, find_last_compaction};
use serde_json::Value;
use std::fs;
/// CLI arguments for parse-claude-conversation.
///
/// Exactly one of `path` or `--last` selects the transcript; `--raw`
/// switches from parsed rendering to raw JSONL dumping.
#[derive(Parser)]
#[command(name = "parse-claude-conversation")]
struct Args {
    /// Transcript JSONL path (or --last to use stashed session)
    path: Option<String>,
    /// Use the last stashed session from memory-search
    #[arg(long)]
    last: bool,
    /// Dump raw JSONL objects. Optional integer: number of extra objects
    /// to include before the compaction boundary.
    #[arg(long, num_args = 0..=1, default_missing_value = "0")]
    raw: Option<usize>,
}
// --- Context extraction ---
/// A single item in the context window, as Claude sees it.
enum ContextItem {
    /// Plain user-authored text (system-reminders already split out).
    UserText(String),
    /// Contents of a `<system-reminder>` tag, from any message position.
    SystemReminder(String),
    /// Assistant text output.
    AssistantText(String),
    /// A thinking block; only its presence is recorded, not its content.
    AssistantThinking,
    /// A tool invocation: tool name plus its raw JSON input.
    ToolUse { name: String, input: String },
    /// Text returned by a tool call.
    ToolResult(String),
}
/// Extract context items from the transcript, walking forward from the last
/// compaction boundary (or the start when none is found). Unparseable lines
/// and non-user/assistant records are skipped.
fn extract_context_items(data: &[u8]) -> Vec<ContextItem> {
    let begin = find_last_compaction(data).unwrap_or(0);
    let mut items = Vec::new();
    for raw in data[begin..].split(|&b| b == b'\n') {
        if raw.is_empty() {
            continue;
        }
        let Ok(obj) = serde_json::from_slice::<Value>(raw) else {
            continue;
        };
        let content = obj.get("message").and_then(|m| m.get("content"));
        let kind = obj.get("type").and_then(|v| v.as_str());
        match (kind, content) {
            (Some("user"), Some(c)) => extract_user_content(c, &mut items),
            (Some("assistant"), Some(c)) => extract_assistant_content(c, &mut items),
            _ => {}
        }
    }
    items
}
/// Parse user message content into context items. String content goes
/// straight through the reminder splitter; array content handles "text"
/// and "tool_result" blocks, ignoring everything else.
fn extract_user_content(content: &Value, items: &mut Vec<ContextItem>) {
    if let Value::String(s) = content {
        split_system_reminders(s, items, false);
        return;
    }
    let Value::Array(blocks) = content else { return };
    for block in blocks {
        match block.get("type").and_then(|v| v.as_str()) {
            Some("text") => {
                if let Some(t) = block.get("text").and_then(|v| v.as_str()) {
                    split_system_reminders(t, items, false);
                }
            }
            Some("tool_result") => {
                let text = extract_tool_result_text(block);
                if !text.is_empty() {
                    split_system_reminders(&text, items, true);
                }
            }
            _ => {}
        }
    }
}
/// Extract text from a tool_result block. The "content" field is either a
/// plain string or an array of blocks whose "text" fields are joined with
/// newlines; anything else yields an empty string.
fn extract_tool_result_text(block: &Value) -> String {
    match block.get("content") {
        Some(Value::String(s)) => s.to_string(),
        Some(Value::Array(parts)) => {
            let texts: Vec<&str> = parts
                .iter()
                .filter_map(|p| p.get("text").and_then(|v| v.as_str()))
                .collect();
            texts.join("\n")
        }
        _ => String::new(),
    }
}
/// Split `text` on `<system-reminder>` tags. Reminder bodies become
/// SystemReminder items; the surrounding text becomes ToolResult or
/// UserText per `is_tool_result`. An unterminated open tag consumes the
/// rest of the text as a reminder. Empty (after trim) pieces are dropped.
fn split_system_reminders(text: &str, items: &mut Vec<ContextItem>, is_tool_result: bool) {
    let push_plain = |items: &mut Vec<ContextItem>, t: &str| {
        let t = t.trim();
        if t.is_empty() {
            return;
        }
        if is_tool_result {
            items.push(ContextItem::ToolResult(t.to_string()));
        } else {
            items.push(ContextItem::UserText(t.to_string()));
        }
    };
    let push_reminder = |items: &mut Vec<ContextItem>, t: &str| {
        let t = t.trim();
        if !t.is_empty() {
            items.push(ContextItem::SystemReminder(t.to_string()));
        }
    };
    let mut rest = text;
    while let Some(open_at) = rest.find("<system-reminder>") {
        // Text before the tag keeps its original classification.
        push_plain(items, &rest[..open_at]);
        rest = &rest[open_at + "<system-reminder>".len()..];
        match rest.find("</system-reminder>") {
            Some(close_at) => {
                push_reminder(items, &rest[..close_at]);
                rest = &rest[close_at + "</system-reminder>".len()..];
            }
            None => {
                // Unterminated tag: everything remaining is the reminder.
                push_reminder(items, rest);
                return;
            }
        }
    }
    push_plain(items, rest);
}
/// Parse assistant message content into context items: text blocks become
/// AssistantText (trimmed, empties dropped), tool_use blocks carry name +
/// raw JSON input, thinking blocks are recorded as presence only.
fn extract_assistant_content(content: &Value, items: &mut Vec<ContextItem>) {
    match content {
        Value::String(s) => {
            let t = s.trim();
            if !t.is_empty() {
                items.push(ContextItem::AssistantText(t.to_string()));
            }
        }
        Value::Array(blocks) => {
            for block in blocks {
                match block.get("type").and_then(|v| v.as_str()) {
                    Some("text") => {
                        let text = block
                            .get("text")
                            .and_then(|v| v.as_str())
                            .unwrap_or("")
                            .trim();
                        if !text.is_empty() {
                            items.push(ContextItem::AssistantText(text.to_string()));
                        }
                    }
                    Some("tool_use") => {
                        let name = block
                            .get("name")
                            .and_then(|v| v.as_str())
                            .unwrap_or("?")
                            .to_string();
                        let input = block
                            .get("input")
                            .map(|v| v.to_string())
                            .unwrap_or_default();
                        items.push(ContextItem::ToolUse { name, input });
                    }
                    Some("thinking") => items.push(ContextItem::AssistantThinking),
                    _ => {}
                }
            }
        }
        _ => {}
    }
}
// --- Formatting layer ---
/// Truncate `s` for display to at most `max` bytes, appending the original
/// byte length when content was cut.
///
/// Backs the cut off to the nearest char boundary so slicing never panics
/// on multi-byte UTF-8 — the original `&s[..max]` panicked whenever `max`
/// landed inside a multi-byte character, which transcripts routinely hit.
fn truncate(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_string();
    }
    // Largest index <= max that is a char boundary (0 is always one).
    let mut cut = max;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...({} total)", &s[..cut], s.len())
}
/// Render context items roughly as they appear to Claude, one blank line
/// between items, with per-kind truncation limits.
fn format_as_context(items: &[ContextItem]) {
    for item in items {
        match item {
            ContextItem::UserText(t) => println!("USER: {}\n", truncate(t, 300)),
            ContextItem::SystemReminder(t) => println!(
                "<system-reminder>\n{}\n</system-reminder>\n",
                truncate(t, 500)
            ),
            ContextItem::AssistantText(t) => println!("ASSISTANT: {}\n", truncate(t, 300)),
            ContextItem::AssistantThinking => println!("[thinking]\n"),
            ContextItem::ToolUse { name, input } => {
                println!("TOOL_USE: {} {}\n", name, truncate(input, 200))
            }
            ContextItem::ToolResult(t) => println!("TOOL_RESULT: {}\n", truncate(t, 300)),
        }
    }
}
/// Entry point: mmap the transcript, locate the last compaction boundary,
/// then either dump raw JSONL (--raw [N]) or render parsed context items.
fn main() {
    let args = Args::parse();
    let path = if args.last {
        // Reuse the transcript path memory-search stashed from its last run.
        let stash = fs::read_to_string("/tmp/claude-memory-search/last-input.json")
            .expect("No stashed input");
        let json: Value = serde_json::from_str(&stash).expect("Bad JSON");
        json["transcript_path"]
            .as_str()
            .expect("No transcript_path")
            .to_string()
    } else if let Some(p) = args.path {
        p
    } else {
        eprintln!("error: provide a transcript path or --last");
        std::process::exit(1);
    };
    let file = fs::File::open(&path).expect("Can't open transcript");
    // SAFETY-adjacent caveat: mmap of a file that another process truncates
    // concurrently is UB; transcripts are append-only in practice.
    let mmap = unsafe { Mmap::map(&file).expect("Failed to mmap") };
    eprintln!(
        "Transcript: {} ({:.1} MB)",
        &path,
        mmap.len() as f64 / 1_000_000.0
    );
    let compaction_offset = find_last_compaction(&mmap).unwrap_or(0);
    eprintln!("Compaction at byte offset: {}", compaction_offset);
    if let Some(extra) = args.raw {
        use std::io::Write;
        // Collect `extra` JSON objects before the compaction boundary,
        // scanning backwards so only the nearest N are read.
        let mut before = Vec::new();
        if extra > 0 && compaction_offset > 0 {
            for obj_bytes in JsonlBackwardIter::new(&mmap[..compaction_offset]) {
                if let Ok(obj) = serde_json::from_slice::<Value>(obj_bytes) {
                    let t = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
                    if t == "file-history-snapshot" { continue; }
                }
                before.push(obj_bytes.to_vec());
                if before.len() >= extra {
                    break;
                }
            }
            // Backward iteration yields newest-first; restore file order.
            before.reverse();
        }
        for obj in &before {
            std::io::stdout().write_all(obj).ok();
            println!();
        }
        // Then dump everything from compaction onward
        let region = &mmap[compaction_offset..];
        for line in region.split(|&b| b == b'\n') {
            if line.is_empty() { continue; }
            if let Ok(obj) = serde_json::from_slice::<Value>(line) {
                let t = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
                if t == "file-history-snapshot" { continue; }
                std::io::stdout().write_all(line).ok();
                println!();
            }
        }
    } else {
        let items = extract_context_items(&mmap);
        eprintln!("Context items: {}", items.len());
        format_as_context(&items);
    }
}

View file

@ -0,0 +1,214 @@
// Unified Claude Code hook.
//
// Single binary handling all hook events:
// UserPromptSubmit — signal daemon, check notifications, check context
// PostToolUse — check context (rate-limited)
// Stop — signal daemon response
//
// Replaces: record-user-message-time.sh, check-notifications.sh,
// check-context-usage.sh, notify-done.sh, context-check
use serde_json::Value;
use std::fs;
use std::io::{self, Read};
use std::path::PathBuf;
use std::process::Command;
use std::time::{SystemTime, UNIX_EPOCH};
/// Token usage above which a pre-compaction journal warning is emitted.
const CONTEXT_THRESHOLD: u64 = 130_000;
/// Minimum seconds between rate-limited (PostToolUse) context checks.
const RATE_LIMIT_SECS: u64 = 60;
/// Daemon socket path relative to $HOME; existence implies the daemon is up.
const SOCK_PATH: &str = ".claude/hooks/idle-timer.sock";
fn now_secs() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs()
}
/// The user's home directory from $HOME, defaulting to /root when unset.
fn home() -> PathBuf {
    let dir = std::env::var("HOME").unwrap_or_else(|_| "/root".into());
    PathBuf::from(dir)
}
/// Invoke `poc-daemon` with `args`, discarding its output; spawn failures
/// are ignored (the daemon may simply not be installed/running).
fn daemon_cmd(args: &[&str]) {
    let _ = Command::new("poc-daemon")
        .args(args)
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status();
}
/// Whether the idle-timer daemon appears to be running, judged by the
/// presence of its Unix socket under $HOME.
fn daemon_available() -> bool {
    home().join(SOCK_PATH).exists()
}
/// Tell the daemon the user is active, including the tmux pane id when the
/// hook runs inside tmux (so the daemon knows which pane to watch).
fn signal_user() {
    match std::env::var("TMUX_PANE") {
        Ok(pane) if !pane.is_empty() => daemon_cmd(&["user", &pane]),
        _ => daemon_cmd(&["user"]),
    }
}
/// Tell the daemon that the assistant finished responding (Stop event).
fn signal_response() {
    daemon_cmd(&["response"]);
}
/// Print any pending daemon notifications to stdout so the hook injects
/// them into the conversation. No-op when the daemon socket is absent or
/// the daemon produced no output.
fn check_notifications() {
    if !daemon_available() {
        return;
    }
    let Ok(out) = Command::new("poc-daemon").arg("notifications").output() else {
        return;
    };
    let text = String::from_utf8_lossy(&out.stdout);
    if !text.trim().is_empty() {
        println!("You have pending notifications:");
        print!("{text}");
    }
}
/// Warn when context usage nears the compaction threshold.
///
/// Scans up to the last 500 transcript lines for the most recent usage
/// record (input + cache-creation + cache-read tokens) and prints a
/// journal-writing reminder when it exceeds `CONTEXT_THRESHOLD`. With
/// `rate_limit`, runs at most once per `RATE_LIMIT_SECS`, tracked via a
/// timestamp file in /tmp.
fn check_context(transcript: &PathBuf, rate_limit: bool) {
    if rate_limit {
        let rate_file = PathBuf::from("/tmp/claude-context-check-last");
        if let Ok(s) = fs::read_to_string(&rate_file) {
            if let Ok(last) = s.trim().parse::<u64>() {
                // saturating_sub: a future timestamp in the file (clock
                // skew, manual edit) previously underflowed `now - last`
                // and could panic the hook.
                if now_secs().saturating_sub(last) < RATE_LIMIT_SECS {
                    return;
                }
            }
        }
        let _ = fs::write(&rate_file, now_secs().to_string());
    }
    if !transcript.exists() {
        return;
    }
    let content = match fs::read_to_string(transcript) {
        Ok(c) => c,
        Err(_) => return,
    };
    // Newest usage record wins; the substring pre-filter skips the JSON
    // parse for the many lines without usage data.
    let mut usage: u64 = 0;
    for line in content.lines().rev().take(500) {
        if !line.contains("cache_read_input_tokens") {
            continue;
        }
        if let Ok(v) = serde_json::from_str::<Value>(line) {
            let u = &v["message"]["usage"];
            let input_tokens = u["input_tokens"].as_u64().unwrap_or(0);
            let cache_creation = u["cache_creation_input_tokens"].as_u64().unwrap_or(0);
            let cache_read = u["cache_read_input_tokens"].as_u64().unwrap_or(0);
            usage = input_tokens + cache_creation + cache_read;
            break;
        }
    }
    if usage > CONTEXT_THRESHOLD {
        print!(
            "\
CONTEXT WARNING: Compaction approaching ({usage} tokens). Write a journal entry NOW.
Use `poc-memory journal write \"entry text\"` to save a dated entry covering:
- What you're working on and current state (done / in progress / blocked)
- Key things learned this session (patterns, debugging insights)
- Anything half-finished that needs pickup
Keep it narrative, not a task log."
        );
    }
}
/// Spawn `memory-search --hook`, piping the raw hook JSON to its stdin,
/// and forward whatever it prints (context chunks / recalled memories) to
/// stdout so Claude Code injects it into the conversation. Spawn or pipe
/// failures are silently ignored — memory is best-effort.
fn run_memory_search(input: &str) {
    let result = Command::new("memory-search")
        .arg("--hook")
        .stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::null())
        .spawn()
        .and_then(|mut child| {
            if let Some(ref mut stdin) = child.stdin {
                use std::io::Write;
                let _ = stdin.write_all(input.as_bytes());
            }
            child.wait_with_output()
        });
    if let Ok(output) = result {
        let text = String::from_utf8_lossy(&output.stdout);
        if !text.is_empty() {
            print!("{text}");
        }
    }
}

/// Hook entry point: read the hook JSON from stdin and dispatch on
/// `hook_event_name`. The memory-search spawn logic, previously duplicated
/// verbatim in the UserPromptSubmit and PostToolUse arms, lives in
/// `run_memory_search`.
fn main() {
    let mut input = String::new();
    io::stdin().read_to_string(&mut input).ok();
    let hook: Value = match serde_json::from_str(&input) {
        Ok(v) => v,
        Err(_) => return,
    };
    let hook_type = hook["hook_event_name"].as_str().unwrap_or("unknown");
    let transcript = hook["transcript_path"]
        .as_str()
        .filter(|p| !p.is_empty())
        .map(PathBuf::from);
    // Daemon agent calls set POC_AGENT=1 — skip all signaling.
    // Without this, the daemon's claude -p calls trigger hooks that
    // signal "user active", keeping the idle timer permanently reset.
    if std::env::var("POC_AGENT").is_ok() {
        return;
    }
    match hook_type {
        "UserPromptSubmit" => {
            signal_user();
            check_notifications();
            // Ambient memory retrieval + initial context load
            run_memory_search(&input);
            if let Some(ref t) = transcript {
                check_context(t, false);
            }
        }
        "PostToolUse" => {
            // Drip-feed pending context chunks from the initial load
            run_memory_search(&input);
            if let Some(ref t) = transcript {
                check_context(t, true);
            }
        }
        "Stop" => {
            // stop_hook_active guards against re-entrant Stop events.
            let stop_hook_active = hook["stop_hook_active"].as_bool().unwrap_or(false);
            if !stop_hook_active {
                signal_response();
            }
        }
        _ => {}
    }
}

191
poc-memory/src/config.rs Normal file
View file

@ -0,0 +1,191 @@
// Configuration for poc-memory
//
// Loaded from ~/.config/poc-memory/config.jsonl (or POC_MEMORY_CONFIG env).
// Falls back to sensible defaults if no config file exists.
//
// Format: JSONL — one JSON object per line.
// First line with "config" key: global settings.
// Lines with "group" key: context loading groups (order preserved).
//
// Example:
// {"config": {"user_name": "Alice", "data_dir": "~/.claude/memory"}}
// {"group": "identity", "keys": ["identity"]}
// {"group": "orientation", "keys": ["where-am-i.md"], "source": "file"}
use std::path::PathBuf;
use std::sync::OnceLock;
static CONFIG: OnceLock<Config> = OnceLock::new();
/// Where a context group's content is resolved from (see the config
/// example at the top of this file).
#[derive(Debug, Clone, PartialEq)]
pub enum ContextSource {
    /// Resolve keys against the memory store (the default).
    Store,
    /// Keys name files read directly (example uses "where-am-i.md").
    File,
    /// Pull from the journal — presumably bounded by journal_days /
    /// journal_max; confirm against the load-context implementation.
    Journal,
}
/// One ordered group of context to load at session start.
#[derive(Debug, Clone)]
pub struct ContextGroup {
    /// Human-readable label for the group.
    pub label: String,
    /// Keys to resolve; interpretation depends on `source`.
    pub keys: Vec<String>,
    /// Where the keys are resolved from.
    pub source: ContextSource,
}
/// Global settings for the memory system, built from compiled-in
/// defaults and then overridden by the JSONL config file (see the
/// module header for the file format).
#[derive(Debug, Clone)]
pub struct Config {
    /// Display name for the human user in transcripts/prompts.
    pub user_name: String,
    /// Display name for the AI assistant.
    pub assistant_name: String,
    /// Base directory for memory data (store, logs, status).
    pub data_dir: PathBuf,
    /// Directory containing Claude session transcripts.
    pub projects_dir: PathBuf,
    /// Core node keys that should never be decayed/deleted.
    pub core_nodes: Vec<String>,
    /// How many days of journal to include in load-context.
    pub journal_days: u32,
    /// Max journal entries to include in load-context.
    pub journal_max: usize,
    /// Ordered context groups for session-start loading.
    pub context_groups: Vec<ContextGroup>,
    /// Max concurrent LLM calls in the daemon (clamped to >= 1 on load).
    pub llm_concurrency: usize,
    /// Directory containing prompt templates for agents.
    pub prompts_dir: PathBuf,
    /// Separate Claude config dir for background agent work (daemon jobs).
    /// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
    /// with different OAuth credentials than the interactive session.
    pub agent_config_dir: Option<PathBuf>,
}
impl Default for Config {
fn default() -> Self {
let home = PathBuf::from(std::env::var("HOME").expect("HOME not set"));
Self {
user_name: "User".to_string(),
assistant_name: "Assistant".to_string(),
data_dir: home.join(".claude/memory"),
projects_dir: home.join(".claude/projects"),
core_nodes: vec!["identity".to_string(), "core-practices".to_string()],
journal_days: 7,
journal_max: 20,
context_groups: vec![
ContextGroup {
label: "identity".into(),
keys: vec!["identity".into()],
source: ContextSource::Store,
},
ContextGroup {
label: "core-practices".into(),
keys: vec!["core-practices".into()],
source: ContextSource::Store,
},
],
llm_concurrency: 1,
prompts_dir: home.join("poc/memory/prompts"),
agent_config_dir: None,
}
}
}
impl Config {
    /// Load configuration from the JSONL file, falling back to defaults.
    ///
    /// The path comes from `POC_MEMORY_CONFIG`, defaulting to
    /// `~/.config/poc-memory/config.jsonl`. The file is parsed as a
    /// stream of JSON values (so multi-line objects are fine); values
    /// that fail to parse are skipped. `{"config": {...}}` objects patch
    /// global settings; `{"group": ...}` objects define context groups
    /// in file order, replacing the default groups when at least one is
    /// present. An object containing "config" is never also treated as
    /// a group line.
    fn load_from_file() -> Self {
        let path = std::env::var("POC_MEMORY_CONFIG")
            .map(PathBuf::from)
            .unwrap_or_else(|_| {
                PathBuf::from(std::env::var("HOME").expect("HOME not set"))
                    .join(".config/poc-memory/config.jsonl")
            });
        let mut config = Config::default();
        // Missing/unreadable file → defaults, silently.
        let Ok(content) = std::fs::read_to_string(&path) else {
            return config;
        };
        let mut context_groups: Vec<ContextGroup> = Vec::new();
        // Parse as a stream of JSON values (handles multi-line objects)
        let stream = serde_json::Deserializer::from_str(&content)
            .into_iter::<serde_json::Value>();
        for result in stream {
            let Ok(obj) = result else { continue };
            if let Some(cfg) = obj.get("config") {
                // Global config line
                config.apply_config_object(cfg);
            } else if let Some(group) = parse_group_line(&obj) {
                // Context group line
                context_groups.push(group);
            }
        }
        // Only override the default groups when the file defined some.
        if !context_groups.is_empty() {
            config.context_groups = context_groups;
        }
        config
    }

    /// Patch `self` with one `{"config": {...}}` object. Only keys that
    /// are present and of the expected JSON type overwrite the current
    /// value; everything else is left untouched.
    fn apply_config_object(&mut self, cfg: &serde_json::Value) {
        let str_of = |key: &str| cfg.get(key).and_then(|v| v.as_str());
        if let Some(s) = str_of("user_name") {
            self.user_name = s.to_string();
        }
        if let Some(s) = str_of("assistant_name") {
            self.assistant_name = s.to_string();
        }
        if let Some(s) = str_of("data_dir") {
            self.data_dir = expand_home(s);
        }
        if let Some(s) = str_of("projects_dir") {
            self.projects_dir = expand_home(s);
        }
        if let Some(arr) = cfg.get("core_nodes").and_then(|v| v.as_array()) {
            // Non-string array entries are silently dropped.
            self.core_nodes = arr.iter()
                .filter_map(|v| v.as_str().map(|s| s.to_string()))
                .collect();
        }
        if let Some(d) = cfg.get("journal_days").and_then(|v| v.as_u64()) {
            self.journal_days = d as u32;
        }
        if let Some(m) = cfg.get("journal_max").and_then(|v| v.as_u64()) {
            self.journal_max = m as usize;
        }
        if let Some(n) = cfg.get("llm_concurrency").and_then(|v| v.as_u64()) {
            // Clamp to at least one worker.
            self.llm_concurrency = n.max(1) as usize;
        }
        if let Some(s) = str_of("prompts_dir") {
            self.prompts_dir = expand_home(s);
        }
        if let Some(s) = str_of("agent_config_dir") {
            self.agent_config_dir = Some(expand_home(s));
        }
    }
}

/// Parse one `{"group": ..., "keys": [...], "source": ...}` object into
/// a `ContextGroup`. Returns `None` when "group" is absent or not a
/// string. Missing "keys" yields an empty list; an unrecognized or
/// missing "source" falls back to the store.
fn parse_group_line(obj: &serde_json::Value) -> Option<ContextGroup> {
    let label = obj.get("group").and_then(|v| v.as_str())?;
    let keys = obj.get("keys")
        .and_then(|v| v.as_array())
        .map(|arr| arr.iter()
            .filter_map(|v| v.as_str().map(|s| s.to_string()))
            .collect())
        .unwrap_or_default();
    let source = match obj.get("source").and_then(|v| v.as_str()) {
        Some("file") => ContextSource::File,
        Some("journal") => ContextSource::Journal,
        _ => ContextSource::Store,
    };
    Some(ContextGroup { label: label.to_string(), keys, source })
}
/// Expand a leading tilde to the current user's home directory.
///
/// Both `"~"` alone and `"~/rest"` resolve against `$HOME` (bare `"~"`
/// previously fell through unexpanded — an obvious gap, now closed).
/// Any other path, absolute or relative, is returned unchanged.
///
/// # Panics
/// Panics if `HOME` is unset while a tilde prefix is present.
fn expand_home(path: &str) -> PathBuf {
    if path == "~" {
        PathBuf::from(std::env::var("HOME").expect("HOME not set"))
    } else if let Some(rest) = path.strip_prefix("~/") {
        PathBuf::from(std::env::var("HOME").expect("HOME not set")).join(rest)
    } else {
        PathBuf::from(path)
    }
}
/// Get the global config (loaded once on first access).
///
/// The first call reads the config file via `Config::load_from_file`;
/// every later call returns the same cached value for the lifetime of
/// the process.
pub fn get() -> &'static Config {
    CONFIG.get_or_init(Config::load_from_file)
}

View file

@ -7,7 +7,7 @@
// search_hits: key → u64 (how often memory-search found this node) // search_hits: key → u64 (how often memory-search found this node)
// last_hit_ts: key → i64 (unix timestamp of last search hit) // last_hit_ts: key → i64 (unix timestamp of last search hit)
use redb::{Database, ReadableDatabase, ReadableTable, TableDefinition}; use redb::{Database, ReadableTable, TableDefinition};
use std::path::PathBuf; use std::path::PathBuf;
const SEARCH_HITS: TableDefinition<&str, u64> = TableDefinition::new("search_hits"); const SEARCH_HITS: TableDefinition<&str, u64> = TableDefinition::new("search_hits");
@ -18,7 +18,7 @@ fn db_path() -> PathBuf {
} }
/// Open (or create) the counters database. /// Open (or create) the counters database.
fn open() -> Result<Database, String> { pub fn open() -> Result<Database, String> {
Database::create(db_path()).map_err(|e| format!("open counters db: {}", e)) Database::create(db_path()).map_err(|e| format!("open counters db: {}", e))
} }

View file

@ -12,16 +12,6 @@ use crate::store::{Store, RelationType, StoreView};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque}; use std::collections::{HashMap, HashSet, VecDeque};
/// Community info for reporting
#[derive(Clone, Debug)]
pub struct CommunityInfo {
pub id: u32,
pub members: Vec<String>,
pub size: usize,
pub isolation: f32,
pub cross_edges: usize,
}
/// Weighted edge in the graph /// Weighted edge in the graph
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Edge { pub struct Edge {
@ -74,43 +64,6 @@ impl Graph {
.unwrap_or_default() .unwrap_or_default()
} }
/// Jaccard similarity between two nodes' neighborhoods.
/// Measures overlap: |intersection| / |union| of their neighbor sets.
pub fn jaccard(&self, a: &str, b: &str) -> f32 {
let na = self.neighbor_keys(a);
let nb = self.neighbor_keys(b);
let intersection = na.intersection(&nb).count();
let union = na.union(&nb).count();
if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
}
/// Compute Jaccard-based strength for every edge in the graph.
/// Returns (source_key, target_key, jaccard_strength) triples.
/// Scales raw Jaccard (typically 0.0-0.3) to a useful range.
pub fn jaccard_strengths(&self) -> Vec<(String, String, f32)> {
let mut result = Vec::new();
let mut seen = HashSet::new();
for (key, edges) in &self.adj {
for edge in edges {
// Deduplicate undirected edges
let pair = if key < &edge.target {
(key.as_str(), edge.target.as_str())
} else {
(edge.target.as_str(), key.as_str())
};
if !seen.insert((pair.0.to_string(), pair.1.to_string())) {
continue;
}
let j = self.jaccard(key, &edge.target);
// Scale: raw Jaccard 0.05 → 0.15, 0.15 → 0.45, 0.30 → 0.90
// Formula: clamp(j * 3, 0.1, 1.0)
let strength = (j * 3.0).clamp(0.1, 1.0);
result.push((key.clone(), edge.target.clone(), strength));
}
}
result
}
pub fn community_count(&self) -> usize { pub fn community_count(&self) -> usize {
let labels: HashSet<_> = self.communities.values().collect(); let labels: HashSet<_> = self.communities.values().collect();
labels.len() labels.len()
@ -120,75 +73,6 @@ impl Graph {
&self.communities &self.communities
} }
/// Community isolation scores: for each community, what fraction of its
/// total edge weight is internal (vs cross-community). Returns community_id → score
/// where 1.0 = fully isolated (no external edges), 0.0 = all edges external.
/// Singleton communities (1 node, no edges) get score 1.0.
pub fn community_isolation(&self) -> HashMap<u32, f32> {
// Accumulate internal and total edge weight per community
let mut internal: HashMap<u32, f32> = HashMap::new();
let mut total: HashMap<u32, f32> = HashMap::new();
for (key, edges) in &self.adj {
let Some(&my_comm) = self.communities.get(key) else { continue };
for edge in edges {
let nbr_comm = self.communities.get(&edge.target).copied().unwrap_or(u32::MAX);
*total.entry(my_comm).or_default() += edge.strength;
if my_comm == nbr_comm {
*internal.entry(my_comm).or_default() += edge.strength;
}
}
}
let mut scores = HashMap::new();
let all_communities: HashSet<u32> = self.communities.values().copied().collect();
for &comm in &all_communities {
let t = total.get(&comm).copied().unwrap_or(0.0);
if t < 0.001 {
scores.insert(comm, 1.0); // no edges = fully isolated
} else {
let i = internal.get(&comm).copied().unwrap_or(0.0);
scores.insert(comm, i / t);
}
}
scores
}
/// Community info: id → (member keys, size, isolation score, cross-community edge count)
pub fn community_info(&self) -> Vec<CommunityInfo> {
let isolation = self.community_isolation();
// Group members by community
let mut members: HashMap<u32, Vec<String>> = HashMap::new();
for (key, &comm) in &self.communities {
members.entry(comm).or_default().push(key.clone());
}
// Count cross-community edges per community
let mut cross_edges: HashMap<u32, usize> = HashMap::new();
for (key, edges) in &self.adj {
let Some(&my_comm) = self.communities.get(key) else { continue };
for edge in edges {
let nbr_comm = self.communities.get(&edge.target).copied().unwrap_or(u32::MAX);
if my_comm != nbr_comm {
*cross_edges.entry(my_comm).or_default() += 1;
}
}
}
let mut result: Vec<CommunityInfo> = members.into_iter()
.map(|(id, mut keys)| {
keys.sort();
let size = keys.len();
let iso = isolation.get(&id).copied().unwrap_or(1.0);
let cross = cross_edges.get(&id).copied().unwrap_or(0) / 2; // undirected
CommunityInfo { id, members: keys, size, isolation: iso, cross_edges: cross }
})
.collect();
result.sort_by(|a, b| b.isolation.total_cmp(&a.isolation));
result
}
/// Hub degree threshold: top 5% by degree /// Hub degree threshold: top 5% by degree
pub fn hub_threshold(&self) -> usize { pub fn hub_threshold(&self) -> usize {
let mut degrees: Vec<usize> = self.keys.iter() let mut degrees: Vec<usize> = self.keys.iter()
@ -542,184 +426,9 @@ fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashS
}); });
}); });
add_implicit_temporal_edges(store, &keys, &mut adj);
(adj, keys) (adj, keys)
} }
/// Add implicit edges for the temporal/digest hierarchy.
///
/// These edges are derived from node types and dates — they don't
/// need to be stored. Two kinds:
/// - parent/child: session→daily→weekly→monthly (by date containment)
/// - prev/next: chronological ordering within each level
///
/// Sessions use their timestamp for date. Digest nodes (daily/weekly/monthly)
/// extract the date they *cover* from the key name, since their timestamp
/// is when the digest was created, not what period it covers.
fn add_implicit_temporal_edges(
store: &impl StoreView,
keys: &HashSet<String>,
adj: &mut HashMap<String, Vec<Edge>>,
) {
use crate::store::NodeType::*;
use chrono::{Datelike, DateTime, NaiveDate};
// Extract the covered date from a key name.
// Patterns: "daily-2026-03-06", "daily-2026-03-06-identity"
fn date_from_key(key: &str) -> Option<NaiveDate> {
let rest = key.strip_prefix("daily-")?;
if rest.len() >= 10 {
NaiveDate::parse_from_str(&rest[..10], "%Y-%m-%d").ok()
} else {
None
}
}
fn week_from_key(key: &str) -> Option<(i32, u32)> {
// "weekly-2026-W09" → (2026, 9)
let rest = key.strip_prefix("weekly-")?;
let (year_str, w_str) = rest.split_once("-W")?;
let year: i32 = year_str.parse().ok()?;
// Week string might have a suffix like "-foo"
let week_str = w_str.split('-').next()?;
let week: u32 = week_str.parse().ok()?;
Some((year, week))
}
fn month_from_key(key: &str) -> Option<(i32, u32)> {
// "monthly-2026-02" → (2026, 2)
let rest = key.strip_prefix("monthly-")?;
let (year_str, month_str) = rest.split_once('-')?;
let year: i32 = year_str.parse().ok()?;
let month_str = month_str.split('-').next()?;
let month: u32 = month_str.parse().ok()?;
Some((year, month))
}
// Collect episodic nodes by type
struct Dated { key: String, ts: i64, date: NaiveDate }
let mut sessions: Vec<Dated> = Vec::new();
let mut dailies: Vec<(String, NaiveDate)> = Vec::new();
let mut weeklies: Vec<(String, (i32, u32))> = Vec::new();
let mut monthlies: Vec<(String, (i32, u32))> = Vec::new();
store.for_each_node_meta(|key, node_type, ts| {
if !keys.contains(key) { return; }
match node_type {
EpisodicSession => {
// Prefer date from key (local time) over timestamp (UTC)
// to avoid timezone mismatches
let date = date_from_key(key).or_else(|| {
DateTime::from_timestamp(ts, 0).map(|dt| dt.date_naive())
});
if let Some(date) = date {
sessions.push(Dated { key: key.to_owned(), ts, date });
}
}
EpisodicDaily => {
if let Some(date) = date_from_key(key) {
dailies.push((key.to_owned(), date));
}
}
EpisodicWeekly => {
if let Some(yw) = week_from_key(key) {
weeklies.push((key.to_owned(), yw));
}
}
EpisodicMonthly => {
if let Some(ym) = month_from_key(key) {
monthlies.push((key.to_owned(), ym));
}
}
_ => {}
}
});
sessions.sort_by_key(|d| d.ts);
dailies.sort_by_key(|(_, d)| *d);
weeklies.sort_by_key(|(_, yw)| *yw);
monthlies.sort_by_key(|(_, ym)| *ym);
let add_edge = |adj: &mut HashMap<String, Vec<Edge>>, a: &str, b: &str| {
if let Some(edges) = adj.get(a)
&& edges.iter().any(|e| e.target == b) { return; }
adj.entry(a.to_owned()).or_default().push(Edge {
target: b.to_owned(),
strength: 1.0,
rel_type: RelationType::Auto,
});
adj.entry(b.to_owned()).or_default().push(Edge {
target: a.to_owned(),
strength: 1.0,
rel_type: RelationType::Auto,
});
};
// Build indexes: date→dailies, (year,week)→weekly, (year,month)→monthly
// Note: multiple dailies can share a date (e.g. daily-2026-03-06-identity,
// daily-2026-03-06-technical), so we collect all of them.
let mut date_to_dailies: HashMap<NaiveDate, Vec<String>> = HashMap::new();
for (key, date) in &dailies {
date_to_dailies.entry(*date).or_default().push(key.clone());
}
let mut yw_to_weekly: HashMap<(i32, u32), String> = HashMap::new();
for (key, yw) in &weeklies {
yw_to_weekly.insert(*yw, key.clone());
}
let mut ym_to_monthly: HashMap<(i32, u32), String> = HashMap::new();
for (key, ym) in &monthlies {
ym_to_monthly.insert(*ym, key.clone());
}
// Session → Daily (parent): each session links to all dailies for its date
for sess in &sessions {
if let Some(daily_keys) = date_to_dailies.get(&sess.date) {
for daily in daily_keys {
add_edge(adj, &sess.key, daily);
}
}
}
// Daily → Weekly (parent)
for (key, date) in &dailies {
let yw = (date.iso_week().year(), date.iso_week().week());
if let Some(weekly) = yw_to_weekly.get(&yw) {
add_edge(adj, key, weekly);
}
}
// Weekly → Monthly (parent)
for (key, yw) in &weeklies {
// A week can span two months; use the Thursday date (ISO week convention)
let thursday = NaiveDate::from_isoywd_opt(yw.0, yw.1, chrono::Weekday::Thu);
if let Some(d) = thursday {
let ym = (d.year(), d.month());
if let Some(monthly) = ym_to_monthly.get(&ym) {
add_edge(adj, key, monthly);
}
}
}
// Prev/next within each level
for pair in sessions.windows(2) {
add_edge(adj, &pair[0].key, &pair[1].key);
}
for pair in dailies.windows(2) {
add_edge(adj, &pair[0].0, &pair[1].0);
}
for pair in weeklies.windows(2) {
add_edge(adj, &pair[0].0, &pair[1].0);
}
for pair in monthlies.windows(2) {
add_edge(adj, &pair[0].0, &pair[1].0);
}
}
/// Label propagation community detection. /// Label propagation community detection.
/// ///
/// Each node starts with its own label. Each iteration: adopt the most /// Each node starts with its own label. Each iteration: adopt the most

32
poc-memory/src/lib.rs Normal file
View file

@ -0,0 +1,32 @@
// poc-memory library — shared modules for all binaries
//
// Re-exports modules so that memory-search and other binaries
// can call library functions directly instead of shelling out.
// Core infrastructure
pub mod config;
pub mod store;
pub mod util;
pub mod graph;
pub mod search;
pub mod similarity;
pub mod spectral;
pub mod lookups;
pub mod query;
pub mod transcript;
pub mod neuro;
pub mod counters;
// Agent layer (LLM-powered operations)
pub mod agents;
pub mod tui;
// Re-export agent submodules at crate root for backwards compatibility
pub use agents::{
llm, audit, consolidate, knowledge,
enrich, fact_mine, digest, daemon,
};
pub mod memory_capnp {
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
}

View file

@ -197,3 +197,8 @@ pub fn dump_resolved(date: &str, keys: &[String]) -> Result<Vec<(String, u32)>,
Ok(resolved) Ok(resolved)
} }
/// Hash a key (exposed for testing/external use).
///
/// Thin public wrapper over this module's internal `fnv1a` hash so
/// external callers and tests can reproduce resolver hashing exactly.
pub fn hash_key(key: &str) -> u64 {
    fnv1a(key)
}

2636
poc-memory/src/main.rs Normal file

File diff suppressed because it is too large Load diff

368
poc-memory/src/migrate.rs Normal file
View file

@ -0,0 +1,368 @@
// Migration from old weights.json + markdown marker system
//
// Reads:
// ~/.claude/memory/weights.json (1,874 entries with metrics)
// ~/.claude/memory/*.md (content + mem markers + edges)
//
// Emits:
// ~/.claude/memory/nodes.capnp (all nodes with preserved metadata)
// ~/.claude/memory/relations.capnp (all edges from markers + md links)
// ~/.claude/memory/state.json (derived cache)
//
// Old files are preserved as backup. Run once.
use crate::store::{
self, Store, Node, NodeType, RelationType,
parse_units, new_relation,
};
use serde::Deserialize;
use uuid::Uuid;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
/// Resolve `$HOME` as a `PathBuf`; panics when the variable is unset.
fn home() -> PathBuf {
    env::var("HOME").map(PathBuf::from).expect("HOME not set")
}
// Old system data structures (just enough for deserialization)
/// Top-level shape of the legacy weights.json file.
#[derive(Deserialize)]
struct OldStore {
    // key → per-entry weight/usage metadata
    #[serde(default)]
    entries: HashMap<String, OldEntry>,
    // append-only log of past retrieval events
    #[serde(default)]
    retrieval_log: Vec<OldRetrievalEvent>,
    // tuning parameters; absent object falls back to OldParams::default()
    #[serde(default)]
    params: OldParams,
}
/// Per-key metadata for one entry in the legacy store.
#[derive(Deserialize)]
#[allow(dead_code)] // fields needed for deserialization of old format
struct OldEntry {
    // retrieval weight — presumably in [0, 1]; TODO confirm range
    weight: f64,
    // creation timestamp, kept as the original string verbatim
    created: String,
    #[serde(default)]
    last_retrieved: Option<String>,
    #[serde(default)]
    last_used: Option<String>,
    // usage counters carried over into the new Node
    #[serde(default)]
    retrievals: u32,
    #[serde(default)]
    uses: u32,
    #[serde(default)]
    wrongs: u32,
    #[serde(default = "default_category")]
    category: String,
}
// Serde default for `OldEntry::category`.
fn default_category() -> String { String::from("General") }
/// One logged retrieval event from the legacy store.
#[derive(Deserialize)]
struct OldRetrievalEvent {
    // the search query that was issued
    query: String,
    // timestamp string, kept verbatim
    timestamp: String,
    // keys returned by the search
    results: Vec<String>,
    // subset of results actually used, when recorded
    #[serde(default)]
    used: Option<Vec<String>>,
}
/// Legacy tuning parameters. Every field carries a serde default so a
/// partially specified params object still deserializes.
#[derive(Deserialize)]
struct OldParams {
    #[serde(default = "default_0_7")]
    default_weight: f64,
    #[serde(default = "default_0_95")]
    decay_factor: f64,
    #[serde(default = "default_0_15")]
    use_boost: f64,
    #[serde(default = "default_0_1")]
    prune_threshold: f64,
    #[serde(default = "default_0_3")]
    edge_decay: f64,
    #[serde(default = "default_3")]
    max_hops: u32,
    #[serde(default = "default_0_05")]
    min_activation: f64,
}
impl Default for OldParams {
    /// Used when weights.json is missing or has no params object at
    /// all; values deliberately match the per-field serde defaults.
    fn default() -> Self {
        OldParams {
            default_weight: 0.7,
            decay_factor: 0.95,
            use_boost: 0.15,
            prune_threshold: 0.1,
            edge_decay: 0.3,
            max_hops: 3,
            min_activation: 0.05,
        }
    }
}
// Serde per-field default helpers; keep in sync with OldParams::default().
fn default_0_7() -> f64 { 0.7 }
fn default_0_95() -> f64 { 0.95 }
fn default_0_15() -> f64 { 0.15 }
fn default_0_1() -> f64 { 0.1 }
fn default_0_3() -> f64 { 0.3 }
fn default_3() -> u32 { 3 }
fn default_0_05() -> f64 { 0.05 }
/// One-shot migration from the legacy system (weights.json + markdown
/// marker files) into the new capnp-backed store.
///
/// Reads per-key metrics from weights.json and scans the memory
/// directory for markdown units (content + links), then writes
/// nodes.capnp, relations.capnp and the derived state.json cache.
/// Old files are left in place as a backup.
///
/// # Errors
/// Returns a descriptive string when the capnp target files already
/// exist, or on any read/parse/write failure along the way.
pub fn migrate() -> Result<(), String> {
    let weights_path = home().join(".claude/memory/weights.json");
    let memory_dir = home().join(".claude/memory");
    let nodes_path = memory_dir.join("nodes.capnp");
    let rels_path = memory_dir.join("relations.capnp");
    // Safety check: refuse to clobber an existing migration target.
    if nodes_path.exists() || rels_path.exists() {
        return Err("nodes.capnp or relations.capnp already exist. \
            Remove them first if you want to re-migrate.".into());
    }
    // Load old store (missing weights.json → markdown-only migration)
    let old_store: OldStore = if weights_path.exists() {
        let data = fs::read_to_string(&weights_path)
            .map_err(|e| format!("read weights.json: {}", e))?;
        serde_json::from_str(&data)
            .map_err(|e| format!("parse weights.json: {}", e))?
    } else {
        eprintln!("Warning: no weights.json found, migrating markdown only");
        OldStore {
            entries: HashMap::new(),
            retrieval_log: Vec::new(),
            params: OldParams::default(),
        }
    };
    eprintln!("Old store: {} entries, {} retrieval events",
        old_store.entries.len(), old_store.retrieval_log.len());
    // Scan markdown files to get content + edges
    let mut units_by_key: HashMap<String, store::MemoryUnit> = HashMap::new();
    scan_markdown_dir(&memory_dir, &mut units_by_key)?;
    eprintln!("Scanned {} markdown units", units_by_key.len());
    // Create new store
    let mut store = Store::default();
    // Migrate params field-by-field
    store.params.default_weight = old_store.params.default_weight;
    store.params.decay_factor = old_store.params.decay_factor;
    store.params.use_boost = old_store.params.use_boost;
    store.params.prune_threshold = old_store.params.prune_threshold;
    store.params.edge_decay = old_store.params.edge_decay;
    store.params.max_hops = old_store.params.max_hops;
    store.params.min_activation = old_store.params.min_activation;
    // Migrate retrieval log verbatim
    store.retrieval_log = old_store.retrieval_log.iter().map(|e| {
        store::RetrievalEvent {
            query: e.query.clone(),
            timestamp: e.timestamp.clone(),
            results: e.results.clone(),
            used: e.used.clone(),
        }
    }).collect();
    // Phase 1: Create nodes
    // Merge old entries (weight metadata) with markdown units (content)
    let mut all_nodes: Vec<Node> = Vec::new();
    let mut key_to_uuid: HashMap<String, [u8; 16]> = HashMap::new();
    // First, all entries from the old store
    for (key, old_entry) in &old_store.entries {
        let uuid = *Uuid::new_v4().as_bytes();
        key_to_uuid.insert(key.clone(), uuid);
        // Content/state come from the markdown unit when one exists;
        // entries without markdown get empty content.
        let content = units_by_key.get(key)
            .map(|u| u.content.clone())
            .unwrap_or_default();
        let state_tag = units_by_key.get(key)
            .and_then(|u| u.state.clone())
            .unwrap_or_default();
        let node = Node {
            uuid,
            version: 1,
            timestamp: store::now_epoch(),
            // Heuristic: journal-ish keys become episodic sessions
            node_type: if key.contains("journal") {
                NodeType::EpisodicSession
            } else {
                NodeType::Semantic
            },
            provenance: "manual".to_string(),
            key: key.clone(),
            content,
            weight: old_entry.weight as f32,
            emotion: 0.0,
            deleted: false,
            source_ref: String::new(),
            created: old_entry.created.clone(),
            retrievals: old_entry.retrievals,
            uses: old_entry.uses,
            wrongs: old_entry.wrongs,
            state_tag,
            last_replayed: 0,
            spaced_repetition_interval: 1,
            position: 0,
            created_at: 0,
            community_id: None,
            clustering_coefficient: None,
            degree: None,
        };
        all_nodes.push(node);
    }
    // Then, any markdown units not in the old store (fresh defaults)
    for (key, unit) in &units_by_key {
        if key_to_uuid.contains_key(key) { continue; }
        let uuid = *Uuid::new_v4().as_bytes();
        key_to_uuid.insert(key.clone(), uuid);
        let node = Node {
            uuid,
            version: 1,
            timestamp: store::now_epoch(),
            node_type: if key.contains("journal") {
                NodeType::EpisodicSession
            } else {
                NodeType::Semantic
            },
            provenance: "manual".to_string(),
            key: key.clone(),
            content: unit.content.clone(),
            weight: 0.7,
            emotion: 0.0,
            deleted: false,
            source_ref: String::new(),
            created: String::new(),
            retrievals: 0,
            uses: 0,
            wrongs: 0,
            state_tag: unit.state.clone().unwrap_or_default(),
            last_replayed: 0,
            spaced_repetition_interval: 1,
            position: 0,
            created_at: 0,
            community_id: None,
            clustering_coefficient: None,
            degree: None,
        };
        all_nodes.push(node);
    }
    // Write nodes to capnp log, then mirror into the in-memory maps
    store.append_nodes(&all_nodes)?;
    for node in &all_nodes {
        store.uuid_to_key.insert(node.uuid, node.key.clone());
        store.nodes.insert(node.key.clone(), node.clone());
    }
    eprintln!("Migrated {} nodes", all_nodes.len());
    // Phase 2: Create relations from markdown links + causal edges
    let mut all_relations = Vec::new();
    for (key, unit) in &units_by_key {
        // Links to keys that never became nodes are silently dropped.
        let source_uuid = match key_to_uuid.get(key) {
            Some(u) => *u,
            None => continue,
        };
        // Association links (bidirectional)
        for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
            let target_uuid = match key_to_uuid.get(link) {
                Some(u) => *u,
                None => continue,
            };
            // Avoid duplicate relations.
            // NOTE(review): linear scan per link is O(n²) in total edge
            // count — fine for a one-shot migration at current scale.
            let exists = all_relations.iter().any(|r: &store::Relation|
                (r.source == source_uuid && r.target == target_uuid) ||
                (r.source == target_uuid && r.target == source_uuid));
            if exists { continue; }
            all_relations.push(new_relation(
                source_uuid, target_uuid,
                RelationType::Link, 1.0,
                key, link,
            ));
        }
        // Causal edges (directed: cause → this node)
        for cause in &unit.causes {
            let cause_uuid = match key_to_uuid.get(cause) {
                Some(u) => *u,
                None => continue,
            };
            all_relations.push(new_relation(
                cause_uuid, source_uuid,
                RelationType::Causal, 1.0,
                cause, key,
            ));
        }
    }
    // Write relations to capnp log
    store.append_relations(&all_relations)?;
    store.relations = all_relations;
    eprintln!("Migrated {} relations", store.relations.len());
    // Phase 3: Compute graph metrics
    store.update_graph_metrics();
    // Save derived cache
    store.save()?;
    eprintln!("Migration complete. Files:");
    eprintln!(" {}", nodes_path.display());
    eprintln!(" {}", rels_path.display());
    eprintln!(" {}", memory_dir.join("state.json").display());
    // Verify by rebuilding the graph from the new store
    let g = store.build_graph();
    eprintln!("\nVerification:");
    eprintln!(" Nodes: {}", store.nodes.len());
    eprintln!(" Relations: {}", store.relations.len());
    eprintln!(" Graph edges: {}", g.edge_count());
    eprintln!(" Communities: {}", g.community_count());
    eprintln!(" Avg CC: {:.4}", g.avg_clustering_coefficient());
    Ok(())
}
/// Recursively walk `dir`, parsing memory units out of every `*.md`
/// file and inserting them into `units`, keyed by unit key (later
/// duplicates overwrite earlier ones).
///
/// Individual unreadable files are skipped best-effort; an unreadable
/// directory aborts the whole scan with an error string.
fn scan_markdown_dir(
    dir: &Path,
    units: &mut HashMap<String, store::MemoryUnit>,
) -> Result<(), String> {
    let dir_iter = fs::read_dir(dir)
        .map_err(|e| format!("read dir {}: {}", dir.display(), e))?;
    for dirent in dir_iter.flatten() {
        let path = dirent.path();
        // Descend into subdirectories.
        if path.is_dir() {
            scan_markdown_dir(&path, units)?;
            continue;
        }
        // Only markdown files are of interest.
        if path.extension().map(|e| e != "md").unwrap_or(true) {
            continue;
        }
        let filename = path.file_name().unwrap().to_string_lossy().to_string();
        let Ok(content) = fs::read_to_string(&path) else { continue };
        for unit in parse_units(&filename, &content) {
            units.insert(unit.key.clone(), unit);
        }
    }
    Ok(())
}

View file

@ -65,7 +65,7 @@ pub fn differentiate_hub(store: &Store, hub_key: &str) -> Option<Vec<LinkMove>>
} }
/// Like differentiate_hub but uses a pre-built graph. /// Like differentiate_hub but uses a pre-built graph.
fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph) -> Option<Vec<LinkMove>> { pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph) -> Option<Vec<LinkMove>> {
let degree = graph.degree(hub_key); let degree = graph.degree(hub_key);
// Only differentiate actual hubs // Only differentiate actual hubs

View file

@ -163,70 +163,36 @@ pub fn detect_interference(
.collect() .collect()
} }
/// Agent allocation from the control loop. /// Agent allocation from the control loop
/// Agent types and counts are data-driven — add agents by adding
/// entries to the counts map.
#[derive(Default)] #[derive(Default)]
pub struct ConsolidationPlan { pub struct ConsolidationPlan {
/// agent_name → run count pub replay_count: usize,
pub counts: std::collections::HashMap<String, usize>, pub linker_count: usize,
pub separator_count: usize,
pub transfer_count: usize,
pub run_health: bool, pub run_health: bool,
pub rationale: Vec<String>, pub rationale: Vec<String>,
} }
impl ConsolidationPlan { impl ConsolidationPlan {
pub fn count(&self, agent: &str) -> usize {
self.counts.get(agent).copied().unwrap_or(0)
}
pub fn set(&mut self, agent: &str, count: usize) {
self.counts.insert(agent.to_string(), count);
}
pub fn add(&mut self, agent: &str, count: usize) {
*self.counts.entry(agent.to_string()).or_default() += count;
}
pub fn total(&self) -> usize {
self.counts.values().sum::<usize>() + if self.run_health { 1 } else { 0 }
}
/// Expand the plan into a flat list of (agent_name, batch_size) runs. /// Expand the plan into a flat list of (agent_name, batch_size) runs.
/// Interleaves agent types so different types alternate. pub fn to_agent_runs(&self, batch_size: usize) -> Vec<(&'static str, usize)> {
pub fn to_agent_runs(&self, batch_size: usize) -> Vec<(String, usize)> {
let mut runs = Vec::new(); let mut runs = Vec::new();
if self.run_health { if self.run_health {
runs.push(("health".to_string(), 0)); runs.push(("health", 0));
} }
for (name, count) in [
// Sort by count descending so high-volume agents interleave well ("replay", self.replay_count),
let mut types: Vec<(&String, &usize)> = self.counts.iter() ("linker", self.linker_count),
.filter(|(_, c)| **c > 0) ("separator", self.separator_count),
.collect(); ("transfer", self.transfer_count),
types.sort_by(|a, b| b.1.cmp(a.1)); ] {
let mut remaining = count;
let mut queues: Vec<Vec<(String, usize)>> = types.iter().map(|(name, count)| {
let mut q = Vec::new();
let mut remaining = **count;
while remaining > 0 { while remaining > 0 {
let batch = remaining.min(batch_size); let batch = remaining.min(batch_size);
q.push((name.to_string(), batch)); runs.push((name, batch));
remaining -= batch; remaining -= batch;
} }
q
}).collect();
// Round-robin interleave
loop {
let mut added = false;
for q in &mut queues {
if let Some(run) = q.first() {
runs.push(run.clone());
q.remove(0);
added = true;
}
}
if !added { break; }
} }
runs runs
} }
@ -249,7 +215,7 @@ fn consolidation_plan_inner(store: &Store, detect_interf: bool) -> Consolidation
let graph = store.build_graph(); let graph = store.build_graph();
let alpha = graph.degree_power_law_exponent(); let alpha = graph.degree_power_law_exponent();
let gini = graph.degree_gini(); let gini = graph.degree_gini();
let _avg_cc = graph.avg_clustering_coefficient(); let avg_cc = graph.avg_clustering_coefficient();
let interference_count = if detect_interf { let interference_count = if detect_interf {
detect_interference(store, &graph, 0.5).len() detect_interference(store, &graph, 0.5).len()
} else { } else {
@ -259,112 +225,87 @@ fn consolidation_plan_inner(store: &Store, detect_interf: bool) -> Consolidation
let episodic_count = store.nodes.iter() let episodic_count = store.nodes.iter()
.filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession)) .filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
.count(); .count();
let _episodic_ratio = if store.nodes.is_empty() { 0.0 } let episodic_ratio = if store.nodes.is_empty() { 0.0 }
else { episodic_count as f32 / store.nodes.len() as f32 }; else { episodic_count as f32 / store.nodes.len() as f32 };
let mut plan = ConsolidationPlan { let mut plan = ConsolidationPlan {
counts: std::collections::HashMap::new(), replay_count: 0,
linker_count: 0,
separator_count: 0,
transfer_count: 0,
run_health: true, run_health: true,
rationale: Vec::new(), rationale: Vec::new(),
}; };
// Active agent types from config
let config = crate::config::get();
let agent_types: Vec<&str> = config.agent_types.iter().map(|s| s.as_str()).collect();
// Target: α ≥ 2.5 (healthy scale-free) // Target: α ≥ 2.5 (healthy scale-free)
if alpha < 2.0 { if alpha < 2.0 {
plan.add("linker", 100); plan.replay_count += 10;
plan.linker_count += 5;
plan.rationale.push(format!( plan.rationale.push(format!(
"α={:.2} (target ≥2.5): extreme hub dominance → 100 linker", alpha)); "α={:.2} (target ≥2.5): extreme hub dominance → 10 replay + 5 linker",
alpha));
} else if alpha < 2.5 { } else if alpha < 2.5 {
plan.add("linker", 50); plan.replay_count += 5;
plan.linker_count += 3;
plan.rationale.push(format!( plan.rationale.push(format!(
"α={:.2} (target ≥2.5): moderate hub dominance → 50 linker", alpha)); "α={:.2} (target ≥2.5): moderate hub dominance → 5 replay + 3 linker",
alpha));
} else { } else {
plan.add("linker", 20); plan.replay_count += 3;
plan.rationale.push(format!( plan.rationale.push(format!(
"α={:.2}: healthy — 20 linker for maintenance", alpha)); "α={:.2}: healthy — 3 replay for maintenance", alpha));
} }
// Target: Gini ≤ 0.4 // Target: Gini ≤ 0.4
if gini > 0.5 { if gini > 0.5 {
plan.add("linker", 50); plan.replay_count += 3;
plan.rationale.push(format!( plan.rationale.push(format!(
"Gini={:.3} (target ≤0.4): high inequality → +50 linker", gini)); "Gini={:.3} (target ≤0.4): high inequality → +3 replay",
gini));
} }
// Interference: separator disambiguates confusable nodes // Target: avg CC ≥ 0.2
if avg_cc < 0.1 {
plan.replay_count += 5;
plan.rationale.push(format!(
"CC={:.3} (target ≥0.2): very poor integration → +5 replay",
avg_cc));
} else if avg_cc < 0.2 {
plan.replay_count += 2;
plan.rationale.push(format!(
"CC={:.3} (target ≥0.2): low integration → +2 replay",
avg_cc));
}
// Interference: >100 pairs is a lot, <10 is clean
if interference_count > 100 { if interference_count > 100 {
plan.add("separator", 10); plan.separator_count += 10;
plan.rationale.push(format!( plan.rationale.push(format!(
"Interference: {} pairs (target <50) → 10 separator", interference_count)); "Interference: {} pairs (target <50) → 10 separator",
interference_count));
} else if interference_count > 20 { } else if interference_count > 20 {
plan.add("separator", 5); plan.separator_count += 5;
plan.rationale.push(format!( plan.rationale.push(format!(
"Interference: {} pairs → 5 separator", interference_count)); "Interference: {} pairs (target <50) → 5 separator",
interference_count));
} else if interference_count > 0 { } else if interference_count > 0 {
plan.add("separator", interference_count.min(3)); plan.separator_count += interference_count.min(3);
plan.rationale.push(format!(
"Interference: {} pairs → {} separator",
interference_count, plan.separator_count));
} }
// Organize: proportional to linker — synthesizes what linker connects // Episodic → semantic transfer
let linker = plan.count("linker"); if episodic_ratio > 0.6 {
plan.set("organize", linker / 2); plan.transfer_count += 10;
plan.rationale.push(format!(
"Organize: {} (half of linker count)", plan.count("organize")));
// Distill: core concept maintenance
let organize = plan.count("organize");
let mut distill = organize;
if gini > 0.4 { distill += 20; }
if alpha < 2.0 { distill += 20; }
plan.set("distill", distill);
plan.rationale.push(format!(
"Distill: {} (synthesize hub content)", plan.count("distill")));
// Split: handle oversized nodes
plan.set("split", 5);
// Distribute agent budget using Elo ratings
let budget = crate::config::get().agent_budget;
let elo_path = crate::config::get().data_dir.join("agent-elo.json");
if let Ok(elo_json) = std::fs::read_to_string(&elo_path) {
if let Ok(ratings) = serde_json::from_str::<std::collections::HashMap<String, f64>>(&elo_json) {
let elos: Vec<f64> = agent_types.iter()
.map(|t| ratings.get(*t).copied().unwrap_or(1000.0))
.collect();
let min_elo = elos.iter().copied().fold(f64::MAX, f64::min);
let weights: Vec<f64> = elos.iter()
.map(|e| {
let shifted = e - min_elo + 50.0;
shifted * shifted
})
.collect();
let total_weight: f64 = weights.iter().sum();
let allocate = |w: f64| -> usize {
((w / total_weight * budget as f64).round() as usize).max(2)
};
for (i, agent) in agent_types.iter().enumerate() {
plan.set(agent, allocate(weights[i]));
}
let summary: Vec<String> = agent_types.iter()
.map(|a| format!("{}={}", a, plan.count(a)))
.collect();
plan.rationale.push(format!(
"Elo allocation (budget={}): {}", budget, summary.join(" ")));
}
} else {
// No Elo file — use budget with equal distribution
let per_type = budget / agent_types.len();
for agent in &agent_types {
plan.set(agent, per_type);
}
plan.rationale.push(format!( plan.rationale.push(format!(
"No Elo ratings — equal distribution ({} each, budget={})", per_type, budget)); "Episodic ratio: {:.0}% ({}/{}) → 10 transfer",
episodic_ratio * 100.0, episodic_count, store.nodes.len()));
} else if episodic_ratio > 0.4 {
plan.transfer_count += 5;
plan.rationale.push(format!(
"Episodic ratio: {:.0}% → 5 transfer",
episodic_ratio * 100.0));
} }
plan plan
@ -381,19 +322,34 @@ pub fn format_plan(plan: &ConsolidationPlan) -> String {
out.push_str("\nAgent allocation:\n"); out.push_str("\nAgent allocation:\n");
if plan.run_health { if plan.run_health {
out.push_str(" 1. health — system audit\n"); out.push_str(" 1. health — system audit\n");
} }
let mut step = 2; let mut step = 2;
let mut sorted: Vec<_> = plan.counts.iter() if plan.replay_count > 0 {
.filter(|(_, c)| **c > 0) out.push_str(&format!(" {}. replay ×{:2} — schema assimilation + lateral linking\n",
.collect(); step, plan.replay_count));
sorted.sort_by(|a, b| b.1.cmp(a.1));
for (agent, count) in &sorted {
out.push_str(&format!(" {}. {} ×{}\n", step, agent, count));
step += 1; step += 1;
} }
if plan.linker_count > 0 {
out.push_str(&format!(" {}. linker ×{:2} — relational binding from episodes\n",
step, plan.linker_count));
step += 1;
}
if plan.separator_count > 0 {
out.push_str(&format!(" {}. separator ×{} — pattern separation\n",
step, plan.separator_count));
step += 1;
}
if plan.transfer_count > 0 {
out.push_str(&format!(" {}. transfer ×{:2} — episodic→semantic extraction\n",
step, plan.transfer_count));
}
let total = plan.replay_count + plan.linker_count
+ plan.separator_count + plan.transfer_count
+ if plan.run_health { 1 } else { 0 };
out.push_str(&format!("\nTotal agent runs: {}\n", total));
out.push_str(&format!("\nTotal agent runs: {}\n", plan.total()));
out out
} }

View file

@ -63,8 +63,6 @@ pub enum Stage {
Limit(usize), Limit(usize),
Select(Vec<String>), Select(Vec<String>),
Count, Count,
Connectivity,
DominatingSet,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -90,8 +88,6 @@ peg::parser! {
/ "limit" _ n:integer() { Stage::Limit(n) } / "limit" _ n:integer() { Stage::Limit(n) }
/ "select" _ f:field_list() { Stage::Select(f) } / "select" _ f:field_list() { Stage::Select(f) }
/ "count" { Stage::Count } / "count" { Stage::Count }
/ "connectivity" { Stage::Connectivity }
/ "dominating-set" { Stage::DominatingSet }
rule asc_desc() -> bool rule asc_desc() -> bool
= "asc" { true } = "asc" { true }
@ -129,7 +125,7 @@ peg::parser! {
= "WHERE" _ e:expr() { e } = "WHERE" _ e:expr() { e }
rule field() -> String rule field() -> String
= s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-']*) { = s:$(['a'..='z' | 'A'..='Z' | '_']['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*) {
s.to_string() s.to_string()
} }
@ -182,8 +178,7 @@ fn resolve_field(field: &str, key: &str, store: &Store, graph: &Graph) -> Option
"retrievals" => Some(Value::Num(node.retrievals as f64)), "retrievals" => Some(Value::Num(node.retrievals as f64)),
"uses" => Some(Value::Num(node.uses as f64)), "uses" => Some(Value::Num(node.uses as f64)),
"wrongs" => Some(Value::Num(node.wrongs as f64)), "wrongs" => Some(Value::Num(node.wrongs as f64)),
"created" => Some(Value::Num(node.created_at as f64)), "created" => Some(Value::Str(node.created.clone())),
"timestamp" => Some(Value::Num(node.timestamp as f64)),
"content" => Some(Value::Str(node.content.clone())), "content" => Some(Value::Str(node.content.clone())),
"degree" => Some(Value::Num(graph.degree(key) as f64)), "degree" => Some(Value::Num(graph.degree(key) as f64)),
"community_id" => { "community_id" => {
@ -393,10 +388,11 @@ fn execute_parsed(
for r in &mut results { for r in &mut results {
for f in &needed { for f in &needed {
if !r.fields.contains_key(f) if !r.fields.contains_key(f) {
&& let Some(v) = resolve_field(f, &r.key, store, graph) { if let Some(v) = resolve_field(f, &r.key, store, graph) {
r.fields.insert(f.clone(), v); r.fields.insert(f.clone(), v);
} }
}
} }
} }
@ -424,17 +420,7 @@ fn execute_parsed(
Stage::Limit(n) => { Stage::Limit(n) => {
results.truncate(*n); results.truncate(*n);
} }
Stage::Connectivity => {} // handled in output
Stage::Select(_) | Stage::Count => {} // handled in output Stage::Select(_) | Stage::Count => {} // handled in output
Stage::DominatingSet => {
let mut items: Vec<(String, f64)> = results.iter()
.map(|r| (r.key.clone(), graph.degree(&r.key) as f64))
.collect();
let xform = super::engine::Transform::DominatingSet;
items = super::engine::run_transform(&xform, items, store, graph);
let keep: std::collections::HashSet<String> = items.into_iter().map(|(k, _)| k).collect();
results.retain(|r| keep.contains(&r.key));
}
} }
} }
@ -484,12 +470,6 @@ pub fn run_query(store: &Store, graph: &Graph, query_str: &str) -> Result<(), St
return Ok(()); return Ok(());
} }
// Connectivity stage
if q.stages.iter().any(|s| matches!(s, Stage::Connectivity)) {
print_connectivity(&results, graph);
return Ok(());
}
// Select stage // Select stage
let fields: Option<&Vec<String>> = q.stages.iter().find_map(|s| match s { let fields: Option<&Vec<String>> = q.stages.iter().find_map(|s| match s {
Stage::Select(f) => Some(f), Stage::Select(f) => Some(f),
@ -519,164 +499,3 @@ pub fn run_query(store: &Store, graph: &Graph, query_str: &str) -> Result<(), St
Ok(()) Ok(())
} }
/// Run a query and return the output as a string (for tool calls).
pub fn query_to_string(store: &Store, graph: &Graph, query_str: &str) -> Result<String, String> {
let q = query_parser::query(query_str)
.map_err(|e| format!("Parse error: {}", e))?;
let results = execute_parsed(store, graph, &q)?;
if q.stages.iter().any(|s| matches!(s, Stage::Count)) {
return Ok(results.len().to_string());
}
if results.is_empty() {
return Ok("no results".to_string());
}
let fields: Option<&Vec<String>> = q.stages.iter().find_map(|s| match s {
Stage::Select(f) => Some(f),
_ => None,
});
let mut out = String::new();
if let Some(fields) = fields {
let mut header = vec!["key".to_string()];
header.extend(fields.iter().cloned());
out.push_str(&header.join("\t"));
out.push('\n');
for r in &results {
let mut row = vec![r.key.clone()];
for f in fields {
row.push(match r.fields.get(f) {
Some(v) => format_value(v),
None => "-".to_string(),
});
}
out.push_str(&row.join("\t"));
out.push('\n');
}
} else {
for r in &results {
out.push_str(&r.key);
out.push('\n');
}
}
Ok(out)
}
// -- Connectivity analysis --
/// BFS shortest path between two nodes, max_hops limit.
///
/// Returns the node sequence from `from` to `to` inclusive, or `None`
/// if `to` is not reachable within `max_hops` edges.
fn bfs_path(graph: &Graph, from: &str, to: &str, max_hops: usize) -> Option<Vec<String>> {
    use std::collections::{HashMap, VecDeque};
    if from == to {
        return Some(vec![from.to_string()]);
    }
    // parent[node] = predecessor on the discovered path; the start node
    // gets an empty-string sentinel so reconstruction knows where to stop.
    let mut parent: HashMap<String, String> = HashMap::new();
    parent.insert(from.to_string(), String::new());
    let mut frontier: VecDeque<(String, usize)> = VecDeque::new();
    frontier.push_back((from.to_string(), 0));
    while let Some((node, hops)) = frontier.pop_front() {
        // Nodes at the hop budget are not expanded further.
        if hops >= max_hops {
            continue;
        }
        for (next, _) in graph.neighbors(&node) {
            if parent.contains_key(next.as_str()) {
                continue; // already discovered via an equal-or-shorter route
            }
            parent.insert(next.clone(), node.clone());
            if next == to {
                // Walk the parent chain back to the start, then flip it.
                let mut path = vec![to.to_string()];
                let mut cursor = to.to_string();
                while let Some(prev) = parent.get(&cursor) {
                    if prev.is_empty() {
                        break;
                    }
                    path.push(prev.clone());
                    cursor = prev.clone();
                }
                path.reverse();
                return Some(path);
            }
            frontier.push_back((next.clone(), hops + 1));
        }
    }
    None
}
/// Find connected components among result nodes via BFS through the full graph.
///
/// Two result nodes belong to the same component when a path of at most
/// `max_hops` edges connects them, even if intermediate nodes are not in
/// `keys`. Quadratic in `keys.len()` — acceptable for query-sized result sets.
fn find_components(keys: &[&str], graph: &Graph, max_hops: usize) -> Vec<Vec<String>> {
    use std::collections::HashSet;
    let mut claimed: HashSet<&str> = HashSet::new();
    let mut groups: Vec<Vec<String>> = Vec::new();
    for &seed in keys {
        if claimed.contains(seed) {
            continue;
        }
        claimed.insert(seed);
        let mut group = vec![seed.to_string()];
        // Pull every still-unclaimed key reachable from the seed into its group.
        for &candidate in keys {
            if claimed.contains(candidate) {
                continue;
            }
            if bfs_path(graph, seed, candidate, max_hops).is_some() {
                claimed.insert(candidate);
                group.push(candidate.to_string());
            }
        }
        groups.push(group);
    }
    groups
}
/// Print connectivity report for query results.
///
/// Groups the result nodes into connected components (paths may route
/// through non-result nodes, up to `max_hops`), prints each multi-node
/// cluster with per-node degrees and a sample path, lists single-node
/// "islands", and finally suggests a `link-add` command attaching each
/// island to the highest-degree node of the largest cluster.
fn print_connectivity(results: &[QueryResult], graph: &Graph) {
    // Hop budget for BFS reachability between result nodes.
    let max_hops = 4;
    let keys: Vec<&str> = results.iter().map(|r| r.key.as_str()).collect();
    let components = find_components(&keys, graph, max_hops);
    println!("Connectivity: {} nodes, {} components (max {} hops)\n",
        results.len(), components.len(), max_hops);
    // Membership test used when rendering sample paths: result nodes are
    // printed bare, pass-through nodes bracketed.
    let result_set: std::collections::HashSet<&str> = keys.iter().copied().collect();
    // Find the largest cluster to use as link-add target for islands
    let largest_cluster = components.iter()
        .max_by_key(|c| c.len())
        .and_then(|c| if c.len() > 1 {
            // Pick highest-degree node in largest cluster as link target
            c.iter().max_by_key(|k| graph.degree(k)).cloned()
        } else { None });
    let mut islands: Vec<&str> = Vec::new();
    for (i, component) in components.iter().enumerate() {
        if component.len() == 1 {
            println!(" island: {}", component[0]);
            islands.push(&component[0]);
        } else {
            println!(" cluster {} ({} nodes):", i + 1, component.len());
            for node in component {
                println!(" {} (degree {})", node, graph.degree(node));
            }
            // Show a sample path between first two nodes
            if component.len() >= 2
                && let Some(path) = bfs_path(graph, &component[0], &component[1], max_hops) {
                print!(" path: ");
                for (j, step) in path.iter().enumerate() {
                    // NOTE(review): the separator between path steps is an empty
                    // string literal — possibly an arrow/space character lost in
                    // transit; confirm the intended separator.
                    if j > 0 { print!(""); }
                    if result_set.contains(step.as_str()) {
                        print!("{}", step);
                    } else {
                        // Bracket nodes on the path that are not in the result set.
                        print!("[{}]", step);
                    }
                }
                println!();
            }
        }
    }
    // Suggest link-add commands for islands
    if !islands.is_empty()
        && let Some(ref hub) = largest_cluster {
        println!("\nFix islands:");
        for island in &islands {
            println!(" poc-memory graph link-add {} {}", island, hub);
        }
    }
}

View file

@ -157,7 +157,6 @@ pub enum Filter {
pub enum Transform { pub enum Transform {
Sort(SortField), Sort(SortField),
Limit(usize), Limit(usize),
DominatingSet,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
@ -167,21 +166,6 @@ pub enum SortField {
ContentLen, ContentLen,
Degree, Degree,
Weight, Weight,
Isolation,
Composite(Vec<(ScoreField, f64)>),
}
/// Individual scoring dimensions for composite sorts.
/// Each computes a 0.0-1.0 score per node.
#[derive(Clone, Debug)]
pub enum ScoreField {
Isolation,
Degree,
Weight,
ContentLen,
Priority,
/// Time since last visit by named agent. 1.0 = never visited, decays toward 0.
Recency(String),
} }
/// Numeric comparison operator. /// Numeric comparison operator.
@ -243,111 +227,6 @@ fn parse_duration_or_number(s: &str) -> Result<f64, String> {
} }
} }
/// Parse composite sort: "isolation*0.7+recency(linker)*0.3"
/// Each term is field or field(arg), optionally *weight (default 1.0).
fn parse_composite_sort(s: &str) -> Result<Vec<(ScoreField, f64)>, String> {
    let mut parsed = Vec::new();
    for raw in s.split('+') {
        let raw = raw.trim();
        // Split off an optional "*weight" suffix; the weight defaults to 1.0.
        let (spec, weight) = match raw.rsplit_once('*') {
            Some((spec, w)) => {
                let weight = w.parse::<f64>().map_err(|_| format!("bad weight: {}", w))?;
                (spec, weight)
            }
            None => (raw, 1.0),
        };
        // A "name(arg)" spec selects a parameterized field; otherwise the
        // spec is a bare field name.
        let field = match spec.split_once('(') {
            Some((name, rest)) => {
                let arg = rest.strip_suffix(')').ok_or("missing ) in sort field")?;
                match name {
                    "recency" => ScoreField::Recency(arg.to_string()),
                    _ => return Err(format!("unknown parameterized sort field: {}", name)),
                }
            }
            None => match spec {
                "isolation" => ScoreField::Isolation,
                "degree" => ScoreField::Degree,
                "weight" => ScoreField::Weight,
                "content-len" => ScoreField::ContentLen,
                "priority" => ScoreField::Priority,
                _ => return Err(format!("unknown sort field: {}", spec)),
            },
        };
        parsed.push((field, weight));
    }
    if parsed.is_empty() {
        return Err("empty composite sort".into());
    }
    Ok(parsed)
}
/// Compute a 0-1 score for a node on a single dimension.
///
/// Degree and content-length scores are normalized against the maxima
/// cached in `precomputed`; isolation is looked up per community.
fn score_field(
    field: &ScoreField,
    key: &str,
    store: &Store,
    graph: &Graph,
    precomputed: &CompositeCache,
) -> f64 {
    match field {
        ScoreField::Isolation => {
            // Score is the node's community isolation; unknown community → 1.0.
            let comm = graph.communities().get(key).copied().unwrap_or(0);
            precomputed.isolation.get(&comm).copied().unwrap_or(1.0) as f64
        }
        ScoreField::Degree => {
            // Degree normalized by the max degree among sorted items, capped at 1.
            let d = graph.degree(key) as f64;
            let max = precomputed.max_degree.max(1.0);
            (d / max).min(1.0)
        }
        ScoreField::Weight => {
            // Node weight used directly; missing node scores 0.
            store.nodes.get(key).map(|n| n.weight as f64).unwrap_or(0.0)
        }
        ScoreField::ContentLen => {
            // Content length normalized by the max length among sorted items.
            let len = store.nodes.get(key).map(|n| n.content.len()).unwrap_or(0) as f64;
            let max = precomputed.max_content_len.max(1.0);
            (len / max).min(1.0)
        }
        ScoreField::Priority => {
            let p = crate::neuro::consolidation_priority(store, key, graph, None);
            // Priority is already roughly 0-1 from the scoring function
            p.min(1.0)
        }
        ScoreField::Recency(agent) => {
            let last = store.last_visited(key, agent);
            if last == 0 {
                1.0 // never visited = highest recency score
            } else {
                let age = (crate::store::now_epoch() - last) as f64;
                // Exponential saturation toward 1.0: 1 - e^(-0.03h) gives
                // ~0.03 at 1 hour, ~0.5 at 1 day, ~0.99 at 7 days.
                let hours = age / 3600.0;
                1.0 - (-0.03 * hours).exp()
            }
        }
    }
}
/// Cached values for composite scoring (computed once per sort).
struct CompositeCache {
    // Community id → isolation score, from `graph.community_isolation()`.
    isolation: HashMap<u32, f32>,
    // Largest node degree among the items being sorted (normalization denominator).
    max_degree: f64,
    // Longest node content length among the items being sorted.
    max_content_len: f64,
}
impl CompositeCache {
    /// Precompute per-sort normalization data over the candidate items.
    fn build(items: &[(String, f64)], store: &Store, graph: &Graph) -> Self {
        let mut max_degree = 0.0f64;
        let mut max_content_len = 0.0f64;
        // Single pass over the items collects both maxima.
        for (key, _) in items {
            max_degree = max_degree.max(graph.degree(key) as f64);
            let len = store.nodes.get(key).map(|n| n.content.len()).unwrap_or(0);
            max_content_len = max_content_len.max(len as f64);
        }
        Self {
            isolation: graph.community_isolation(),
            max_degree,
            max_content_len,
        }
    }
}
/// Parse a NodeType from a label. /// Parse a NodeType from a label.
fn parse_node_type(s: &str) -> Result<NodeType, String> { fn parse_node_type(s: &str) -> Result<NodeType, String> {
match s { match s {
@ -378,16 +257,12 @@ impl Stage {
return Ok(Stage::Generator(Generator::All)); return Ok(Stage::Generator(Generator::All));
} }
// Transform: "dominating-set"
if s == "dominating-set" {
return Ok(Stage::Transform(Transform::DominatingSet));
}
// Try algorithm parse first (bare words, no colon) // Try algorithm parse first (bare words, no colon)
if !s.contains(':') if !s.contains(':') {
&& let Ok(algo) = AlgoStage::parse(s) { if let Ok(algo) = AlgoStage::parse(s) {
return Ok(Stage::Algorithm(algo)); return Ok(Stage::Algorithm(algo));
} }
}
// Algorithm with params: "spread,max_hops=4" (contains comma but no colon) // Algorithm with params: "spread,max_hops=4" (contains comma but no colon)
if s.contains(',') && !s.contains(':') { if s.contains(',') && !s.contains(':') {
@ -420,19 +295,13 @@ impl Stage {
agent: value.to_string(), agent: value.to_string(),
}), }),
"sort" => { "sort" => {
// Check for composite sort: field*weight+field*weight+... let field = match value {
let field = if value.contains('+') || value.contains('*') { "priority" => SortField::Priority,
SortField::Composite(parse_composite_sort(value)?) "timestamp" => SortField::Timestamp,
} else { "content-len" => SortField::ContentLen,
match value { "degree" => SortField::Degree,
"priority" => SortField::Priority, "weight" => SortField::Weight,
"timestamp" => SortField::Timestamp, _ => return Err(format!("unknown sort field: {}", value)),
"content-len" => SortField::ContentLen,
"degree" => SortField::Degree,
"weight" => SortField::Weight,
"isolation" => SortField::Isolation,
_ => return Err(format!("unknown sort field: {}", value)),
}
}; };
Stage::Transform(Transform::Sort(field)) Stage::Transform(Transform::Sort(field))
} }
@ -479,7 +348,6 @@ impl fmt::Display for Stage {
Stage::Filter(filt) => write!(f, "{}", filt), Stage::Filter(filt) => write!(f, "{}", filt),
Stage::Transform(Transform::Sort(field)) => write!(f, "sort:{:?}", field), Stage::Transform(Transform::Sort(field)) => write!(f, "sort:{:?}", field),
Stage::Transform(Transform::Limit(n)) => write!(f, "limit:{}", n), Stage::Transform(Transform::Limit(n)) => write!(f, "limit:{}", n),
Stage::Transform(Transform::DominatingSet) => write!(f, "dominating-set"),
Stage::Algorithm(a) => write!(f, "{}", a.algo), Stage::Algorithm(a) => write!(f, "{}", a.algo),
} }
} }
@ -559,7 +427,7 @@ pub fn run_query(
} }
current = match stage { current = match stage {
Stage::Generator(g) => run_generator(g, store), Stage::Generator(gen) => run_generator(gen, store),
Stage::Filter(filt) => { Stage::Filter(filt) => {
current.into_iter() current.into_iter()
@ -595,8 +463,8 @@ pub fn run_query(
current current
} }
fn run_generator(g: &Generator, store: &Store) -> Vec<(String, f64)> { fn run_generator(gen: &Generator, store: &Store) -> Vec<(String, f64)> {
match g { match gen {
Generator::All => { Generator::All => {
store.nodes.iter() store.nodes.iter()
.filter(|(_, n)| !n.deleted) .filter(|(_, n)| !n.deleted)
@ -640,7 +508,7 @@ fn eval_filter(filt: &Filter, key: &str, store: &Store, now: i64) -> bool {
} }
} }
pub fn run_transform( fn run_transform(
xform: &Transform, xform: &Transform,
mut items: Vec<(String, f64)>, mut items: Vec<(String, f64)>,
store: &Store, store: &Store,
@ -673,19 +541,6 @@ pub fn run_transform(
db.cmp(&da) // desc db.cmp(&da) // desc
}); });
} }
SortField::Isolation => {
// Score nodes by their community's isolation.
// Most isolated communities first (highest internal edge ratio).
let iso = graph.community_isolation();
let comms = graph.communities();
items.sort_by(|a, b| {
let ca = comms.get(&a.0).copied().unwrap_or(0);
let cb = comms.get(&b.0).copied().unwrap_or(0);
let sa = iso.get(&ca).copied().unwrap_or(1.0);
let sb = iso.get(&cb).copied().unwrap_or(1.0);
sb.total_cmp(&sa) // most isolated first
});
}
SortField::Priority => { SortField::Priority => {
// Pre-compute priorities to avoid O(n log n) calls // Pre-compute priorities to avoid O(n log n) calls
// inside the sort comparator. // inside the sort comparator.
@ -702,22 +557,6 @@ pub fn run_transform(
pb.total_cmp(&pa) // desc pb.total_cmp(&pa) // desc
}); });
} }
SortField::Composite(terms) => {
let cache = CompositeCache::build(&items, store, graph);
let scores: HashMap<String, f64> = items.iter()
.map(|(key, _)| {
let s: f64 = terms.iter()
.map(|(field, w)| score_field(field, key, store, graph, &cache) * w)
.sum();
(key.clone(), s)
})
.collect();
items.sort_by(|a, b| {
let sa = scores.get(&a.0).copied().unwrap_or(0.0);
let sb = scores.get(&b.0).copied().unwrap_or(0.0);
sb.total_cmp(&sa) // highest composite score first
});
}
} }
items items
} }
@ -725,55 +564,6 @@ pub fn run_transform(
items.truncate(*n); items.truncate(*n);
items items
} }
Transform::DominatingSet => {
// Greedy 3-covering dominating set: pick the node that covers
// the most under-covered neighbors, repeat until every node
// has been covered 3 times (by 3 different selected seeds).
use std::collections::HashMap as HMap;
let input_keys: std::collections::HashSet<String> = items.iter().map(|(k, _)| k.clone()).collect();
let mut cover_count: HMap<String, usize> = items.iter().map(|(k, _)| (k.clone(), 0)).collect();
let mut selected: Vec<(String, f64)> = Vec::new();
let mut selected_set: std::collections::HashSet<String> = std::collections::HashSet::new();
const REQUIRED_COVERAGE: usize = 3;
loop {
// Find the unselected node that covers the most under-covered nodes
let best = items.iter()
.filter(|(k, _)| !selected_set.contains(k.as_str()))
.map(|(k, _)| {
let mut value = 0usize;
// Count self if under-covered
if cover_count.get(k).copied().unwrap_or(0) < REQUIRED_COVERAGE {
value += 1;
}
for (nbr, _) in graph.neighbors(k) {
if input_keys.contains(nbr.as_str())
&& cover_count.get(nbr.as_str()).copied().unwrap_or(0) < REQUIRED_COVERAGE {
value += 1;
}
}
(k.clone(), value)
})
.max_by_key(|(_, v)| *v);
let Some((key, value)) = best else { break };
if value == 0 { break; } // everything covered 3x
// Mark coverage
*cover_count.entry(key.clone()).or_default() += 1;
for (nbr, _) in graph.neighbors(&key) {
if let Some(c) = cover_count.get_mut(nbr.as_str()) {
*c += 1;
}
}
let score = items.iter().find(|(k, _)| k == &key).map(|(_, s)| *s).unwrap_or(1.0);
selected.push((key.clone(), score));
selected_set.insert(key);
}
selected
}
} }
} }
@ -812,7 +602,7 @@ pub fn match_seeds_opts(
key_map.insert(lkey.clone(), (key.to_owned(), weight as f64)); key_map.insert(lkey.clone(), (key.to_owned(), weight as f64));
// Split key on hyphens, underscores, dots, hashes for component matching // Split key on hyphens, underscores, dots, hashes for component matching
for component in lkey.split(['-', '_', '.', '#']) { for component in lkey.split(|c: char| c == '-' || c == '_' || c == '.' || c == '#') {
if component.len() >= 3 { if component.len() >= 3 {
component_map.entry(component.to_owned()) component_map.entry(component.to_owned())
.or_default() .or_default()
@ -831,8 +621,8 @@ pub fn match_seeds_opts(
} }
// Strategy 2: key component match (0.5× weight) — only when explicitly requested // Strategy 2: key component match (0.5× weight) — only when explicitly requested
if component_match if component_match {
&& let Some(matches) = component_map.get(term.as_str()) { if let Some(matches) = component_map.get(term.as_str()) {
for (orig_key, node_weight) in matches { for (orig_key, node_weight) in matches {
let score = term_weight * node_weight * 0.5; let score = term_weight * node_weight * 0.5;
*seed_map.entry(orig_key.clone()).or_insert(0.0) += score; *seed_map.entry(orig_key.clone()).or_insert(0.0) += score;
@ -840,6 +630,7 @@ pub fn match_seeds_opts(
} }
continue; continue;
} }
}
// Strategy 3: content match (0.2× weight) — only when explicitly requested // Strategy 3: content match (0.2× weight) — only when explicitly requested
if content_fallback { if content_fallback {
@ -1377,7 +1168,7 @@ fn run_manifold(
/// sum at each node, and the combined activation map propagates on /// sum at each node, and the combined activation map propagates on
/// the next hop. This creates interference patterns — nodes where /// the next hop. This creates interference patterns — nodes where
/// multiple wavefronts overlap get reinforced and radiate stronger. /// multiple wavefronts overlap get reinforced and radiate stronger.
pub fn spreading_activation( fn spreading_activation(
seeds: &[(String, f64)], seeds: &[(String, f64)],
graph: &Graph, graph: &Graph,
store: &impl StoreView, store: &impl StoreView,
@ -1441,6 +1232,15 @@ pub fn search_weighted(
search_weighted_inner(terms, store, false, 5) search_weighted_inner(terms, store, false, 5)
} }
/// Like search_weighted but with debug output and configurable result count.
///
/// Thin wrapper over `search_weighted_inner` with the debug flag set to
/// `true`; callers choose how many results to return via `max_results`.
pub fn search_weighted_debug(
    terms: &BTreeMap<String, f64>,
    store: &impl StoreView,
    max_results: usize,
) -> Vec<SearchResult> {
    search_weighted_inner(terms, store, true, max_results)
}
fn search_weighted_inner( fn search_weighted_inner(
terms: &BTreeMap<String, f64>, terms: &BTreeMap<String, f64>,
store: &impl StoreView, store: &impl StoreView,
@ -1487,3 +1287,41 @@ pub fn search(query: &str, store: &impl StoreView) -> Vec<SearchResult> {
search_weighted(&terms, store) search_weighted(&terms, store)
} }
/// Extract meaningful search terms from natural language.
/// Strips common English stop words, returns up to max_terms words.
///
/// Words are lowercased, split on any non-alphanumeric character, and
/// kept only when longer than two characters and not in the stop list.
/// Returned terms are joined with single spaces.
pub fn extract_query_terms(text: &str, max_terms: usize) -> String {
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "do", "does", "did",
        "have", "has", "had", "will", "would", "could", "should", "can",
        "may", "might", "shall", "been", "being", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "but", "or", "and", "not",
        "no", "if", "then", "than", "that", "this", "it", "its", "my",
        "your", "our", "we", "you", "i", "me", "he", "she", "they", "them",
        "what", "how", "why", "when", "where", "about", "just", "let",
        "want", "tell", "show", "think", "know", "see", "look", "make",
        "get", "go", "some", "any", "all", "very", "really", "also", "too",
        "so", "up", "out", "here", "there",
    ];
    text.to_lowercase()
        .split(|c: char| !c.is_alphanumeric())
        // `len() > 2` already excludes empty splits, so no separate
        // is_empty check is needed.
        .filter(|w| w.len() > 2 && !STOP_WORDS.contains(w))
        .take(max_terms)
        .collect::<Vec<_>>()
        .join(" ")
}
/// Format search results as text lines (for hook consumption).
///
/// Emits at most five results, one line each, with an optional indented
/// snippet line below a result when one is attached.
pub fn format_results(results: &[SearchResult]) -> String {
    let mut out = String::new();
    for (i, r) in results.iter().enumerate().take(5) {
        // Direct hits get no prefix; indirect hits are indented one space.
        let marker = if r.is_direct { "" } else { " " };
        // NOTE(review): `r.activation` is printed in both slots of
        // "[{:.2}/{:.2}]" — the second was likely meant to be a different
        // metric (e.g. weight); confirm against the consumer of this output.
        out.push_str(&format!("{}{:2}. [{:.2}/{:.2}] {}",
            marker, i + 1, r.activation, r.activation, r.key));
        out.push('\n');
        if let Some(ref snippet) = r.snippet {
            out.push_str(&format!(" {}\n", snippet));
        }
    }
    out
}

View file

@ -8,47 +8,42 @@ use std::collections::HashMap;
/// Minimal Porter stemmer — handles the most common English suffixes. /// Minimal Porter stemmer — handles the most common English suffixes.
/// Not linguistically complete but good enough for similarity matching. /// Not linguistically complete but good enough for similarity matching.
/// Single allocation: works on one String buffer throughout. pub fn stem(word: &str) -> String {
/// let w = word.to_lowercase();
/// If this is still a hot spot, replace the sequential suffix checks
/// with a reversed-suffix trie: single pass from the end of the word
/// matches the longest applicable suffix in O(suffix_len) instead of
/// O(n_rules).
pub(crate) fn stem(word: &str) -> String {
let mut w = word.to_lowercase();
if w.len() <= 3 { return w; } if w.len() <= 3 { return w; }
strip_suffix_inplace(&mut w, "ation", "ate"); let w = strip_suffix(&w, "ation", "ate");
strip_suffix_inplace(&mut w, "ness", ""); let w = strip_suffix(&w, "ness", "");
strip_suffix_inplace(&mut w, "ment", ""); let w = strip_suffix(&w, "ment", "");
strip_suffix_inplace(&mut w, "ting", "t"); let w = strip_suffix(&w, "ting", "t");
strip_suffix_inplace(&mut w, "ling", "l"); let w = strip_suffix(&w, "ling", "l");
strip_suffix_inplace(&mut w, "ring", "r"); let w = strip_suffix(&w, "ring", "r");
strip_suffix_inplace(&mut w, "ning", "n"); let w = strip_suffix(&w, "ning", "n");
strip_suffix_inplace(&mut w, "ding", "d"); let w = strip_suffix(&w, "ding", "d");
strip_suffix_inplace(&mut w, "ping", "p"); let w = strip_suffix(&w, "ping", "p");
strip_suffix_inplace(&mut w, "ging", "g"); let w = strip_suffix(&w, "ging", "g");
strip_suffix_inplace(&mut w, "ying", "y"); let w = strip_suffix(&w, "ying", "y");
strip_suffix_inplace(&mut w, "ied", "y"); let w = strip_suffix(&w, "ied", "y");
strip_suffix_inplace(&mut w, "ies", "y"); let w = strip_suffix(&w, "ies", "y");
strip_suffix_inplace(&mut w, "ing", ""); let w = strip_suffix(&w, "ing", "");
strip_suffix_inplace(&mut w, "ed", ""); let w = strip_suffix(&w, "ed", "");
strip_suffix_inplace(&mut w, "ly", ""); let w = strip_suffix(&w, "ly", "");
strip_suffix_inplace(&mut w, "er", ""); let w = strip_suffix(&w, "er", "");
strip_suffix_inplace(&mut w, "al", ""); let w = strip_suffix(&w, "al", "");
strip_suffix_inplace(&mut w, "s", ""); strip_suffix(&w, "s", "")
w
} }
fn strip_suffix_inplace(word: &mut String, suffix: &str, replacement: &str) { fn strip_suffix(word: &str, suffix: &str, replacement: &str) -> String {
if word.len() > suffix.len() + 2 && word.ends_with(suffix) { if word.len() > suffix.len() + 2 && word.ends_with(suffix) {
word.truncate(word.len() - suffix.len()); let base = &word[..word.len() - suffix.len()];
word.push_str(replacement); format!("{}{}", base, replacement)
} else {
word.to_string()
} }
} }
/// Tokenize and stem a text into a term frequency map /// Tokenize and stem a text into a term frequency map
pub(crate) fn term_frequencies(text: &str) -> HashMap<String, u32> { pub fn term_frequencies(text: &str) -> HashMap<String, u32> {
let mut tf = HashMap::new(); let mut tf = HashMap::new();
for word in text.split(|c: char| !c.is_alphanumeric()) { for word in text.split(|c: char| !c.is_alphanumeric()) {
if word.len() > 2 { if word.len() > 2 {

599
poc-memory/src/spectral.rs Normal file
View file

@ -0,0 +1,599 @@
// Spectral decomposition of the memory graph.
//
// Computes eigenvalues and eigenvectors of the normalized graph Laplacian.
// The eigenvectors provide natural coordinates for each node — connected
// nodes land nearby, communities form clusters, bridges sit between clusters.
//
// The eigenvalue spectrum reveals:
// - Number of connected components (count of zero eigenvalues)
// - Number of natural communities (eigenvalues near zero, before the gap)
// - How well-connected the graph is (Fiedler value = second eigenvalue)
//
// The eigenvectors provide:
// - Spectral coordinates for each node (the embedding)
// - Community membership (sign/magnitude of Fiedler vector)
// - Natural projections (select which eigenvectors to include)
use crate::graph::Graph;
use faer::Mat;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
/// Output of a spectral decomposition run: the node index ordering plus
/// the retained bottom-k eigenpairs of the normalized graph Laplacian.
/// Layout is per-eigenvector; see `SpectralEmbedding` for the per-node
/// (transposed) form.
pub struct SpectralResult {
    /// Node keys in index order
    pub keys: Vec<String>,
    /// Eigenvalues in ascending order
    pub eigenvalues: Vec<f64>,
    /// Eigenvectors: eigvecs[k] is the k-th eigenvector (ascending eigenvalue order),
    /// with eigvecs[k][i] being the value for node keys[i]
    pub eigvecs: Vec<Vec<f64>>,
}
/// Per-node spectral embedding, serializable to disk.
///
/// Transposed layout relative to `SpectralResult`: coordinates are keyed
/// by node rather than by eigenvector, which is the shape the neighbor
/// queries below want.
#[derive(Serialize, Deserialize)]
pub struct SpectralEmbedding {
    /// Number of dimensions (eigenvectors)
    pub dims: usize,
    /// Eigenvalues for each dimension
    pub eigenvalues: Vec<f64>,
    /// Node key → coordinate vector
    pub coords: HashMap<String, Vec<f64>>,
}
/// Location of the persisted spectral embedding inside the memory dir.
fn embedding_path() -> PathBuf {
    let dir = crate::store::memory_dir();
    dir.join("spectral-embedding.json")
}
/// Compute spectral decomposition of the memory graph.
///
/// Returns the smallest `k` eigenvalues and their eigenvectors of the
/// normalized Laplacian L_sym = I - D^{-1/2} A D^{-1/2}.
///
/// We compute the full decomposition (it's only 2000×2000, takes <1s)
/// and return the bottom k.
pub fn decompose(graph: &Graph, k: usize) -> SpectralResult {
    // Only include nodes with edges (filter isolates)
    let mut keys: Vec<String> = graph.nodes().iter()
        .filter(|k| graph.degree(k) > 0)
        .cloned()
        .collect();
    // Sorted so node → matrix-index assignment is deterministic across runs.
    keys.sort();
    let n = keys.len();
    let isolates = graph.nodes().len() - n;
    if isolates > 0 {
        eprintln!("note: filtered {} isolated nodes, decomposing {} connected nodes", isolates, n);
    }
    let key_to_idx: HashMap<&str, usize> = keys.iter()
        .enumerate()
        .map(|(i, k)| (k.as_str(), i))
        .collect();
    // Build weighted degree vector and adjacency
    let mut degree = vec![0.0f64; n];
    let mut adj_entries: Vec<(usize, usize, f64)> = Vec::new();
    for (i, key) in keys.iter().enumerate() {
        // neighbors() reports each undirected edge from both endpoints;
        // the j > i guard keeps exactly one entry per edge.
        for (neighbor, strength) in graph.neighbors(key) {
            if let Some(&j) = key_to_idx.get(neighbor.as_str()) {
                if j > i { // each edge once
                    let w = strength as f64;
                    adj_entries.push((i, j, w));
                    degree[i] += w;
                    degree[j] += w;
                }
            }
        }
    }
    // Build normalized Laplacian: L_sym = I - D^{-1/2} A D^{-1/2}
    let mut laplacian = Mat::<f64>::zeros(n, n);
    // Diagonal = 1 for nodes with edges, 0 for isolates
    for i in 0..n {
        if degree[i] > 0.0 {
            laplacian[(i, i)] = 1.0;
        }
    }
    // Off-diagonal: -w / sqrt(d_i * d_j)
    for &(i, j, w) in &adj_entries {
        if degree[i] > 0.0 && degree[j] > 0.0 {
            let val = -w / (degree[i] * degree[j]).sqrt();
            laplacian[(i, j)] = val;
            laplacian[(j, i)] = val;
        }
    }
    // Eigendecompose
    // NOTE(review): the skip-and-take logic below assumes faer returns
    // eigenvalues in ascending order — confirm against the faer docs.
    let eig = laplacian.self_adjoint_eigen(faer::Side::Lower)
        .expect("eigendecomposition failed");
    let s = eig.S();
    let u = eig.U();
    let mut eigenvalues = Vec::with_capacity(k);
    let mut eigvecs = Vec::with_capacity(k);
    let s_col = s.column_vector();
    // Skip trivial eigenvalues (near-zero = null space from disconnected components).
    // The number of zero eigenvalues equals the number of connected components.
    let mut start = 0;
    while start < n && s_col[start].abs() < 1e-8 {
        start += 1;
    }
    // Clamp k so we never read past the spectrum when the graph is tiny.
    let k = k.min(n.saturating_sub(start));
    for col in start..start + k {
        eigenvalues.push(s_col[col]);
        let mut vec = Vec::with_capacity(n);
        for row in 0..n {
            vec.push(u[(row, col)]);
        }
        eigvecs.push(vec);
    }
    SpectralResult { keys, eigenvalues, eigvecs }
}
/// Print the spectral summary: eigenvalue spectrum, then each axis with
/// its extreme nodes (what the axis "means").
///
/// Diagnostic output only — writes to stdout, mutates nothing.
pub fn print_summary(result: &SpectralResult, graph: &Graph) {
    let n = result.keys.len();
    let k = result.eigenvalues.len();
    println!("Spectral Decomposition — {} nodes, {} eigenpairs", n, k);
    println!("=========================================\n");
    // Compact eigenvalue table
    println!("Eigenvalue spectrum:");
    for (i, &ev) in result.eigenvalues.iter().enumerate() {
        // Gap to the previous eigenvalue; large gaps mark community-count
        // boundaries in spectral clustering.
        let gap = if i > 0 {
            ev - result.eigenvalues[i - 1]
        } else {
            0.0
        };
        // Render the gap as a bar of '#', capped at 40 chars.
        let gap_bar = if i > 0 {
            let bars = (gap * 500.0).min(40.0) as usize;
            "#".repeat(bars)
        } else {
            String::new()
        };
        println!(" λ_{:<2} = {:.6} {}", i, ev, gap_bar);
    }
    // Connected components
    let near_zero = result.eigenvalues.iter()
        .filter(|&&v| v.abs() < 1e-6)
        .count();
    if near_zero > 1 {
        println!("\n {} eigenvalues near 0 = {} disconnected components", near_zero, near_zero);
    }
    // Each axis: what are the extremes?
    println!("\n\nNatural axes of the knowledge space");
    println!("====================================");
    for axis in 0..k {
        let ev = result.eigenvalues[axis];
        let vec = &result.eigvecs[axis];
        // Sort nodes by their value on this axis
        let mut indexed: Vec<(usize, f64)> = vec.iter()
            .enumerate()
            .map(|(i, &v)| (i, v))
            .collect();
        indexed.sort_by(|a, b| a.1.total_cmp(&b.1));
        // Compute the "spread" — how much this axis differentiates
        let min_val = indexed.first().map(|x| x.1).unwrap_or(0.0);
        let max_val = indexed.last().map(|x| x.1).unwrap_or(0.0);
        println!("\n--- Axis {} (λ={:.6}, range={:.4}) ---", axis, ev, max_val - min_val);
        // Show extremes: 5 most negative, 5 most positive
        let show = 5;
        println!(" Negative pole:");
        for &(idx, val) in indexed.iter().take(show) {
            let key = &result.keys[idx];
            // Shorten key for display: take last component
            let short = shorten_key(key);
            let deg = graph.degree(key);
            let comm = graph.communities().get(key).copied().unwrap_or(999);
            println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short);
        }
        println!(" Positive pole:");
        for &(idx, val) in indexed.iter().rev().take(show) {
            let key = &result.keys[idx];
            let short = shorten_key(key);
            let deg = graph.degree(key);
            let comm = graph.communities().get(key).copied().unwrap_or(999);
            println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short);
        }
    }
}
/// Shorten a node key for display, truncating to at most 60 bytes.
///
/// Fix: `&key[..60]` panics when byte 60 falls inside a multi-byte
/// UTF-8 character. Back up to the nearest char boundary so truncation
/// is always safe for arbitrary (non-ASCII) keys.
fn shorten_key(key: &str) -> &str {
    const MAX: usize = 60;
    if key.len() <= MAX {
        return key;
    }
    // Walk back from MAX until we land on a valid char boundary
    // (at most 3 steps, since UTF-8 chars are ≤ 4 bytes).
    let mut end = MAX;
    while !key.is_char_boundary(end) {
        end -= 1;
    }
    &key[..end]
}
/// Convert SpectralResult to a per-node embedding (transposing the layout).
///
/// `SpectralResult` stores one Vec per eigenvector; the embedding stores
/// one coordinate Vec per node, which is what the lookup functions need.
pub fn to_embedding(result: &SpectralResult) -> SpectralEmbedding {
    let dims = result.eigvecs.len();
    let coords: HashMap<String, Vec<f64>> = result.keys.iter()
        .enumerate()
        .map(|(i, key)| {
            // Gather node i's value from each eigenvector.
            let point: Vec<f64> = result.eigvecs.iter().map(|axis| axis[i]).collect();
            (key.clone(), point)
        })
        .collect();
    SpectralEmbedding {
        dims,
        eigenvalues: result.eigenvalues.clone(),
        coords,
    }
}
/// Save embedding to disk as JSON at the standard embedding path.
///
/// Errors are stringly-typed, matching the rest of this module.
pub fn save_embedding(emb: &SpectralEmbedding) -> Result<(), String> {
    let path = embedding_path();
    let json = match serde_json::to_string(emb) {
        Ok(j) => j,
        Err(e) => return Err(format!("serialize embedding: {}", e)),
    };
    if let Err(e) = std::fs::write(&path, json) {
        return Err(format!("write {}: {}", path.display(), e));
    }
    eprintln!("Saved {}-dim embedding for {} nodes to {}",
        emb.dims, emb.coords.len(), path.display());
    Ok(())
}
/// Load the embedding previously written by `save_embedding`.
pub fn load_embedding() -> Result<SpectralEmbedding, String> {
    let path = embedding_path();
    match std::fs::read_to_string(&path) {
        Ok(data) => serde_json::from_str(&data)
            .map_err(|e| format!("parse embedding: {}", e)),
        Err(e) => Err(format!("read {}: {}", path.display(), e)),
    }
}
/// Find the k nearest neighbors to a node in spectral space.
///
/// Uses weighted euclidean distance where each dimension is weighted
/// by 1/eigenvalue — lower eigenvalues (coarser structure) matter more.
/// Returns an empty Vec if `key` has no embedding coordinates.
pub fn nearest_neighbors(
    emb: &SpectralEmbedding,
    key: &str,
    k: usize,
) -> Vec<(String, f64)> {
    let Some(target) = emb.coords.get(key) else {
        return vec![];
    };
    let weights = eigenvalue_weights(&emb.eigenvalues);
    let mut ranked: Vec<(String, f64)> = Vec::new();
    for (other, coords) in &emb.coords {
        // Exclude the query node itself.
        if other.as_str() == key {
            continue;
        }
        ranked.push((other.clone(), weighted_distance(target, coords, &weights)));
    }
    ranked.sort_by(|a, b| a.1.total_cmp(&b.1));
    ranked.truncate(k);
    ranked
}
/// Find nearest neighbors to a set of seed nodes (multi-seed query).
/// Returns nodes ranked by minimum distance to any seed.
///
/// Convenience wrapper: every seed gets weight 1.0 and no graph is
/// consulted for link-strength scaling.
pub fn nearest_to_seeds(
    emb: &SpectralEmbedding,
    seeds: &[&str],
    k: usize,
) -> Vec<(String, f64)> {
    let uniform: Vec<(&str, f64)> = seeds.iter().map(|&s| (s, 1.0)).collect();
    nearest_to_seeds_weighted(emb, &uniform, None, k)
}
/// Find nearest neighbors to weighted seed nodes, using link weights.
///
/// Each seed has a weight (from query term weighting). For candidates
/// directly linked to a seed, the spectral distance is scaled by
/// 1/link_strength — strong links make effective distance shorter.
/// Seed weight scales the contribution: high-weight seeds pull harder.
///
/// Returns (key, effective_distance) sorted by distance ascending.
pub fn nearest_to_seeds_weighted(
    emb: &SpectralEmbedding,
    seeds: &[(&str, f64)], // (key, seed_weight)
    graph: Option<&crate::graph::Graph>,
    k: usize,
) -> Vec<(String, f64)> {
    // Seeds are excluded from the candidate set below.
    let seed_set: HashSet<&str> = seeds.iter().map(|(s, _)| *s).collect();
    // Resolve each seed to its coordinates, dropping seeds with no
    // embedding or with an all-zero (degenerate) coordinate vector.
    let seed_data: Vec<(&str, &Vec<f64>, f64)> = seeds.iter()
        .filter_map(|(s, w)| {
            emb.coords.get(*s)
                .filter(|c| c.iter().any(|&v| v.abs() > 1e-12)) // skip degenerate seeds
                .map(|c| (*s, c, *w))
        })
        .collect();
    if seed_data.is_empty() {
        return vec![];
    }
    // Build seed→neighbor link strength lookup
    let link_strengths: HashMap<(&str, &str), f32> = if let Some(g) = graph {
        let mut map = HashMap::new();
        for &(seed_key, _) in seeds {
            for (neighbor, strength) in g.neighbors(seed_key) {
                map.insert((seed_key, neighbor.as_str()), strength);
            }
        }
        map
    } else {
        HashMap::new()
    };
    let dim_weights = eigenvalue_weights(&emb.eigenvalues);
    // Each candidate's score is the minimum effective distance over all
    // seeds: raw spectral distance, shrunk by link strength when directly
    // connected, divided by the seed's weight.
    let mut distances: Vec<(String, f64)> = emb.coords.iter()
        .filter(|(k, coords)| {
            !seed_set.contains(k.as_str())
                && coords.iter().any(|&v| v.abs() > 1e-12) // skip degenerate zero-coord nodes
        })
        .map(|(candidate_key, coords)| {
            let min_dist = seed_data.iter()
                .map(|(seed_key, sc, seed_weight)| {
                    let raw_dist = weighted_distance(coords, sc, &dim_weights);
                    // Scale by link strength if directly connected
                    let link_scale = link_strengths
                        .get(&(*seed_key, candidate_key.as_str()))
                        .map(|&s| 1.0 / (1.0 + s as f64)) // strong link → smaller distance
                        .unwrap_or(1.0);
                    raw_dist * link_scale / seed_weight
                })
                .fold(f64::MAX, f64::min);
            (candidate_key.clone(), min_dist)
        })
        .collect();
    distances.sort_by(|a, b| a.1.total_cmp(&b.1));
    distances.truncate(k);
    distances
}
/// Weighted euclidean distance in spectral space.
/// Dimensions weighted by 1/eigenvalue — coarser structure matters more.
/// Extra trailing dimensions in the longer slice are ignored (zip semantics).
fn weighted_distance(a: &[f64], b: &[f64], weights: &[f64]) -> f64 {
    let mut acc = 0.0f64;
    for ((&x, &y), &w) in a.iter().zip(b).zip(weights) {
        let d = x - y;
        acc += w * d * d;
    }
    acc.sqrt()
}
/// Compute eigenvalue-inverse weights for distance calculations.
/// Near-zero eigenvalues get weight 0 rather than diverging to infinity.
fn eigenvalue_weights(eigenvalues: &[f64]) -> Vec<f64> {
    let mut weights = Vec::with_capacity(eigenvalues.len());
    for &ev in eigenvalues {
        let w = if ev > 1e-8 { ev.recip() } else { 0.0 };
        weights.push(w);
    }
    weights
}
/// Compute cluster centers (centroids) in spectral space.
///
/// Nodes without a community assignment are skipped. Each center is the
/// arithmetic mean of its members' coordinate vectors.
pub fn cluster_centers(
    emb: &SpectralEmbedding,
    communities: &HashMap<String, u32>,
) -> HashMap<u32, Vec<f64>> {
    // Phase 1: accumulate per-community coordinate sums and member counts.
    let mut acc: HashMap<u32, (Vec<f64>, usize)> = HashMap::new();
    for (key, coords) in &emb.coords {
        let Some(&comm) = communities.get(key) else { continue };
        let slot = acc.entry(comm).or_insert_with(|| (vec![0.0; emb.dims], 0));
        for (dim, &c) in coords.iter().enumerate() {
            slot.0[dim] += c;
        }
        slot.1 += 1;
    }
    // Phase 2: divide sums by counts to get centroids.
    let mut centers = HashMap::new();
    for (comm, (sum, count)) in acc {
        let center: Vec<f64> = sum.into_iter().map(|s| s / count as f64).collect();
        centers.insert(comm, center);
    }
    centers
}
/// Per-node analysis of spectral position relative to communities.
///
/// Produced by `analyze_positions`; interpreted by `classify_position`
/// into "core" / "peripheral" / "outlier" / "bridge" labels.
pub struct SpectralPosition {
    pub key: String,
    pub community: u32,
    /// Distance to own community center
    pub dist_to_center: f64,
    /// Distance to nearest OTHER community center
    pub dist_to_nearest: f64,
    /// Which community is nearest (other than own)
    pub nearest_community: u32,
    /// dist_to_center / median_dist_in_community (>1 = outlier)
    pub outlier_score: f64,
    /// dist_to_center / dist_to_nearest (>1 = between clusters, potential bridge)
    pub bridge_score: f64,
}
/// Analyze spectral positions for all nodes.
///
/// Returns positions sorted by outlier_score descending (most displaced first).
/// Nodes without a community assignment are omitted from the result.
pub fn analyze_positions(
    emb: &SpectralEmbedding,
    communities: &HashMap<String, u32>,
) -> Vec<SpectralPosition> {
    let centers = cluster_centers(emb, communities);
    let weights = eigenvalue_weights(&emb.eigenvalues);
    // Compute distances to own community center
    let mut by_community: HashMap<u32, Vec<f64>> = HashMap::new();
    let mut node_dists: Vec<(String, u32, f64)> = Vec::new();
    for (key, coords) in &emb.coords {
        if let Some(&comm) = communities.get(key) {
            if let Some(center) = centers.get(&comm) {
                let dist = weighted_distance(coords, center, &weights);
                by_community.entry(comm).or_default().push(dist);
                node_dists.push((key.clone(), comm, dist));
            }
        }
    }
    // Median distance per community for outlier scoring
    let medians: HashMap<u32, f64> = by_community.into_iter()
        .map(|(comm, mut dists)| {
            dists.sort_by(|a, b| a.total_cmp(b));
            let median = if dists.is_empty() {
                1.0
            } else if dists.len() % 2 == 0 {
                (dists[dists.len() / 2 - 1] + dists[dists.len() / 2]) / 2.0
            } else {
                dists[dists.len() / 2]
            };
            // Floor at 1e-6 so outlier_score (dist / median) never divides by ~0.
            (comm, median.max(1e-6))
        })
        .collect();
    let mut positions: Vec<SpectralPosition> = node_dists.into_iter()
        .map(|(key, comm, dist_to_center)| {
            let coords = &emb.coords[&key];
            // Closest center belonging to a different community; the
            // f64::MAX fallback covers a single-community graph.
            let (nearest_community, dist_to_nearest) = centers.iter()
                .filter(|(&c, _)| c != comm)
                .map(|(&c, center)| (c, weighted_distance(coords, center, &weights)))
                .min_by(|a, b| a.1.total_cmp(&b.1))
                .unwrap_or((comm, f64::MAX));
            let median = medians.get(&comm).copied().unwrap_or(1.0);
            let outlier_score = dist_to_center / median;
            let bridge_score = if dist_to_nearest > 1e-8 {
                dist_to_center / dist_to_nearest
            } else {
                0.0
            };
            SpectralPosition {
                key, community: comm,
                dist_to_center, dist_to_nearest, nearest_community,
                outlier_score, bridge_score,
            }
        })
        .collect();
    positions.sort_by(|a, b| b.outlier_score.total_cmp(&a.outlier_score));
    positions
}
/// Find pairs of nodes that are spectrally close but not linked in the graph.
///
/// These are the most valuable candidates for extractor agents —
/// the spectral structure says they should be related, but nobody
/// has articulated why.
///
/// Fixes: (1) keys are sorted before pairing so the output is
/// deterministic across runs (HashMap iteration order is random, which
/// previously made equal-distance ties come out in arbitrary order);
/// (2) each key pair is cloned once and reused for the output, instead
/// of cloning forward + reverse + again for the push.
pub fn unlinked_neighbors(
    emb: &SpectralEmbedding,
    linked_pairs: &HashSet<(String, String)>,
    max_pairs: usize,
) -> Vec<(String, String, f64)> {
    let weights = eigenvalue_weights(&emb.eigenvalues);
    let mut keys: Vec<&String> = emb.coords.keys().collect();
    // Deterministic enumeration order regardless of HashMap hashing.
    keys.sort();
    let mut pairs: Vec<(String, String, f64)> = Vec::new();
    for (i, k1) in keys.iter().enumerate() {
        let c1 = &emb.coords[*k1];
        for k2 in keys.iter().skip(i + 1) {
            // Skip if already linked (either direction).
            let pair_fwd = ((*k1).clone(), (*k2).clone());
            if linked_pairs.contains(&pair_fwd) {
                continue;
            }
            let pair_rev = (pair_fwd.1.clone(), pair_fwd.0.clone());
            if linked_pairs.contains(&pair_rev) {
                continue;
            }
            let dist = weighted_distance(c1, &emb.coords[*k2], &weights);
            // Reuse the forward pair's Strings instead of cloning again.
            pairs.push((pair_fwd.0, pair_fwd.1, dist));
        }
    }
    // Closest-first; keep only the best max_pairs.
    pairs.sort_by(|a, b| a.2.total_cmp(&b.2));
    pairs.truncate(max_pairs);
    pairs
}
/// Approximate spectral coordinates for a new node using Nyström extension.
///
/// Given a new node's edges to existing nodes, estimate where it would
/// land in spectral space without recomputing the full decomposition.
/// Uses weighted average of neighbors' coordinates, weighted by edge strength.
/// Returns None when no neighbor has coordinates (or all weights are ~0).
pub fn nystrom_project(
    emb: &SpectralEmbedding,
    neighbors: &[(&str, f32)], // (key, edge_strength)
) -> Option<Vec<f64>> {
    let mut acc = vec![0.0f64; emb.dims];
    let mut total = 0.0f64;
    for &(key, strength) in neighbors {
        // Neighbors missing from the embedding contribute nothing.
        let Some(coords) = emb.coords.get(key) else { continue };
        let w = strength as f64;
        for (dim, &c) in coords.iter().enumerate() {
            acc[dim] += w * c;
        }
        total += w;
    }
    // Normalize by total weight; bail if there was effectively no signal.
    (total >= 1e-8).then(|| acc.iter().map(|s| s / total).collect())
}
/// Classify a spectral position: well-integrated, outlier, bridge, or orphan.
///
/// Checked in priority order — a node that is both bridge-like and
/// outlier-like is reported as "bridge".
pub fn classify_position(pos: &SpectralPosition) -> &'static str {
    match () {
        _ if pos.bridge_score > 0.7 => "bridge",     // between two communities
        _ if pos.outlier_score > 2.0 => "outlier",   // far from own community center
        _ if pos.outlier_score < 0.5 => "core",      // close to community center
        _ => "peripheral",                           // normal community member
    }
}
/// Identify which spectral dimensions a set of nodes load on most heavily.
/// Returns dimension indices sorted by total loading.
///
/// Loading for a dimension is the sum of |coordinate| across the given
/// nodes; keys with no embedding are ignored.
pub fn dominant_dimensions(emb: &SpectralEmbedding, keys: &[&str]) -> Vec<(usize, f64)> {
    let coords: Vec<&Vec<f64>> = keys.iter()
        .filter_map(|k| emb.coords.get(*k))
        .collect();
    if coords.is_empty() {
        return vec![];
    }
    let mut dim_loading: Vec<(usize, f64)> = Vec::with_capacity(emb.dims);
    for d in 0..emb.dims {
        let mut loading = 0.0f64;
        for c in &coords {
            loading += c[d].abs();
        }
        dim_loading.push((d, loading));
    }
    dim_loading.sort_by(|a, b| b.1.total_cmp(&a.1));
    dim_loading
}

View file

@ -36,7 +36,7 @@ pub use types::{
pub use parse::{MemoryUnit, parse_units}; pub use parse::{MemoryUnit, parse_units};
pub use view::{StoreView, AnyView}; pub use view::{StoreView, AnyView};
pub use persist::fsck; pub use persist::fsck;
pub use ops::current_provenance; pub use persist::strip_md_keys;
use crate::graph::{self, Graph}; use crate::graph::{self, Graph};
@ -47,7 +47,7 @@ use std::path::Path;
use parse::classify_filename; use parse::classify_filename;
/// Strip .md suffix from a key, handling both bare keys and section keys. /// Strip .md suffix from a key, handling both bare keys and section keys.
/// "identity.md" → "identity", "foo.md#section" → "foo#section", "identity" → "identity" /// "journal.md#j-2026" → "journal#j-2026", "identity.md" → "identity", "identity" → "identity"
pub fn strip_md_suffix(key: &str) -> String { pub fn strip_md_suffix(key: &str) -> String {
if let Some((file, section)) = key.split_once('#') { if let Some((file, section)) = key.split_once('#') {
let bare = file.strip_suffix(".md").unwrap_or(file); let bare = file.strip_suffix(".md").unwrap_or(file);

View file

@ -7,13 +7,6 @@ use super::types::*;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
/// Fallback provenance for non-tool-dispatch paths (CLI, digest, etc.).
/// Tool dispatch passes provenance directly through thought::dispatch.
pub fn current_provenance() -> String {
std::env::var("POC_PROVENANCE")
.unwrap_or_else(|_| "manual".to_string())
}
impl Store { impl Store {
/// Add or update a node (appends to log + updates cache). /// Add or update a node (appends to log + updates cache).
/// Holds StoreLock across refresh + check + write to prevent duplicate UUIDs. /// Holds StoreLock across refresh + check + write to prevent duplicate UUIDs.
@ -38,24 +31,15 @@ impl Store {
Ok(()) Ok(())
} }
/// Recent nodes by provenance, sorted newest-first. Returns (key, timestamp).
pub fn recent_by_provenance(&self, provenance: &str, limit: usize) -> Vec<(String, i64)> {
let mut nodes: Vec<_> = self.nodes.values()
.filter(|n| !n.deleted && n.provenance == provenance)
.map(|n| (n.key.clone(), n.timestamp))
.collect();
nodes.sort_by(|a, b| b.1.cmp(&a.1));
nodes.truncate(limit);
nodes
}
/// Upsert a node: update if exists (and content changed), create if not. /// Upsert a node: update if exists (and content changed), create if not.
/// Returns: "created", "updated", or "unchanged". /// Returns: "created", "updated", or "unchanged".
/// ///
/// Provenance is determined by the POC_PROVENANCE env var if set, /// Provenance is determined by the POC_PROVENANCE env var if set,
/// otherwise defaults to Manual. /// otherwise defaults to Manual.
pub fn upsert(&mut self, key: &str, content: &str) -> Result<&'static str, String> { pub fn upsert(&mut self, key: &str, content: &str) -> Result<&'static str, String> {
let prov = current_provenance(); let prov = Provenance::from_env()
.map(|p| p.label().to_string())
.unwrap_or_else(|| "manual".to_string());
self.upsert_provenance(key, content, &prov) self.upsert_provenance(key, content, &prov)
} }
@ -72,7 +56,6 @@ impl Store {
let mut node = existing.clone(); let mut node = existing.clone();
node.content = content.to_string(); node.content = content.to_string();
node.provenance = provenance.to_string(); node.provenance = provenance.to_string();
node.timestamp = now_epoch();
node.version += 1; node.version += 1;
self.append_nodes_unlocked(std::slice::from_ref(&node))?; self.append_nodes_unlocked(std::slice::from_ref(&node))?;
self.nodes.insert(key.to_string(), node); self.nodes.insert(key.to_string(), node);
@ -93,15 +76,11 @@ impl Store {
let _lock = StoreLock::acquire()?; let _lock = StoreLock::acquire()?;
self.refresh_nodes()?; self.refresh_nodes()?;
let prov = current_provenance();
let node = self.nodes.get(key) let node = self.nodes.get(key)
.ok_or_else(|| format!("No node '{}'", key))?; .ok_or_else(|| format!("No node '{}'", key))?;
let mut deleted = node.clone(); let mut deleted = node.clone();
deleted.deleted = true; deleted.deleted = true;
deleted.version += 1; deleted.version += 1;
deleted.provenance = prov;
deleted.timestamp = now_epoch();
self.append_nodes_unlocked(std::slice::from_ref(&deleted))?; self.append_nodes_unlocked(std::slice::from_ref(&deleted))?;
self.nodes.remove(key); self.nodes.remove(key);
Ok(()) Ok(())
@ -130,21 +109,15 @@ impl Store {
.ok_or_else(|| format!("No node '{}'", old_key))? .ok_or_else(|| format!("No node '{}'", old_key))?
.clone(); .clone();
let prov = current_provenance();
// New version under the new key // New version under the new key
let mut renamed = node.clone(); let mut renamed = node.clone();
renamed.key = new_key.to_string(); renamed.key = new_key.to_string();
renamed.version += 1; renamed.version += 1;
renamed.provenance = prov.clone();
renamed.timestamp = now_epoch();
// Deletion record for the old key (same UUID, independent version counter) // Deletion record for the old key (same UUID, independent version counter)
let mut tombstone = node.clone(); let mut tombstone = node.clone();
tombstone.deleted = true; tombstone.deleted = true;
tombstone.version += 1; tombstone.version += 1;
tombstone.provenance = prov;
tombstone.timestamp = now_epoch();
// Collect affected relations and update their debug key strings // Collect affected relations and update their debug key strings
let updated_rels: Vec<_> = self.relations.iter() let updated_rels: Vec<_> = self.relations.iter()
@ -297,99 +270,15 @@ impl Store {
Ok((hubs_capped, to_delete.len())) Ok((hubs_capped, to_delete.len()))
} }
/// Set a node's weight directly. Returns (old, new). /// Update graph-derived fields on all nodes
pub fn set_weight(&mut self, key: &str, weight: f32) -> Result<(f32, f32), String> { pub fn update_graph_metrics(&mut self) {
let weight = weight.clamp(0.01, 1.0); let g = self.build_graph();
let node = self.nodes.get_mut(key) let communities = g.communities();
.ok_or_else(|| format!("node not found: {}", key))?;
let old = node.weight;
node.weight = weight;
Ok((old, weight))
}
/// Update a node's weight with a new score and record the scoring for (key, node) in &mut self.nodes {
/// timestamp. Uses asymmetric smoothing: responds quickly to high node.community_id = communities.get(key).copied();
/// scores (alpha=0.5) but decays slowly on low scores (alpha=0.1). node.clustering_coefficient = Some(g.clustering_coefficient(key));
/// This keeps memories surfaced even if they're only useful 1 in 4 times. node.degree = Some(g.degree(key) as u32);
/// Returns (old_weight, new_weight).
pub fn score_weight(&mut self, key: &str, score: f64) -> Result<(f32, f32), String> {
let node = self.nodes.get_mut(key)
.ok_or_else(|| format!("node not found: {}", key))?;
let old = node.weight;
let alpha = if score > old as f64 { 0.5 } else { 0.1 };
let new = (alpha * score + (1.0 - alpha) * old as f64) as f32;
node.weight = new.clamp(0.01, 1.0);
node.last_scored = chrono::Utc::now().timestamp();
Ok((old, node.weight))
}
/// Set the strength of a link between two nodes. Deduplicates if
/// multiple links exist. Returns the old strength, or error if no link.
pub fn set_link_strength(&mut self, source: &str, target: &str, strength: f32) -> Result<f32, String> {
let strength = strength.clamp(0.01, 1.0);
let mut old = 0.0f32;
let mut found = false;
let mut first = true;
for rel in &mut self.relations {
if rel.deleted { continue; }
if (rel.source_key == source && rel.target_key == target)
|| (rel.source_key == target && rel.target_key == source)
{
if first {
old = rel.strength;
rel.strength = strength;
first = false;
} else {
rel.deleted = true; // deduplicate
}
found = true;
}
} }
if !found {
// Upsert: create the link if it doesn't exist
self.add_link(source, target, "link_set")?;
// Set the strength on the newly created link
for rel in self.relations.iter_mut().rev() {
if !rel.deleted && rel.source_key == source && rel.target_key == target {
rel.strength = strength;
break;
}
}
return Ok(0.0);
}
Ok(old)
}
/// Add a link between two nodes with Jaccard-based initial strength.
/// Returns the strength, or a message if the link already exists.
pub fn add_link(&mut self, source: &str, target: &str, provenance: &str) -> Result<f32, String> {
// Check for existing
let exists = self.relations.iter().any(|r|
!r.deleted &&
((r.source_key == source && r.target_key == target) ||
(r.source_key == target && r.target_key == source)));
if exists {
return Err(format!("link already exists: {}{}", source, target));
}
let source_uuid = self.nodes.get(source)
.map(|n| n.uuid)
.ok_or_else(|| format!("source not found: {}", source))?;
let target_uuid = self.nodes.get(target)
.map(|n| n.uuid)
.ok_or_else(|| format!("target not found: {}", target))?;
let graph = self.build_graph();
let jaccard = graph.jaccard(source, target);
let strength = (jaccard * 3.0).clamp(0.1, 1.0) as f32;
let mut rel = new_relation(
source_uuid, target_uuid,
RelationType::Link, strength,
source, target,
);
rel.provenance = provenance.to_string();
self.add_relation(rel)?;
Ok(strength)
} }
} }

View file

@ -23,7 +23,7 @@ pub struct MemoryUnit {
pub source_ref: Option<String>, pub source_ref: Option<String>,
} }
pub(super) fn classify_filename(filename: &str) -> NodeType { pub fn classify_filename(filename: &str) -> NodeType {
let bare = filename.strip_suffix(".md").unwrap_or(filename); let bare = filename.strip_suffix(".md").unwrap_or(filename);
if bare.starts_with("daily-") { NodeType::EpisodicDaily } if bare.starts_with("daily-") { NodeType::EpisodicDaily }
else if bare.starts_with("weekly-") { NodeType::EpisodicWeekly } else if bare.starts_with("weekly-") { NodeType::EpisodicWeekly }
@ -147,7 +147,7 @@ fn extract_md_links(content: &str, re: &Regex, source_file: &str) -> Vec<String>
.collect() .collect()
} }
fn normalize_link(target: &str, source_file: &str) -> String { pub fn normalize_link(target: &str, source_file: &str) -> String {
let source_bare = source_file.strip_suffix(".md").unwrap_or(source_file); let source_bare = source_file.strip_suffix(".md").unwrap_or(source_file);
if target.starts_with('#') { if target.starts_with('#') {

View file

@ -16,38 +16,10 @@ use capnp::serialize;
use std::collections::HashMap; use std::collections::HashMap;
use std::fs; use std::fs;
use std::io::{BufReader, Seek}; use std::io::{BufReader, BufWriter, Seek};
use std::path::Path; use std::path::Path;
use std::sync::Arc;
/// Process-global cached store. Reloads only when log files change.
static CACHED_STORE: tokio::sync::OnceCell<Arc<tokio::sync::Mutex<Store>>> =
tokio::sync::OnceCell::const_new();
impl Store { impl Store {
/// Get or create the process-global cached store.
/// Reloads from disk if log files have changed since last load.
pub async fn cached() -> Result<Arc<tokio::sync::Mutex<Store>>, String> {
let store = CACHED_STORE.get_or_try_init(|| async {
let s = Store::load()?;
Ok::<_, String>(Arc::new(tokio::sync::Mutex::new(s)))
}).await?;
{
let mut guard = store.lock().await;
if guard.is_stale() {
*guard = Store::load()?;
}
}
Ok(store.clone())
}
/// Check if the on-disk logs have grown since we loaded.
pub fn is_stale(&self) -> bool {
let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0);
let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0);
nodes_size != self.loaded_nodes_size || rels_size != self.loaded_rels_size
}
/// Load store from state.bin cache if fresh, otherwise rebuild from capnp logs. /// Load store from state.bin cache if fresh, otherwise rebuild from capnp logs.
/// ///
/// Staleness check uses log file sizes (not mtimes). Since logs are /// Staleness check uses log file sizes (not mtimes). Since logs are
@ -63,10 +35,6 @@ impl Store {
if visits_p.exists() { if visits_p.exists() {
store.replay_visits(&visits_p).ok(); store.replay_visits(&visits_p).ok();
} }
let tp_p = transcript_progress_path();
if tp_p.exists() {
store.replay_transcript_progress(&tp_p).ok();
}
return Ok(store); return Ok(store);
}, },
Ok(None) => {}, Ok(None) => {},
@ -81,13 +49,13 @@ impl Store {
let nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0); let nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
let rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0); let rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0);
if let Ok(data) = fs::read(&state_p) if let Ok(data) = fs::read(&state_p) {
&& data.len() >= CACHE_HEADER_LEN && data[..4] == CACHE_MAGIC { if data.len() >= CACHE_HEADER_LEN && data[..4] == CACHE_MAGIC {
let cached_nodes = u64::from_le_bytes(data[4..12].try_into().unwrap()); let cached_nodes = u64::from_le_bytes(data[4..12].try_into().unwrap());
let cached_rels = u64::from_le_bytes(data[12..20].try_into().unwrap()); let cached_rels = u64::from_le_bytes(data[12..20].try_into().unwrap());
if cached_nodes == nodes_size && cached_rels == rels_size if cached_nodes == nodes_size && cached_rels == rels_size {
&& let Ok(mut store) = bincode::deserialize::<Store>(&data[CACHE_HEADER_LEN..]) { if let Ok(mut store) = bincode::deserialize::<Store>(&data[CACHE_HEADER_LEN..]) {
// Rebuild uuid_to_key (skipped by serde) // Rebuild uuid_to_key (skipped by serde)
for (key, node) in &store.nodes { for (key, node) in &store.nodes {
store.uuid_to_key.insert(node.uuid, key.clone()); store.uuid_to_key.insert(node.uuid, key.clone());
@ -95,13 +63,16 @@ impl Store {
store.loaded_nodes_size = nodes_size; store.loaded_nodes_size = nodes_size;
store.loaded_rels_size = rels_size; store.loaded_rels_size = rels_size;
// Bootstrap: write rkyv snapshot if missing // Bootstrap: write rkyv snapshot if missing
if !snapshot_path().exists() if !snapshot_path().exists() {
&& let Err(e) = store.save_snapshot(cached_nodes, cached_rels) { if let Err(e) = store.save_snapshot(cached_nodes, cached_rels) {
eprintln!("rkyv bootstrap: {}", e); eprintln!("rkyv bootstrap: {}", e);
} }
}
return Ok(store); return Ok(store);
} }
}
} }
}
// Stale or no cache — rebuild from capnp logs // Stale or no cache — rebuild from capnp logs
let mut store = Store::default(); let mut store = Store::default();
@ -116,10 +87,6 @@ impl Store {
if visits_p.exists() { if visits_p.exists() {
store.replay_visits(&visits_p)?; store.replay_visits(&visits_p)?;
} }
let tp_p = transcript_progress_path();
if tp_p.exists() {
store.replay_transcript_progress(&tp_p)?;
}
// Record log sizes after replay — this is the state we reflect // Record log sizes after replay — this is the state we reflect
store.loaded_nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0); store.loaded_nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0);
@ -152,10 +119,6 @@ impl Store {
if visits_p.exists() { if visits_p.exists() {
store.replay_visits(&visits_p)?; store.replay_visits(&visits_p)?;
} }
let tp_p = transcript_progress_path();
if tp_p.exists() {
store.replay_transcript_progress(&tp_p)?;
}
Ok(store) Ok(store)
} }
@ -200,7 +163,7 @@ impl Store {
// Report duplicate keys // Report duplicate keys
for (key, uuids) in &key_uuids { for (key, uuids) in &key_uuids {
if uuids.len() > 1 { if uuids.len() > 1 {
dbglog!("WARNING: key '{}' has {} UUIDs (duplicate nodes)", key, uuids.len()); eprintln!("WARNING: key '{}' has {} UUIDs (duplicate nodes)", key, uuids.len());
} }
} }
@ -461,141 +424,6 @@ impl Store {
Ok(()) Ok(())
} }
/// Append transcript segment progress records.
pub fn append_transcript_progress(&mut self, segments: &[TranscriptSegment]) -> Result<(), String> {
if segments.is_empty() { return Ok(()); }
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::transcript_progress_log::Builder>();
let mut list = log.init_segments(segments.len() as u32);
for (i, seg) in segments.iter().enumerate() {
seg.to_capnp(list.reborrow().get(i as u32));
}
}
let mut buf = Vec::new();
serialize::write_message(&mut buf, &msg)
.map_err(|e| format!("serialize transcript progress: {}", e))?;
let path = transcript_progress_path();
let file = fs::OpenOptions::new()
.create(true).append(true).open(&path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
use std::io::Write;
(&file).write_all(&buf)
.map_err(|e| format!("write transcript progress: {}", e))?;
// Update in-memory index
for seg in segments {
self.transcript_progress
.entry((seg.transcript_id.clone(), seg.segment_index))
.or_default()
.insert(seg.agent.clone());
}
Ok(())
}
/// Replay transcript progress log to rebuild in-memory index.
fn replay_transcript_progress(&mut self, path: &Path) -> Result<(), String> {
let file = fs::File::open(path)
.map_err(|e| format!("open {}: {}", path.display(), e))?;
let mut reader = BufReader::new(file);
while reader.stream_position().map_err(|e| e.to_string())?
< fs::metadata(path).map_err(|e| e.to_string())?.len()
{
let msg = match serialize::read_message(&mut reader, Default::default()) {
Ok(m) => m,
Err(_) => break,
};
let log = msg.get_root::<memory_capnp::transcript_progress_log::Reader>()
.map_err(|e| format!("read transcript progress: {}", e))?;
for seg in log.get_segments().map_err(|e| e.to_string())? {
let id = seg.get_transcript_id().ok()
.and_then(|t| t.to_str().ok())
.unwrap_or("")
.to_string();
let agent = seg.get_agent().ok()
.and_then(|t| t.to_str().ok())
.unwrap_or("")
.to_string();
let idx = seg.get_segment_index();
if !id.is_empty() && !agent.is_empty() {
self.transcript_progress
.entry((id, idx))
.or_default()
.insert(agent);
}
}
}
Ok(())
}
/// Migrate old stub-node transcript markers into the new progress log.
/// Reads _observed-transcripts-f-*, _mined-transcripts#f-*, and _facts-* keys,
/// extracts transcript_id and segment_index, writes to transcript-progress.capnp,
/// then deletes the stub nodes.
pub fn migrate_transcript_progress(&mut self) -> Result<usize, String> {
let mut segments = Vec::new();
for key in self.nodes.keys() {
// _observed-transcripts-f-{UUID}.{segment}
if let Some(rest) = key.strip_prefix("_observed-transcripts-f-") {
if let Some((uuid, seg_str)) = rest.rsplit_once('.')
&& let Ok(seg) = seg_str.parse::<u32>() {
segments.push(new_transcript_segment(uuid, seg, "observation"));
}
}
// _mined-transcripts#f-{UUID}.{segment}
else if let Some(rest) = key.strip_prefix("_mined-transcripts#f-") {
if let Some((uuid, seg_str)) = rest.rsplit_once('.')
&& let Ok(seg) = seg_str.parse::<u32>() {
segments.push(new_transcript_segment(uuid, seg, "experience"));
}
}
// _mined-transcripts-f-{UUID}.{segment}
else if let Some(rest) = key.strip_prefix("_mined-transcripts-f-") {
if let Some((uuid, seg_str)) = rest.rsplit_once('.')
&& let Ok(seg) = seg_str.parse::<u32>() {
segments.push(new_transcript_segment(uuid, seg, "experience"));
}
}
// _facts-{UUID} (whole-file, segment 0)
else if let Some(uuid) = key.strip_prefix("_facts-") {
if !uuid.contains('-') || uuid.len() < 30 { continue; } // skip non-UUID
segments.push(new_transcript_segment(uuid, 0, "fact"));
}
}
let count = segments.len();
if count > 0 {
self.append_transcript_progress(&segments)?;
}
// Soft-delete the old stub nodes
let keys_to_delete: Vec<String> = self.nodes.keys()
.filter(|k| k.starts_with("_observed-transcripts-")
|| k.starts_with("_mined-transcripts")
|| (k.starts_with("_facts-") && !k.contains("fact_mine")))
.cloned()
.collect();
for key in &keys_to_delete {
if let Some(node) = self.nodes.get_mut(key) {
node.deleted = true;
}
}
if !keys_to_delete.is_empty() {
self.save()?;
}
Ok(count)
}
/// Record visits for a batch of node keys from a successful agent run. /// Record visits for a batch of node keys from a successful agent run.
pub fn record_agent_visits(&mut self, node_keys: &[String], agent: &str) -> Result<(), String> { pub fn record_agent_visits(&mut self, node_keys: &[String], agent: &str) -> Result<(), String> {
let visits: Vec<AgentVisit> = node_keys.iter() let visits: Vec<AgentVisit> = node_keys.iter()
@ -746,6 +574,138 @@ impl Store {
} }
} }
/// Strip .md suffix from all node keys and relation key strings.
/// Merges duplicates (bare key + .md key) by keeping the latest version.
pub fn strip_md_keys() -> Result<(), String> {
use super::strip_md_suffix;
let mut store = Store::load()?;
let mut renamed_nodes = 0usize;
let mut renamed_rels = 0usize;
let mut merged = 0usize;
// Collect keys that need renaming
let old_keys: Vec<String> = store.nodes.keys()
.filter(|k| k.ends_with(".md") || k.contains(".md#"))
.cloned()
.collect();
for old_key in &old_keys {
let new_key = strip_md_suffix(old_key);
if new_key == *old_key { continue; }
let mut node = store.nodes.remove(old_key).unwrap();
store.uuid_to_key.remove(&node.uuid);
if let Some(existing) = store.nodes.get(&new_key) {
// Merge: keep whichever has the higher version
if existing.version >= node.version {
eprintln!(" merge {}{} (keeping existing v{})",
old_key, new_key, existing.version);
merged += 1;
continue;
}
eprintln!(" merge {}{} (replacing v{} with v{})",
old_key, new_key, existing.version, node.version);
merged += 1;
}
node.key = new_key.clone();
node.version += 1;
store.uuid_to_key.insert(node.uuid, new_key.clone());
store.nodes.insert(new_key, node);
renamed_nodes += 1;
}
// Fix relation key strings
for rel in &mut store.relations {
let new_source = strip_md_suffix(&rel.source_key);
let new_target = strip_md_suffix(&rel.target_key);
if new_source != rel.source_key || new_target != rel.target_key {
rel.source_key = new_source;
rel.target_key = new_target;
rel.version += 1;
renamed_rels += 1;
}
}
if renamed_nodes == 0 && renamed_rels == 0 && merged == 0 {
eprintln!("No .md suffixes found — store is clean");
return Ok(());
}
eprintln!("Renamed {} nodes, {} relations, merged {} duplicates",
renamed_nodes, renamed_rels, merged);
// Write fresh logs from the migrated state
rewrite_store(&store)?;
eprintln!("Store rewritten successfully");
Ok(())
}
/// Rewrite the entire store from scratch (fresh logs + caches).
/// Used after migrations that change keys across all nodes/relations.
fn rewrite_store(store: &Store) -> Result<(), String> {
let _lock = StoreLock::acquire()?;
// Write fresh node log
let nodes: Vec<_> = store.nodes.values().cloned().collect();
let nodes_path = nodes_path();
{
let file = fs::File::create(&nodes_path)
.map_err(|e| format!("create {}: {}", nodes_path.display(), e))?;
let mut writer = BufWriter::new(file);
// Write in chunks to keep message sizes reasonable
for chunk in nodes.chunks(100) {
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::node_log::Builder>();
let mut list = log.init_nodes(chunk.len() as u32);
for (i, node) in chunk.iter().enumerate() {
node.to_capnp(list.reborrow().get(i as u32));
}
}
serialize::write_message(&mut writer, &msg)
.map_err(|e| format!("write nodes: {}", e))?;
}
}
// Write fresh relation log
let rels_path = relations_path();
{
let file = fs::File::create(&rels_path)
.map_err(|e| format!("create {}: {}", rels_path.display(), e))?;
let mut writer = BufWriter::new(file);
let rels: Vec<_> = store.relations.iter().filter(|r| !r.deleted).cloned().collect();
if !rels.is_empty() {
for chunk in rels.chunks(100) {
let mut msg = message::Builder::new_default();
{
let log = msg.init_root::<memory_capnp::relation_log::Builder>();
let mut list = log.init_relations(chunk.len() as u32);
for (i, rel) in chunk.iter().enumerate() {
rel.to_capnp(list.reborrow().get(i as u32));
}
}
serialize::write_message(&mut writer, &msg)
.map_err(|e| format!("write relations: {}", e))?;
}
}
}
// Nuke caches so next load rebuilds from fresh logs
for p in [state_path(), snapshot_path()] {
if p.exists() {
fs::remove_file(&p).ok();
}
}
Ok(())
}
/// Check and repair corrupt capnp log files. /// Check and repair corrupt capnp log files.
/// ///
/// Reads each message sequentially, tracking file position. On the first /// Reads each message sequentially, tracking file position. On the first

View file

@ -8,7 +8,7 @@ use crate::memory_capnp;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use uuid::Uuid; use uuid::Uuid;
use std::collections::{HashMap, HashSet}; use std::collections::HashMap;
use std::fs; use std::fs;
use std::os::unix::io::AsRawFd; use std::os::unix::io::AsRawFd;
use std::path::PathBuf; use std::path::PathBuf;
@ -26,7 +26,7 @@ use std::time::{SystemTime, UNIX_EPOCH};
macro_rules! capnp_enum { macro_rules! capnp_enum {
($rust_type:ident, $capnp_type:path, [$($variant:ident),+ $(,)?]) => { ($rust_type:ident, $capnp_type:path, [$($variant:ident),+ $(,)?]) => {
impl $rust_type { impl $rust_type {
#[allow(clippy::wrong_self_convention, dead_code)] #[allow(clippy::wrong_self_convention)]
pub(crate) fn to_capnp(&self) -> $capnp_type { pub(crate) fn to_capnp(&self) -> $capnp_type {
match self { match self {
$(Self::$variant => <$capnp_type>::$variant,)+ $(Self::$variant => <$capnp_type>::$variant,)+
@ -74,7 +74,7 @@ macro_rules! capnp_message {
} }
} }
pub fn to_capnp(&self, mut b: $builder) { pub(crate) fn to_capnp(&self, mut b: $builder) {
paste::paste! { paste::paste! {
$(b.[<set_ $tf>](&self.$tf);)* $(b.[<set_ $tf>](&self.$tf);)*
$(b.[<set_ $uf>](&self.$uf);)* $(b.[<set_ $uf>](&self.$uf);)*
@ -214,10 +214,6 @@ pub struct Node {
#[serde(default)] #[serde(default)]
pub created_at: i64, pub created_at: i64,
// Memory importance scoring — unix epoch seconds, 0 = never scored.
#[serde(default)]
pub last_scored: i64,
// Derived fields (not in capnp, computed from graph) // Derived fields (not in capnp, computed from graph)
#[serde(default)] #[serde(default)]
pub community_id: Option<u32>, pub community_id: Option<u32>,
@ -346,7 +342,7 @@ capnp_message!(Node,
uuid: [uuid], uuid: [uuid],
prim: [version, timestamp, weight, emotion, deleted, prim: [version, timestamp, weight, emotion, deleted,
retrievals, uses, wrongs, last_replayed, retrievals, uses, wrongs, last_replayed,
spaced_repetition_interval, position, created_at, last_scored], spaced_repetition_interval, position, created_at],
enm: [node_type: NodeType], enm: [node_type: NodeType],
skip: [community_id, clustering_coefficient, degree], skip: [community_id, clustering_coefficient, degree],
); );
@ -356,18 +352,10 @@ impl Node {
/// is empty (old record), fall back to the deprecated provenanceOld enum. /// is empty (old record), fall back to the deprecated provenanceOld enum.
pub fn from_capnp_migrate(r: memory_capnp::content_node::Reader<'_>) -> Result<Self, String> { pub fn from_capnp_migrate(r: memory_capnp::content_node::Reader<'_>) -> Result<Self, String> {
let mut node = Self::from_capnp(r)?; let mut node = Self::from_capnp(r)?;
if node.provenance.is_empty() if node.provenance.is_empty() {
&& let Ok(old) = r.get_provenance_old() { if let Ok(old) = r.get_provenance_old() {
node.provenance = Provenance::from_capnp(old).label().to_string(); node.provenance = Provenance::from_capnp(old).label().to_string();
} }
// Sanitize timestamps: old capnp records have raw offsets instead
// of unix epoch. Anything past year 2100 (~4102444800) is bogus.
const MAX_SANE_EPOCH: i64 = 4_102_444_800;
if node.timestamp > MAX_SANE_EPOCH || node.timestamp < 0 {
node.timestamp = node.created_at;
}
if node.created_at > MAX_SANE_EPOCH || node.created_at < 0 {
node.created_at = node.timestamp.min(MAX_SANE_EPOCH);
} }
Ok(node) Ok(node)
} }
@ -386,10 +374,11 @@ capnp_message!(Relation,
impl Relation { impl Relation {
pub fn from_capnp_migrate(r: memory_capnp::relation::Reader<'_>) -> Result<Self, String> { pub fn from_capnp_migrate(r: memory_capnp::relation::Reader<'_>) -> Result<Self, String> {
let mut rel = Self::from_capnp(r)?; let mut rel = Self::from_capnp(r)?;
if rel.provenance.is_empty() if rel.provenance.is_empty() {
&& let Ok(old) = r.get_provenance_old() { if let Ok(old) = r.get_provenance_old() {
rel.provenance = Provenance::from_capnp(old).label().to_string(); rel.provenance = Provenance::from_capnp(old).label().to_string();
} }
}
Ok(rel) Ok(rel)
} }
} }
@ -438,7 +427,7 @@ pub struct GapRecord {
} }
/// Per-node agent visit index: node_key → (agent_type → last_visit_timestamp) /// Per-node agent visit index: node_key → (agent_type → last_visit_timestamp)
pub(super) type VisitIndex = HashMap<String, HashMap<String, i64>>; pub type VisitIndex = HashMap<String, HashMap<String, i64>>;
// The full in-memory store // The full in-memory store
#[derive(Default, Serialize, Deserialize)] #[derive(Default, Serialize, Deserialize)]
@ -453,9 +442,6 @@ pub struct Store {
/// Agent visit tracking: node_key → (agent_type → last_visit_epoch) /// Agent visit tracking: node_key → (agent_type → last_visit_epoch)
#[serde(default)] #[serde(default)]
pub visits: VisitIndex, pub visits: VisitIndex,
/// Transcript mining progress: (transcript_id, segment_index) → set of agents that processed it
#[serde(default)]
pub transcript_progress: HashMap<(String, u32), HashSet<String>>,
/// Log sizes at load time — used by save() to write correct staleness header. /// Log sizes at load time — used by save() to write correct staleness header.
/// If another writer appended since we loaded, our cache will be marked stale /// If another writer appended since we loaded, our cache will be marked stale
/// (recorded size < actual size), forcing the next reader to replay the log. /// (recorded size < actual size), forcing the next reader to replay the log.
@ -505,10 +491,11 @@ pub(crate) fn read_text(result: capnp::Result<capnp::text::Reader>) -> String {
/// Read a capnp data field as [u8; 16], zero-padded /// Read a capnp data field as [u8; 16], zero-padded
pub(crate) fn read_uuid(result: capnp::Result<&[u8]>) -> [u8; 16] { pub(crate) fn read_uuid(result: capnp::Result<&[u8]>) -> [u8; 16] {
let mut out = [0u8; 16]; let mut out = [0u8; 16];
if let Ok(data) = result if let Ok(data) = result {
&& data.len() >= 16 { if data.len() >= 16 {
out.copy_from_slice(&data[..16]); out.copy_from_slice(&data[..16]);
} }
}
out out
} }
@ -535,7 +522,6 @@ pub fn new_node(key: &str, content: &str) -> Node {
spaced_repetition_interval: 1, spaced_repetition_interval: 1,
position: 0, position: 0,
created_at: now_epoch(), created_at: now_epoch(),
last_scored: 0,
community_id: None, community_id: None,
clustering_coefficient: None, clustering_coefficient: None,
degree: None, degree: None,
@ -562,7 +548,7 @@ capnp_message!(AgentVisit,
skip: [], skip: [],
); );
pub(super) fn new_visit(node_uuid: [u8; 16], node_key: &str, agent: &str, outcome: &str) -> AgentVisit { pub fn new_visit(node_uuid: [u8; 16], node_key: &str, agent: &str, outcome: &str) -> AgentVisit {
AgentVisit { AgentVisit {
node_uuid, node_uuid,
node_key: node_key.to_string(), node_key: node_key.to_string(),
@ -574,38 +560,7 @@ pub(super) fn new_visit(node_uuid: [u8; 16], node_key: &str, agent: &str, outcom
pub(crate) fn visits_path() -> PathBuf { memory_dir().join("visits.capnp") } pub(crate) fn visits_path() -> PathBuf { memory_dir().join("visits.capnp") }
/// Transcript mining progress — tracks which segments have been processed /// Create a new relation
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TranscriptSegment {
pub transcript_id: String,
pub segment_index: u32,
pub agent: String,
pub timestamp: i64,
}
capnp_message!(TranscriptSegment,
reader: memory_capnp::transcript_segment::Reader<'_>,
builder: memory_capnp::transcript_segment::Builder<'_>,
text: [transcript_id, agent],
uuid: [],
prim: [segment_index, timestamp],
enm: [],
skip: [],
);
pub(super) fn new_transcript_segment(transcript_id: &str, segment_index: u32, agent: &str) -> TranscriptSegment {
TranscriptSegment {
transcript_id: transcript_id.to_string(),
segment_index,
agent: agent.to_string(),
timestamp: now_epoch(),
}
}
pub(crate) fn transcript_progress_path() -> PathBuf { memory_dir().join("transcript-progress.capnp") }
/// Create a new relation.
/// Provenance is set from POC_PROVENANCE env var if present, else "manual".
pub fn new_relation( pub fn new_relation(
source_uuid: [u8; 16], source_uuid: [u8; 16],
target_uuid: [u8; 16], target_uuid: [u8; 16],
@ -614,9 +569,6 @@ pub fn new_relation(
source_key: &str, source_key: &str,
target_key: &str, target_key: &str,
) -> Relation { ) -> Relation {
// Use raw env var for provenance — agent names are dynamic
let provenance = std::env::var("POC_PROVENANCE")
.unwrap_or_else(|_| "manual".to_string());
Relation { Relation {
uuid: *Uuid::new_v4().as_bytes(), uuid: *Uuid::new_v4().as_bytes(),
version: 1, version: 1,
@ -625,7 +577,7 @@ pub fn new_relation(
target: target_uuid, target: target_uuid,
rel_type, rel_type,
strength, strength,
provenance, provenance: "manual".to_string(),
deleted: false, deleted: false,
source_key: source_key.to_string(), source_key: source_key.to_string(),
target_key: target_key.to_string(), target_key: target_key.to_string(),

View file

@ -19,9 +19,6 @@ pub trait StoreView {
/// Iterate all nodes. Callback receives (key, content, weight). /// Iterate all nodes. Callback receives (key, content, weight).
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F); fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F);
/// Iterate all nodes with metadata. Callback receives (key, node_type, timestamp).
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, f: F);
/// Iterate all relations. Callback receives (source_key, target_key, strength, rel_type). /// Iterate all relations. Callback receives (source_key, target_key, strength, rel_type).
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F); fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F);
@ -42,12 +39,6 @@ impl StoreView for Store {
} }
} }
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, mut f: F) {
for (key, node) in &self.nodes {
f(key, node.node_type, node.timestamp);
}
}
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) { fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
for rel in &self.relations { for rel in &self.relations {
if rel.deleted { continue; } if rel.deleted { continue; }
@ -119,20 +110,6 @@ impl StoreView for MmapView {
} }
} }
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, mut f: F) {
let snap = self.snapshot();
for (key, node) in snap.nodes.iter() {
let nt = match node.node_type {
ArchivedNodeType::EpisodicSession => NodeType::EpisodicSession,
ArchivedNodeType::EpisodicDaily => NodeType::EpisodicDaily,
ArchivedNodeType::EpisodicWeekly => NodeType::EpisodicWeekly,
ArchivedNodeType::EpisodicMonthly => NodeType::EpisodicMonthly,
ArchivedNodeType::Semantic => NodeType::Semantic,
};
f(key, nt, node.timestamp);
}
}
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) { fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
let snap = self.snapshot(); let snap = self.snapshot();
for rel in snap.relations.iter() { for rel in snap.relations.iter() {
@ -199,9 +176,6 @@ impl StoreView for AnyView {
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F) { fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F) {
match self { AnyView::Mmap(v) => v.for_each_node(f), AnyView::Owned(s) => s.for_each_node(f) } match self { AnyView::Mmap(v) => v.for_each_node(f), AnyView::Owned(s) => s.for_each_node(f) }
} }
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, f: F) {
match self { AnyView::Mmap(v) => v.for_each_node_meta(f), AnyView::Owned(s) => s.for_each_node_meta(f) }
}
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F) { fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F) {
match self { AnyView::Mmap(v) => v.for_each_relation(f), AnyView::Owned(s) => s.for_each_relation(f) } match self { AnyView::Mmap(v) => v.for_each_relation(f), AnyView::Owned(s) => s.for_each_relation(f) }
} }

View file

@ -0,0 +1,176 @@
// Transcript JSONL parsing utilities.
//
// Provides mmap-based backward scanning of Claude Code transcript files
// and compaction detection. Used by memory-search (hook mode) and
// parse-claude-conversation (debug tool).
use memmap2::Mmap;
use serde_json::Value;
use std::fs;
use std::path::Path;
/// Scan backwards through mmap'd bytes, yielding byte slices of complete
/// top-level JSON objects (outermost { to matching }).
///
/// Tracks brace depth, skipping braces inside JSON strings. Returns
/// objects in reverse order (newest first).
/// Backward cursor over mmap'd JSONL bytes.
///
/// `pos` marks how far back the scan has progressed; it starts at the
/// end of the buffer and moves toward 0 as objects are yielded.
pub struct JsonlBackwardIter<'a> {
    data: &'a [u8],
    pos: usize,
}
impl<'a> JsonlBackwardIter<'a> {
    /// Start a backward scan positioned just past the last byte of `data`.
    pub fn new(data: &'a [u8]) -> Self {
        let pos = data.len();
        Self { data, pos }
    }
}
/// Backward iterator: each `next()` yields the byte span of the previous
/// complete top-level JSON object (outermost `{` to its matching `}`),
/// newest first. Braces inside JSON strings are ignored via string/escape
/// tracking.
impl<'a> Iterator for JsonlBackwardIter<'a> {
    type Item = &'a [u8];
    fn next(&mut self) -> Option<Self::Item> {
        if self.pos == 0 {
            return None;
        }
        // Find the closing } of the next object (scanning backward)
        let close = loop {
            if self.pos == 0 { return None; }
            self.pos -= 1;
            if self.data[self.pos] == b'}' {
                break self.pos;
            }
        };
        // Track brace depth to find matching {
        let mut depth: usize = 1;
        let mut in_string = false;
        loop {
            if self.pos == 0 {
                return None;
            }
            self.pos -= 1;
            let ch = self.data[self.pos];
            if in_string {
                if ch == b'"' {
                    // Count the run of backslashes immediately before this
                    // quote; an even count means the quote is unescaped, so
                    // (scanning backward) this is where the string started.
                    let mut bs = 0;
                    while self.pos > bs && self.data[self.pos - 1 - bs] == b'\\' {
                        bs += 1;
                    }
                    if bs % 2 == 0 {
                        in_string = false;
                    }
                }
                continue;
            }
            match ch {
                // Scanning backward, an unescaped quote outside a string is
                // a string's closing quote — enter string mode.
                b'"' => { in_string = true; }
                b'}' => { depth += 1; }
                b'{' => {
                    depth -= 1;
                    if depth == 0 {
                        // self.pos sits on the object's opening brace; it
                        // also serves as the resume point for the next call.
                        return Some(&self.data[self.pos..=close]);
                    }
                }
                _ => {}
            }
        }
    }
}
/// Find the byte offset of the last compaction summary in mmap'd transcript data.
///
/// Scans backward for a user-type message whose content starts with
/// "This session is being continued". Returns the byte offset of the
/// JSON object's opening brace.
/// Find the byte offset of the last compaction summary in mmap'd transcript data.
///
/// Walks the transcript backward (newest object first) looking for a
/// user-type message whose content starts with
/// "This session is being continued". Returns the byte offset of that
/// JSON object's opening brace, or `None` when no compaction exists.
pub fn find_last_compaction(data: &[u8]) -> Option<usize> {
    let marker = b"This session is being continued";
    for raw in JsonlBackwardIter::new(data) {
        // Cheap byte-level prefilter: skip objects that can't possibly
        // match before paying for a full JSON parse.
        if !contains_bytes(raw, marker) {
            continue;
        }
        let parsed: Value = match serde_json::from_slice(raw) {
            Ok(v) => v,
            Err(_) => continue,
        };
        if parsed.get("type").and_then(|v| v.as_str()) != Some("user") {
            continue;
        }
        let content = parsed
            .get("message")
            .and_then(|m| m.get("content"))
            .and_then(|c| c.as_str());
        if let Some(text) = content {
            if text.starts_with("This session is being continued") {
                // `raw` borrows directly from `data`, so pointer
                // subtraction yields the object's offset in the transcript.
                return Some(raw.as_ptr() as usize - data.as_ptr() as usize);
            }
        }
    }
    None
}
/// Find the byte offset of the last compaction in a transcript file.
/// Returns None if the file can't be opened or has no compaction.
/// Find the byte offset of the last compaction in a transcript file.
/// Returns `None` when the path is empty, the file can't be opened or
/// has zero length, or it contains no compaction summary.
pub fn find_last_compaction_in_file(path: &str) -> Option<u64> {
    if path.is_empty() {
        return None;
    }
    let file = fs::File::open(path).ok()?;
    let len = file.metadata().ok()?.len();
    if len == 0 {
        return None;
    }
    // Mapping is `unsafe` because another process could truncate the file
    // while it is mapped; this data is treated as best-effort input.
    let mapped = unsafe { Mmap::map(&file) }.ok()?;
    find_last_compaction(&mapped).map(|offset| offset as u64)
}
/// Mmap a transcript file. Returns (Mmap, File) to keep both alive.
/// Mmap a transcript file. Returns `(Mmap, File)` so the caller keeps
/// both alive for the mapping's lifetime; `None` on open/metadata
/// failure or an empty file.
pub fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
    let file = fs::File::open(path).ok()?;
    if file.metadata().ok()?.len() == 0 {
        return None;
    }
    let mapped = unsafe { Mmap::map(&file) }.ok()?;
    Some((mapped, file))
}
/// Naive byte-substring test: true when `needle` occurs in `haystack`.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    // `slice::windows` panics when the window size is 0, so handle the
    // empty needle explicitly: every slice contains the empty string.
    if needle.is_empty() {
        return true;
    }
    haystack.windows(needle.len()).any(|w| w == needle)
}
/// Detect whether a compaction has occurred since the last check.
///
/// Compares the current compaction offset against a saved value in
/// `state_dir/compaction-{session_id}`. Returns true if a new
/// compaction was found. Updates the saved offset.
/// Detect whether a compaction has occurred since the last check.
///
/// Compares the transcript's current compaction offset against the value
/// saved in `state_dir/compaction-{session_id}`, returns true when a new
/// compaction appeared, and persists the current offset for next time.
pub fn detect_new_compaction(
    state_dir: &Path,
    session_id: &str,
    transcript_path: &str,
) -> bool {
    let current = find_last_compaction_in_file(transcript_path);
    let marker_file = state_dir.join(format!("compaction-{}", session_id));
    let previous: Option<u64> = fs::read_to_string(&marker_file)
        .ok()
        .and_then(|s| s.trim().parse().ok());
    // No compaction at all (or an unreadable transcript) never counts as new.
    let changed = match (current, previous) {
        (Some(now), Some(before)) => now != before,
        (Some(_), None) => true,
        _ => false,
    };
    // Best-effort persist of the latest offset.
    if let Some(offset) = current {
        fs::write(&marker_file, offset.to_string()).ok();
    }
    changed
}

907
poc-memory/src/tui.rs Normal file
View file

@ -0,0 +1,907 @@
// TUI dashboard for poc-memory daemon
//
// Connects to the daemon status socket, polls periodically, and renders
// a tabbed interface with per-agent-type tabs for drill-down. Designed
// for observability and control of the consolidation system.
//
// Tabs:
// Overview — graph health gauges, in-flight tasks, recent completions
// Pipeline — daily pipeline phases in execution order
// <agent> — one tab per agent type (replay, linker, separator, transfer,
// health, apply, etc.) showing all runs with output + log history
// Log — auto-scrolling daemon.log tail
use crate::agents::daemon::GraphHealth;
use crossterm::event::{self, Event, KeyCode, KeyModifiers};
use jobkit::{TaskInfo, TaskStatus};
use ratatui::{
layout::{Constraint, Layout, Rect},
style::{Color, Modifier, Style, Stylize},
text::{Line, Span},
widgets::{Block, Borders, Cell, Gauge, Paragraph, Row, Table, Tabs, Wrap},
DefaultTerminal, Frame,
};
use std::fs;
use std::io::Read as _;
use std::os::unix::net::UnixStream;
use std::path::PathBuf;
use std::time::{Duration, Instant};
const POLL_INTERVAL: Duration = Duration::from_secs(2);
// Agent types we know about, in display order
const AGENT_TYPES: &[&str] = &[
"health", "replay", "linker", "separator", "transfer",
"apply", "orphans", "cap", "digest", "digest-links", "knowledge", "rename", "split",
];
/// Path of the daemon's unix status socket under the configured data dir.
fn status_sock_path() -> PathBuf {
    let cfg = crate::config::get();
    cfg.data_dir.join("daemon.sock")
}
/// Path of the daemon's JSONL log file under the configured data dir.
fn log_path() -> PathBuf {
    let cfg = crate::config::get();
    cfg.data_dir.join("daemon.log")
}
// --- Data fetching ---
/// Deserialized payload read from the daemon's status socket.
#[derive(serde::Deserialize)]
struct DaemonStatus {
    // Daemon process id — present in the payload but unused by the TUI.
    #[allow(dead_code)]
    pid: u32,
    // All tasks reported by the daemon's scheduler.
    tasks: Vec<TaskInfo>,
    // Last daily pipeline run marker — optional field, unused by the TUI.
    #[serde(default)]
    #[allow(dead_code)]
    last_daily: Option<String>,
    // Graph health metrics, when the daemon has computed them.
    #[serde(default)]
    graph_health: Option<GraphHealth>,
}
/// Query the daemon's status socket.
/// Returns `None` on any connect, read, or JSON-parse failure.
fn fetch_status() -> Option<DaemonStatus> {
    let mut conn = UnixStream::connect(status_sock_path()).ok()?;
    // Best-effort: a failure here only means the read may block longer.
    let _ = conn.set_read_timeout(Some(Duration::from_secs(2)));
    let mut payload = String::new();
    conn.read_to_string(&mut payload).ok()?;
    serde_json::from_str(&payload).ok()
}
/// One parsed line of the daemon's JSONL log.
#[derive(Clone)]
struct LogEntry {
    // Timestamp string exactly as written in the log's "ts" field.
    ts: String,
    // Job name from the "job" field.
    job: String,
    // Event kind from the "event" field (e.g. "started"/"completed"/"failed").
    event: String,
    // Optional free-form detail; empty string when the field is absent.
    detail: String,
}
/// Read the newest `max` lines of daemon.log and parse each JSONL record
/// into a `LogEntry`, returned oldest-first. Lines that fail to parse or
/// lack a required field ("ts"/"job"/"event") are dropped.
fn load_log_entries(max: usize) -> Vec<LogEntry> {
    let content = match fs::read_to_string(log_path()) {
        Ok(c) => c,
        Err(_) => return Vec::new(),
    };
    // Walk from the end of the file so only the newest `max` lines are
    // considered, then flip back to chronological order.
    let mut entries: Vec<LogEntry> = content
        .lines()
        .rev()
        .take(max)
        .filter_map(|line| {
            let record: serde_json::Value = serde_json::from_str(line).ok()?;
            let field = |name: &str| Some(record.get(name)?.as_str()?.to_string());
            Some(LogEntry {
                ts: field("ts")?,
                job: field("job")?,
                event: field("event")?,
                detail: record
                    .get("detail")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .to_string(),
            })
        })
        .collect();
    entries.reverse();
    entries
}
// --- Tab model ---
#[derive(Clone, PartialEq, Eq)]
enum Tab {
    Overview,
    Pipeline,
    /// One tab per agent type, carrying its name ("replay", "linker", …).
    Agent(String),
    Log,
}
impl Tab {
    /// Human-readable title shown in the tab bar.
    fn label(&self) -> String {
        match self {
            Tab::Agent(agent) => agent.clone(),
            Tab::Overview => String::from("Overview"),
            Tab::Pipeline => String::from("Pipeline"),
            Tab::Log => String::from("Log"),
        }
    }
}
// --- App state ---
/// Top-level TUI state: tab set, polled daemon status, and log tail.
struct App {
    /// Visible tabs, rebuilt on every poll from status + log contents.
    tabs: Vec<Tab>,
    /// Index of the selected tab within `tabs`.
    tab_idx: usize,
    /// Latest daemon status; `None` when the status socket was unreachable.
    status: Option<DaemonStatus>,
    /// Tail of daemon.log (oldest first), capped at 500 entries.
    log_entries: Vec<LogEntry>,
    /// When status/log were last fetched; drives the poll cadence.
    last_poll: Instant,
    /// Scroll offset within the current tab's body.
    scroll: usize,
    count_prefix: Option<usize>, // numeric prefix for commands (vim-style)
    flash_msg: Option<(String, Instant)>, // transient status message
}
impl App {
    /// Initial state: one synchronous fetch of status + log, then tabs.
    fn new() -> Self {
        let status = fetch_status();
        let log_entries = load_log_entries(500);
        Self {
            tabs: Self::build_tabs(&status, &log_entries),
            tab_idx: 0,
            scroll: 0,
            count_prefix: None,
            flash_msg: None,
            last_poll: Instant::now(),
            status,
            log_entries,
        }
    }
    /// Tab set: Overview and Pipeline always first; one tab per agent
    /// type that has scheduler tasks ("c-<agent>…") or log lines; Log last.
    fn build_tabs(status: &Option<DaemonStatus>, log_entries: &[LogEntry]) -> Vec<Tab> {
        let mut tabs = vec![Tab::Overview, Tab::Pipeline];
        for agent in AGENT_TYPES {
            let prefix = format!("c-{}", agent);
            let in_tasks = match status {
                Some(s) => s.tasks.iter().any(|t| t.name.starts_with(&prefix)),
                None => false,
            };
            let in_logs = log_entries
                .iter()
                .any(|e| e.job.starts_with(&prefix) || e.job == *agent);
            if in_tasks || in_logs {
                tabs.push(Tab::Agent((*agent).to_string()));
            }
        }
        tabs.push(Tab::Log);
        tabs
    }
    /// Refresh status and log at most once per POLL_INTERVAL, rebuilding
    /// the tab set while keeping the current selection when it survives.
    fn poll(&mut self) {
        if self.last_poll.elapsed() < POLL_INTERVAL {
            return;
        }
        self.status = fetch_status();
        self.log_entries = load_log_entries(500);
        let selected = self.tabs.get(self.tab_idx).cloned();
        self.tabs = Self::build_tabs(&self.status, &self.log_entries);
        if let Some(tab) = selected {
            // Fall back to Overview (index 0) if the tab disappeared.
            self.tab_idx = self.tabs.iter().position(|t| *t == tab).unwrap_or(0);
        }
        self.last_poll = Instant::now();
    }
    /// Selected tab; falls back to Overview if the index is stale.
    fn current_tab(&self) -> &Tab {
        match self.tabs.get(self.tab_idx) {
            Some(tab) => tab,
            None => &Tab::Overview,
        }
    }
    /// All tasks from the last status fetch (empty when disconnected).
    fn tasks(&self) -> &[TaskInfo] {
        match &self.status {
            Some(s) => &s.tasks,
            None => &[],
        }
    }
    /// Tasks whose name starts with "c-<agent_type>".
    fn tasks_for_agent(&self, agent_type: &str) -> Vec<&TaskInfo> {
        let prefix = format!("c-{}", agent_type);
        let mut matched = Vec::new();
        for task in self.tasks() {
            if task.name.starts_with(&prefix) {
                matched.push(task);
            }
        }
        matched
    }
    /// Log entries whose job matches "c-<agent_type>…" or the bare agent name.
    fn logs_for_agent(&self, agent_type: &str) -> Vec<&LogEntry> {
        let prefix = format!("c-{}", agent_type);
        self.log_entries
            .iter()
            .filter(|entry| entry.job.starts_with(&prefix) || entry.job == agent_type)
            .collect()
    }
    /// Tasks belonging to the daily pipeline, selected by name prefix.
    fn pipeline_tasks(&self) -> Vec<&TaskInfo> {
        let is_pipeline = |name: &str| {
            name.starts_with("c-")
                || name.starts_with("consolidate:")
                || name.starts_with("knowledge-loop:")
                || name.starts_with("digest:")
                || name.starts_with("decay:")
        };
        self.tasks().iter().filter(|t| is_pipeline(&t.name)).collect()
    }
    /// Cycle selection one tab right, resetting scroll.
    fn next_tab(&mut self) {
        self.tab_idx = (self.tab_idx + 1) % self.tabs.len();
        self.scroll = 0;
    }
    /// Cycle selection one tab left, resetting scroll.
    fn prev_tab(&mut self) {
        let len = self.tabs.len();
        self.tab_idx = (self.tab_idx + len - 1) % len;
        self.scroll = 0;
    }
}
// --- Rendering ---
/// Compact human formatting of a duration: "500ms", "1.5s", "2m30s", "1h5m".
fn format_duration(d: Duration) -> String {
    const SEC: u128 = 1_000;
    const MIN: u128 = 60_000;
    const HOUR: u128 = 3_600_000;
    let ms = d.as_millis();
    if ms < SEC {
        format!("{}ms", ms)
    } else if ms < MIN {
        format!("{:.1}s", ms as f64 / 1000.0)
    } else if ms < HOUR {
        format!("{}m{}s", ms / MIN, (ms % MIN) / SEC)
    } else {
        format!("{}h{}m", ms / HOUR, (ms % HOUR) / MIN)
    }
}
/// Duration to display for a task: live elapsed time for running tasks,
/// otherwise the recorded result duration (falling back to the
/// scheduler's `elapsed` field).
fn task_elapsed(t: &TaskInfo) -> Duration {
    if !matches!(t.status, TaskStatus::Running) {
        return t.result.as_ref().map(|r| r.duration).unwrap_or(t.elapsed);
    }
    match t.started_at {
        // started_at is compared against now-as-unix-epoch seconds;
        // clamp to zero in case of clock skew.
        Some(started) => {
            let now = std::time::SystemTime::now()
                .duration_since(std::time::SystemTime::UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs_f64();
            Duration::from_secs_f64((now - started).max(0.0))
        }
        None => t.elapsed,
    }
}
/// Row color for a task: cancelled wins, then one color per status.
fn status_style(t: &TaskInfo) -> Style {
    let color = if t.cancelled {
        Color::DarkGray
    } else {
        match t.status {
            TaskStatus::Running => Color::Green,
            TaskStatus::Completed => Color::Blue,
            TaskStatus::Failed => Color::Red,
            TaskStatus::Pending => Color::DarkGray,
        }
    };
    Style::default().fg(color)
}
/// Single-cell status marker for a task row.
/// NOTE(review): the cancelled/Running/Completed/Failed arms appear to
/// return empty strings — confirm the intended glyphs weren't lost in
/// an encoding step.
fn status_symbol(t: &TaskInfo) -> &'static str {
    if t.cancelled {
        return "";
    }
    match t.status {
        TaskStatus::Running => "",
        TaskStatus::Completed => "",
        TaskStatus::Failed => "",
        TaskStatus::Pending => "·",
    }
}
/// Color for a log-event row, keyed on the event name; unknown events
/// render dimmed.
fn event_style(event: &str) -> Style {
    let color = match event {
        "completed" => Color::Blue,
        "failed" => Color::Red,
        "started" => Color::Green,
        _ => Color::DarkGray,
    };
    Style::default().fg(color)
}
/// Single-cell marker for a log-event row.
/// NOTE(review): the non-default arms appear to return empty strings —
/// confirm the intended glyphs weren't lost in an encoding step.
fn event_symbol(event: &str) -> &'static str {
    match event {
        "completed" => "",
        "failed" => "",
        "started" => "",
        _ => "·",
    }
}
/// Extract the HH:MM:SS portion of an RFC 3339-style timestamp
/// ("2026-03-11T16:57:14…" → "16:57:14"); returns the input unchanged
/// when it is too short or the slice would be invalid.
fn ts_time(ts: &str) -> &str {
    // `get` avoids the panic `&ts[11..19]` would raise when byte 11 or 19
    // is not a char boundary (e.g. multibyte garbage in a log line).
    ts.get(11..19).unwrap_or(ts)
}
/// Draw one frame: a three-row layout (3-line tab bar, flexible tab body,
/// 1-line footer), dispatching the body to the current tab's renderer.
fn render(frame: &mut Frame, app: &App) {
    let [header, body, footer] = Layout::vertical([
        Constraint::Length(3),
        Constraint::Min(0),
        Constraint::Length(1),
    ])
    .areas(frame.area());
    // Tab bar — show index hints for first 9 tabs
    let tab_titles: Vec<Line> = app
        .tabs
        .iter()
        .enumerate()
        .map(|(i, t)| {
            let hint = if i < 9 {
                format!("{}", i + 1)
            } else {
                " ".into()
            };
            Line::from(format!(" {} {} ", hint, t.label()))
        })
        .collect();
    let tabs = Tabs::new(tab_titles)
        .select(app.tab_idx)
        .highlight_style(
            Style::default()
                .fg(Color::Yellow)
                .add_modifier(Modifier::BOLD),
        )
        .block(Block::default().borders(Borders::ALL).title(" poc-memory daemon "));
    frame.render_widget(tabs, header);
    // Body
    match app.current_tab() {
        Tab::Overview => render_overview(frame, app, body),
        Tab::Pipeline => render_pipeline(frame, app, body),
        Tab::Agent(name) => render_agent_tab(frame, app, name, body),
        Tab::Log => render_log(frame, app, body),
    }
    // Footer — flash message, count prefix, or help text
    // Precedence: unexpired flash message (< 3s old) > pending vim-style
    // count prefix > per-tab help line.
    let footer_text = if let Some((ref msg, when)) = app.flash_msg {
        if when.elapsed() < Duration::from_secs(3) {
            Line::from(vec![
                Span::raw(" "),
                Span::styled(msg.as_str(), Style::default().fg(Color::Green)),
            ])
        } else {
            Line::raw("") // expired, will show help below
        }
    } else {
        Line::raw("")
    };
    // NOTE(review): this relies on `Line::raw("")` producing an empty
    // `spans` vec so the help/prefix branches are reachable — confirm
    // against the ratatui version in use.
    let footer_line = if !footer_text.spans.is_empty() {
        footer_text
    } else if let Some(n) = app.count_prefix {
        Line::from(vec![
            Span::styled(format!(" {}×", n), Style::default().fg(Color::Yellow).add_modifier(Modifier::BOLD)),
            Span::raw(" r: run agent │ Esc: cancel"),
        ])
    } else {
        match app.current_tab() {
            Tab::Agent(_) => Line::from(
                " Tab: switch │ ↑↓: scroll │ [N]r: run agent │ c: consolidate │ q: quit ",
            ),
            _ => Line::from(
                " Tab/1-9: switch │ ↑↓: scroll │ c: consolidate │ q: quit ",
            ),
        }
    };
    let footer_widget = Paragraph::new(footer_line).style(Style::default().fg(Color::DarkGray));
    frame.render_widget(footer_widget, footer);
}
// --- Overview tab ---
/// Overview tab: graph-health panel on top (fixed 12 rows), then the
/// task panel — in-flight tasks with their last output lines, followed
/// by the ten most recent completed/failed log entries.
fn render_overview(frame: &mut Frame, app: &App, area: Rect) {
    let [health_area, tasks_area] =
        Layout::vertical([Constraint::Length(12), Constraint::Min(0)]).areas(area);
    // `and_then` already yields `Option<&GraphHealth>`; binding with a
    // plain `Some(gh)` avoids the needless-`ref` double reference that
    // `Some(ref gh)` produced here.
    if let Some(gh) = app.status.as_ref().and_then(|s| s.graph_health.as_ref()) {
        render_health(frame, gh, health_area);
    } else {
        let p = Paragraph::new(" No graph health data available")
            .block(Block::default().borders(Borders::ALL).title(" Graph Health "));
        frame.render_widget(p, health_area);
    }
    // In-flight (running or pending) tasks come first.
    let in_flight: Vec<&TaskInfo> = app
        .tasks()
        .iter()
        .filter(|t| matches!(t.status, TaskStatus::Running | TaskStatus::Pending))
        .collect();
    let mut lines: Vec<Line> = Vec::new();
    if in_flight.is_empty() {
        lines.push(Line::from(" No tasks in flight").fg(Color::DarkGray));
    } else {
        for t in &in_flight {
            let elapsed = task_elapsed(t);
            // Hide the placeholder "idle" progress string.
            let progress = t
                .progress
                .as_deref()
                .filter(|p| *p != "idle")
                .unwrap_or("");
            lines.push(Line::from(vec![
                Span::styled(format!(" {} ", status_symbol(t)), status_style(t)),
                Span::raw(format!("{:30}", short_name(&t.name))),
                Span::styled(
                    format!(" {:>8}", format_duration(elapsed)),
                    Style::default().fg(Color::DarkGray),
                ),
                Span::raw(format!(" {}", progress)),
            ]));
            // For running tasks, echo the last two captured output lines.
            if matches!(t.status, TaskStatus::Running) && !t.output_log.is_empty() {
                let skip = t.output_log.len().saturating_sub(2);
                for line in &t.output_log[skip..] {
                    lines.push(Line::from(format!("{}", line)).fg(Color::DarkGray));
                }
            }
        }
    }
    lines.push(Line::raw(""));
    lines.push(Line::from(" Recent:").fg(Color::DarkGray));
    // Last 10 terminal (completed/failed) log entries, oldest first:
    // rev → take(10) grabs the newest, the second rev restores order.
    let recent: Vec<&LogEntry> = app
        .log_entries
        .iter()
        .rev()
        .filter(|e| e.event == "completed" || e.event == "failed")
        .take(10)
        .collect::<Vec<_>>()
        .into_iter()
        .rev()
        .collect();
    for entry in &recent {
        lines.push(Line::from(vec![
            Span::raw(" "),
            Span::styled(event_symbol(&entry.event), event_style(&entry.event)),
            Span::raw(format!(
                " {} {:28} {}",
                ts_time(&entry.ts),
                short_name(&entry.job),
                entry.detail
            )),
        ]));
    }
    let tasks_widget = Paragraph::new(lines)
        .block(Block::default().borders(Borders::ALL).title(" Tasks "))
        .scroll((app.scroll as u16, 0));
    frame.render_widget(tasks_widget, tasks_area);
}
/// Draw the "Graph Health" panel: a two-line metrics summary, three
/// threshold gauges (α, gini, clustering coefficient), and the planned
/// agent counts for the next consolidation run.
fn render_health(frame: &mut Frame, gh: &GraphHealth, area: Rect) {
    let block = Block::default()
        .borders(Borders::ALL)
        .title(format!(" Graph Health ({}) ", gh.computed_at));
    let inner = block.inner(area);
    frame.render_widget(block, area);
    let [metrics_area, gauges_area, plan_area] = Layout::vertical([
        Constraint::Length(2),
        Constraint::Length(4),
        Constraint::Min(1),
    ])
    .areas(inner);
    // Metrics summary line: raw graph counts.
    let summary = Line::from(format!(
        " {} nodes {} edges {} communities",
        gh.nodes, gh.edges, gh.communities
    ));
    // Episodic ratio is green below the 40% threshold, red otherwise;
    // σ is displayed alongside without a threshold of its own.
    let ep_line = Line::from(vec![
        Span::raw(" episodic: "),
        Span::styled(
            format!("{:.0}%", gh.episodic_ratio * 100.0),
            if gh.episodic_ratio < 0.4 {
                Style::default().fg(Color::Green)
            } else {
                Style::default().fg(Color::Red)
            },
        ),
        Span::raw(format!(" σ={:.1}", gh.sigma)),
    ]);
    frame.render_widget(Paragraph::new(vec![summary, ep_line]), metrics_area);
    // Gauges — each title states its healthy threshold and the fill
    // color flips green/red on that threshold.
    let [g1, g2, g3] = Layout::horizontal([
        Constraint::Ratio(1, 3),
        Constraint::Ratio(1, 3),
        Constraint::Ratio(1, 3),
    ])
    .areas(gauges_area);
    // α is displayed against a fixed 0..5 scale.
    let alpha_color = if gh.alpha >= 2.5 { Color::Green } else { Color::Red };
    frame.render_widget(
        Gauge::default()
            .block(Block::default().borders(Borders::ALL).title(" α (≥2.5) "))
            .gauge_style(Style::default().fg(alpha_color))
            .ratio((gh.alpha / 5.0).clamp(0.0, 1.0) as f64)
            .label(format!("{:.2}", gh.alpha)),
        g1,
    );
    // gini: lower is healthier here (≤ 0.4).
    let gini_color = if gh.gini <= 0.4 { Color::Green } else { Color::Red };
    frame.render_widget(
        Gauge::default()
            .block(Block::default().borders(Borders::ALL).title(" gini (≤0.4) "))
            .gauge_style(Style::default().fg(gini_color))
            .ratio(gh.gini.clamp(0.0, 1.0) as f64)
            .label(format!("{:.3}", gh.gini)),
        g2,
    );
    // avg clustering coefficient: higher is healthier (≥ 0.2).
    let cc_color = if gh.avg_cc >= 0.2 { Color::Green } else { Color::Red };
    frame.render_widget(
        Gauge::default()
            .block(Block::default().borders(Borders::ALL).title(" cc (≥0.2) "))
            .gauge_style(Style::default().fg(cc_color))
            .ratio(gh.avg_cc.clamp(0.0, 1.0) as f64)
            .label(format!("{:.3}", gh.avg_cc)),
        g3,
    );
    // Plan — the "+1" accounts for the always-included health agent,
    // as echoed by the "+health" suffix in the label.
    let total = gh.plan_replay + gh.plan_linker + gh.plan_separator + gh.plan_transfer + 1;
    let plan_line = Line::from(vec![
        Span::raw(" plan: "),
        Span::styled(
            format!("{}", total),
            Style::default().add_modifier(Modifier::BOLD),
        ),
        Span::raw(format!(
            " agents ({}r {}l {}s {}t +health)",
            gh.plan_replay, gh.plan_linker, gh.plan_separator, gh.plan_transfer
        )),
    ]);
    frame.render_widget(Paragraph::new(plan_line), plan_area);
}
// --- Pipeline tab ---
/// Pipeline tab: the daily consolidation pipeline as a table, with
/// tasks grouped into canonical phase order (by name prefix) and any
/// unrecognized tasks appended at the end.
fn render_pipeline(frame: &mut Frame, app: &App, area: Rect) {
    let tasks = app.pipeline_tasks();
    if tasks.is_empty() {
        let placeholder = Paragraph::new(" No pipeline tasks")
            .block(Block::default().borders(Borders::ALL).title(" Daily Pipeline "));
        frame.render_widget(placeholder, area);
        return;
    }
    // Canonical ordering of the pipeline phases; tasks are matched by
    // name prefix so related sub-steps sort next to their phase.
    let phase_order = [
        "c-health", "c-replay", "c-linker", "c-separator", "c-transfer",
        "c-apply", "c-orphans", "c-cap", "c-digest", "c-digest-links", "c-knowledge",
    ];
    let mut emitted = std::collections::HashSet::new();
    let mut rows: Vec<Row> = Vec::new();
    // First pass: tasks matching a known phase prefix, in phase order.
    // `emitted` guards against listing the same task name twice.
    for prefix in &phase_order {
        for task in &tasks {
            if task.name.starts_with(prefix) && emitted.insert(&task.name) {
                rows.push(pipeline_row(task));
            }
        }
    }
    // Second pass: anything that matched no known prefix goes last.
    for task in &tasks {
        if emitted.insert(&task.name) {
            rows.push(pipeline_row(task));
        }
    }
    let header_style = Style::default()
        .add_modifier(Modifier::BOLD)
        .fg(Color::DarkGray);
    let header = Row::new(vec!["", "Phase", "Status", "Duration", "Progress"]).style(header_style);
    let widths = [
        Constraint::Length(2),
        Constraint::Length(30),
        Constraint::Length(10),
        Constraint::Length(10),
        Constraint::Min(20),
    ];
    let table = Table::new(rows, widths)
        .header(header)
        .block(Block::default().borders(Borders::ALL).title(" Daily Pipeline "));
    frame.render_widget(table, area);
}
/// Build one pipeline-table row for a task: status glyph, shortened
/// name, status text, elapsed duration (blank when zero), and either
/// the truncated error message or the progress string.
fn pipeline_row(t: &TaskInfo) -> Row<'static> {
    let elapsed = task_elapsed(t);
    let progress = t.progress.as_deref().unwrap_or("").to_string();
    let error = t
        .result
        .as_ref()
        .and_then(|r| r.error.as_ref())
        .map(|e| {
            // Truncate by characters, not bytes: the previous
            // `&e[..40]` panics when byte 40 of an arbitrary error
            // message falls mid-UTF-8-character.
            let short: String = e.chars().take(40).collect();
            format!("err: {}", short)
        })
        .unwrap_or_default();
    // A failure message takes precedence over the progress string.
    let detail = if !error.is_empty() { error } else { progress };
    Row::new(vec![
        Cell::from(status_symbol(t)).style(status_style(t)),
        Cell::from(short_name(&t.name)),
        Cell::from(format!("{}", t.status)),
        Cell::from(if !elapsed.is_zero() {
            format_duration(elapsed)
        } else {
            String::new()
        }),
        Cell::from(detail),
    ])
    .style(status_style(t))
}
// --- Per-agent-type tab ---
/// Per-agent-type tab: the agent's active/recent tasks (with retry
/// counts, captured output, and error text), followed by the last 30
/// log entries for the same agent type.
fn render_agent_tab(frame: &mut Frame, app: &App, agent_type: &str, area: Rect) {
    let tasks = app.tasks_for_agent(agent_type);
    let logs = app.logs_for_agent(agent_type);
    let mut lines: Vec<Line> = Vec::new();
    // Active/recent tasks
    if tasks.is_empty() {
        lines.push(Line::from(" No active tasks").fg(Color::DarkGray));
    } else {
        lines.push(Line::styled(
            " Tasks:",
            Style::default().add_modifier(Modifier::BOLD),
        ));
        lines.push(Line::raw(""));
        for t in &tasks {
            let elapsed = task_elapsed(t);
            // Blank out a zero duration rather than printing "0ms".
            let elapsed_str = if !elapsed.is_zero() {
                format_duration(elapsed)
            } else {
                String::new()
            };
            // Hide the placeholder "idle" progress string.
            let progress = t
                .progress
                .as_deref()
                .filter(|p| *p != "idle")
                .unwrap_or("");
            lines.push(Line::from(vec![
                Span::styled(format!(" {} ", status_symbol(t)), status_style(t)),
                Span::styled(format!("{:30}", &t.name), status_style(t)),
                Span::styled(
                    format!(" {:>8}", elapsed_str),
                    Style::default().fg(Color::DarkGray),
                ),
                Span::raw(format!(" {}", progress)),
            ]));
            // Retries — only shown once a retry has actually happened.
            if t.max_retries > 0 && t.retry_count > 0 {
                lines.push(Line::from(vec![
                    Span::raw("    retry "),
                    Span::styled(
                        format!("{}/{}", t.retry_count, t.max_retries),
                        Style::default().fg(Color::Yellow),
                    ),
                ]));
            }
            // Output log — full captured output, dimmed.
            if !t.output_log.is_empty() {
                for log_line in &t.output_log {
                    lines.push(Line::from(format!("{}", log_line)).fg(Color::DarkGray));
                }
            }
            // Error — only for failed tasks that recorded a result error.
            if matches!(t.status, TaskStatus::Failed) {
                if let Some(ref r) = t.result {
                    if let Some(ref err) = r.error {
                        lines.push(Line::from(vec![
                            Span::styled("    error: ", Style::default().fg(Color::Red)),
                            Span::styled(err.as_str(), Style::default().fg(Color::Red)),
                        ]));
                    }
                }
            }
            lines.push(Line::raw(""));
        }
    }
    // Log history for this agent type
    lines.push(Line::styled(
        " Log history:",
        Style::default().add_modifier(Modifier::BOLD),
    ));
    lines.push(Line::raw(""));
    if logs.is_empty() {
        lines.push(Line::from(" (no log entries)").fg(Color::DarkGray));
    } else {
        // Show last 30 entries
        let start = logs.len().saturating_sub(30);
        for entry in &logs[start..] {
            lines.push(Line::from(vec![
                Span::raw(" "),
                Span::styled(event_symbol(&entry.event), event_style(&entry.event)),
                Span::raw(" "),
                Span::styled(ts_time(&entry.ts), Style::default().fg(Color::DarkGray)),
                Span::raw(" "),
                Span::styled(format!("{:12}", entry.event), event_style(&entry.event)),
                Span::raw(format!(" {}", entry.detail)),
            ]));
        }
    }
    let title = format!(" {} ", agent_type);
    let p = Paragraph::new(lines)
        .block(Block::default().borders(Borders::ALL).title(title))
        .wrap(Wrap { trim: false })
        .scroll((app.scroll as u16, 0));
    frame.render_widget(p, area);
}
// --- Log tab ---
/// Log tab: the daemon log inside a bordered frame. A scroll value of
/// zero means "follow the tail"; any other value is honoured but
/// clamped so the view never scrolls past the last full page.
fn render_log(frame: &mut Frame, app: &App, area: Rect) {
    let border = Block::default().borders(Borders::ALL).title(" Daemon Log ");
    let inner = border.inner(area);
    frame.render_widget(border, area);

    let height = inner.height as usize;
    let max_offset = app.log_entries.len().saturating_sub(height);
    let offset = if app.scroll == 0 {
        // Auto-scroll: pin the window to the newest entries.
        max_offset
    } else {
        app.scroll.min(max_offset)
    };

    // One display line per visible entry: time, event name, job, detail.
    let lines: Vec<Line> = app
        .log_entries
        .iter()
        .skip(offset)
        .take(height)
        .map(|entry| {
            Line::from(vec![
                Span::styled(ts_time(&entry.ts), Style::default().fg(Color::DarkGray)),
                Span::raw(" "),
                Span::styled(format!("{:12}", entry.event), event_style(&entry.event)),
                Span::raw(format!(" {:30} {}", short_name(&entry.job), entry.detail)),
            ])
        })
        .collect();
    frame.render_widget(Paragraph::new(lines), inner);
}
// --- Helpers ---
/// Shorten a task name of the form "<verb> <path>" for display: keep
/// the verb, reduce the path to its final component, drop a ".jsonl"
/// suffix, and cap the remainder at 12 characters. Names without a
/// space are returned unchanged.
fn short_name(name: &str) -> String {
    if let Some((verb, path)) = name.split_once(' ') {
        let file = path.rsplit('/').next().unwrap_or(path);
        let file = file.strip_suffix(".jsonl").unwrap_or(file);
        // Truncate by characters, not bytes: the previous `&file[..12]`
        // panics when byte 12 of a non-ASCII filename falls
        // mid-UTF-8-character.
        let short: String = file.chars().take(12).collect();
        format!("{} {}", verb, short)
    } else {
        name.to_string()
    }
}
/// Send one command over the daemon's status socket and return the
/// full text response. Any connection, write, or read failure yields
/// `None` (best-effort: callers treat a missing daemon as "no data").
fn send_rpc(cmd: &str) -> Option<String> {
    use std::io::Write as _;
    let mut conn = UnixStream::connect(status_sock_path()).ok()?;
    // Timeouts keep the TUI from hanging on a wedged daemon.
    conn.set_write_timeout(Some(Duration::from_secs(2))).ok();
    conn.set_read_timeout(Some(Duration::from_secs(5))).ok();
    conn.write_all(cmd.as_bytes()).ok()?;
    // Half-close the write side to signal end-of-request; the daemon
    // replies until it closes its end.
    conn.shutdown(std::net::Shutdown::Write).ok()?;
    let mut response = String::new();
    conn.read_to_string(&mut response).ok()?;
    Some(response)
}
// --- Entry point ---
/// Launch the TUI: verify we are attached to a real terminal, run the
/// event loop, and restore the terminal state on exit.
pub fn run_tui() -> Result<(), String> {
    use crossterm::terminal;
    // Probe: enabling raw mode fails fast when stdout is not a TTY,
    // giving a clean error before ratatui takes over the screen. The
    // immediate disable undoes the probe; `ratatui::init()` below
    // performs the real terminal setup.
    terminal::enable_raw_mode().map_err(|e| format!("not a terminal: {}", e))?;
    terminal::disable_raw_mode().ok();
    let mut terminal = ratatui::init();
    let result = run_event_loop(&mut terminal);
    // Always restore the terminal, even when the loop returned an error.
    ratatui::restore();
    result
}
/// Main TUI loop: draw, wait up to 250 ms for input, dispatch the key,
/// drain any queued events, then refresh daemon state. Returns `Ok` on
/// quit ('q' / Ctrl-C) and `Err` on terminal I/O failure or when the
/// daemon is unreachable at startup.
fn run_event_loop(terminal: &mut DefaultTerminal) -> Result<(), String> {
    let mut app = App::new();
    // No status after construction means there is no daemon to talk to.
    if app.status.is_none() {
        return Err("Daemon not running.".into());
    }
    loop {
        terminal
            .draw(|frame| render(frame, &app))
            .map_err(|e| format!("draw: {}", e))?;
        // Bounded poll so the UI keeps refreshing even with no input.
        if event::poll(Duration::from_millis(250)).map_err(|e| format!("poll: {}", e))? {
            if let Event::Key(key) = event::read().map_err(|e| format!("read: {}", e))? {
                match key.code {
                    KeyCode::Char('q') => return Ok(()),
                    KeyCode::Char('c') if key.modifiers.contains(KeyModifiers::CONTROL) => {
                        return Ok(())
                    }
                    // Plain 'c': ask the daemon for a consolidation run.
                    // Backdating last_poll forces an immediate re-poll.
                    KeyCode::Char('c') => {
                        let _ = send_rpc("consolidate");
                        app.last_poll = Instant::now() - POLL_INTERVAL;
                    }
                    KeyCode::Char('r') => {
                        // Run specific agent type if on an agent tab.
                        // The clone releases the borrow of `app` before
                        // the mutations below.
                        if let Tab::Agent(ref name) = app.current_tab().clone() {
                            // A previously typed count prefix ("3r")
                            // multiplies the queued runs; default is 1.
                            let count = app.count_prefix.unwrap_or(1);
                            let cmd = format!("run-agent {} {}", name, count);
                            let _ = send_rpc(&cmd);
                            app.flash_msg = Some((
                                format!("Queued {} {} run{}", count, name,
                                    if count > 1 { "s" } else { "" }),
                                Instant::now(),
                            ));
                            app.count_prefix = None;
                            app.last_poll = Instant::now() - POLL_INTERVAL;
                        }
                    }
                    KeyCode::Tab => { app.count_prefix = None; app.next_tab(); }
                    KeyCode::BackTab => { app.count_prefix = None; app.prev_tab(); }
                    // Number keys: if on agent tab, accumulate as count prefix
                    // (decimal-style, so "1" then "2" gives 12);
                    // otherwise switch tabs.
                    KeyCode::Char(c @ '1'..='9') => {
                        if matches!(app.current_tab(), Tab::Agent(_)) {
                            let digit = (c as usize) - ('0' as usize);
                            app.count_prefix = Some(
                                app.count_prefix.unwrap_or(0) * 10 + digit
                            );
                        } else {
                            let idx = (c as usize) - ('1' as usize);
                            if idx < app.tabs.len() {
                                app.tab_idx = idx;
                                app.scroll = 0;
                            }
                        }
                    }
                    KeyCode::Down | KeyCode::Char('j') => {
                        app.scroll = app.scroll.saturating_add(1);
                    }
                    KeyCode::Up | KeyCode::Char('k') => {
                        app.scroll = app.scroll.saturating_sub(1);
                    }
                    KeyCode::PageDown => {
                        app.scroll = app.scroll.saturating_add(20);
                    }
                    KeyCode::PageUp => {
                        app.scroll = app.scroll.saturating_sub(20);
                    }
                    KeyCode::Home => {
                        app.scroll = 0;
                    }
                    // Esc cancels a pending count prefix.
                    KeyCode::Esc => {
                        app.count_prefix = None;
                    }
                    _ => {}
                }
            }
            // Drain remaining events (e.g. key-repeat bursts) so we
            // don't redraw once per queued event.
            while event::poll(Duration::ZERO).unwrap_or(false) {
                let _ = event::read();
            }
        }
        // Refresh daemon state; presumably rate-limited internally via
        // `last_poll` (see App::poll) — the 'c'/'r' handlers backdate
        // it to force an immediate refresh.
        app.poll();
    }
}

View file

@ -57,3 +57,17 @@ pub fn jsonl_append<T: Serialize>(path: &Path, item: &T) -> Result<(), String> {
.map_err(|e| format!("write {}: {}", path.display(), e)) .map_err(|e| format!("write {}: {}", path.display(), e))
} }
/// Parse a timestamp string to unix epoch seconds.
/// Handles: "2026-03-05T19:56:00", "2026-03-05T19:56", "2026-03-05 19:56:00", "2026-03-05 19:56"
pub fn parse_timestamp_to_epoch(ts: &str) -> Option<i64> {
    use chrono::{Local, NaiveDateTime, TimeZone};
    // Accepted layouts, tried in order: seconds are optional, and the
    // date/time separator may be 'T' or a space.
    let formats = ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M"];
    formats.iter().find_map(|fmt| {
        let ndt = NaiveDateTime::parse_from_str(ts, fmt).ok()?;
        // `earliest()` resolves a DST-ambiguous local time to its first
        // occurrence and yields None for nonexistent (spring-forward) times.
        Some(Local.from_local_datetime(&ndt).earliest()?.timestamp())
    })
}

Some files were not shown because too many files have changed in this diff Show more