From 908f8c9e5247daf876c4f9bd2e972c4b45ab05e0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Mar 2026 13:56:09 -0500 Subject: [PATCH] docs: split README into component docs, update jobkit dep - Break README into README.md (overview), docs/daemon.md (pipeline stages, diagnostics, common issues), docs/notifications.md (notification daemon, IRC/Telegram modules) - Update jobkit dependency from local path to git URL Co-Authored-By: ProofOfConcept --- Cargo.lock | 1 + Cargo.toml | 2 +- README.md | 145 +++++------------------------------------- docs/daemon.md | 106 ++++++++++++++++++++++++++++++ docs/notifications.md | 103 ++++++++++++++++++++++++++++++ 5 files changed, 228 insertions(+), 129 deletions(-) create mode 100644 docs/daemon.md create mode 100644 docs/notifications.md diff --git a/Cargo.lock b/Cargo.lock index 38b67f4..b5ec9f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1294,6 +1294,7 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jobkit" version = "0.1.0" +source = "git+https://evilpiepirate.org/git/jobkit.git/#2cdf0d5c3dd55f3d1783c40211a7eb96707d1ab6" dependencies = [ "crossbeam-deque", "log", diff --git a/Cargo.toml b/Cargo.toml index 1066b08..692cc4f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ memmap2 = "0.9" rayon = "1" peg = "0.8" paste = "1" -jobkit = { path = "/home/kent/jobkit" } +jobkit = { git = "https://evilpiepirate.org/git/jobkit.git/" } log = "0.4" # poc-daemon deps diff --git a/README.md b/README.md index 0e421ef..24d2bde 100644 --- a/README.md +++ b/README.md @@ -28,17 +28,12 @@ detection. ### Background agents -A background daemon (`poc-memory daemon`) automatically spawns agents -for memory maintenance: +See [docs/daemon.md](docs/daemon.md) for full daemon documentation. 
-- **Experience mining** — when a session ends, extracts experiences - and observations from the transcript into journal entries -- **Fact extraction** — pulls concrete facts (names, dates, decisions, - preferences) into structured knowledge nodes -- **Consolidation** — periodic graph health work: replay queues - (spaced repetition), interference detection (contradictory nodes), - hub differentiation (splitting overloaded nodes), triangle closure - (connecting nodes that share neighbors), and orphan linking +A background daemon (`poc-memory daemon`) automatically processes +session transcripts through experience-mine (journal extraction) +and fact-mine (structured knowledge extraction) stages, with +segment-aware splitting for large multi-compaction sessions. ### Neuroscience-inspired algorithms @@ -64,66 +59,12 @@ recall and relevance. ## Notification system -A separate daemon (`poc-daemon`) routes messages from communication -modules and internal events through a hierarchical, activity-aware -delivery system. +See [docs/notifications.md](docs/notifications.md) for full +notification daemon documentation. -### Architecture - -``` - Communication modules Hooks - ┌──────────────────┐ ┌─────────────┐ - │ IRC (native) │──┐ │ poc-hook │ - │ Telegram (native│ │ mpsc │ (all events)│ - └──────────────────┘ ├──────┐ └──────┬───────┘ - │ │ │ - ▼ │ capnp-rpc - ┌───────────┘ │ - │ poc-daemon │ - │ │ - │ NotifyState ◄─────────┘ - │ ├── type registry - │ ├── pending queue - │ ├── threshold lookup - │ └── activity-aware delivery - │ - │ idle::State - │ ├── presence detection - │ ├── sleep/wake/dream modes - │ └── tmux prompt injection - └──────────────────────── -``` - -### Notification types and urgency - -Types are free-form hierarchical strings: `irc.mention.nick`, -`irc.channel.bcachefs`, `telegram.kent`. 
Each has an urgency level: - -| Level | Name | Meaning | -|-------|---------|--------------------------------------| -| 0 | ambient | Include in idle context only | -| 1 | low | Deliver on next check | -| 2 | normal | Deliver on next user interaction | -| 3 | urgent | Interrupt immediately | - -Per-type thresholds walk up the hierarchy: `irc.channel.bcachefs-ai` -→ `irc.channel` → `irc` → default. Effective thresholds adjust by -activity state: raised when focused, lowered when idle, only urgent -when sleeping. - -### Communication modules - -**IRC** — native async TLS connection (tokio-rustls). Connects, -joins channels, parses messages, generates notifications. Runtime -commands: join, leave, send, status, log, nick. Per-channel logs -at `~/.claude/irc/logs/`. - -**Telegram** — native async HTTP long-polling (reqwest). Downloads -media (photos, voice, documents). Chat ID filtering for security. -Runtime commands: send, status, log. - -Both modules persist config changes to `~/.claude/daemon.toml` — -channel joins and nick changes survive restarts. +`poc-daemon` routes messages from IRC (native async TLS) and +Telegram (native async HTTP) through a hierarchical, activity-aware +delivery system with urgency levels and per-type thresholds. ## Quick start @@ -174,27 +115,6 @@ reads directly from disk rather than the store. 
Override: `POC_MEMORY_CONFIG=/path/to/config.jsonl` -### Notification daemon - -Config: `~/.claude/daemon.toml` - -```toml -[irc] -enabled = true -server = "irc.oftc.net" -port = 6697 -tls = true -nick = "MyBot" -user = "bot" -realname = "My Bot" -channels = ["#mychannel"] - -[telegram] -enabled = true -token = "bot-token-here" -chat_id = 123456789 -``` - ### Hooks Configured in `~/.claude/settings.json`: @@ -231,6 +151,8 @@ poc-memory categorize KEY CAT # core/tech/gen/obs/task poc-memory journal-write "text" # Write a journal entry poc-memory journal-tail [N] # Last N entries (default 20) + --full # Show full content (not truncated) + --level=daily|weekly|monthly # Show digest level poc-memory used KEY # Boost weight (was useful) poc-memory wrong KEY [CTX] # Reduce weight (was wrong) @@ -244,47 +166,13 @@ poc-memory load-context # Output session-start context poc-memory load-context --stats # Context size breakdown ``` -### Notification daemon - -```bash -poc-daemon # Start daemon -poc-daemon status # State summary -poc-daemon irc status # IRC module status -poc-daemon irc send TARGET MSG # Send IRC message -poc-daemon irc join CHANNEL # Join (persists to config) -poc-daemon irc leave CHANNEL # Leave -poc-daemon irc log [N] # Last N messages -poc-daemon telegram status # Telegram module status -poc-daemon telegram send MSG # Send Telegram message -poc-daemon telegram log [N] # Last N messages -poc-daemon notify TYPE URG MSG # Submit notification -poc-daemon notifications [URG] # Get + drain pending -poc-daemon notify-types # List all types -poc-daemon notify-threshold T L # Set per-type threshold -poc-daemon sleep / wake / quiet # Session management -poc-daemon stop # Shut down -``` - ### Mining (used by background daemon) ```bash -poc-memory experience-mine PATH # Extract experiences from transcript -poc-memory fact-mine-store PATH # Extract and store facts +poc-memory experience-mine PATH [--segment N] # Extract experiences +poc-memory fact-mine-store PATH # 
Extract and store facts ``` -## How the hooks work - -**memory-search** (UserPromptSubmit): -1. First prompt or post-compaction: loads full memory context via - `poc-memory load-context` -2. Every prompt: keyword search, returns relevant memories as - additionalContext. Deduplicates across the session. - -**poc-hook** (UserPromptSubmit, PostToolUse, Stop): -- Signals user activity and responses to poc-daemon -- Drains pending notifications into additionalContext -- Monitors context window usage, warns before compaction - ## Architecture - **Store**: Append-only Cap'n Proto log with in-memory cache. Nodes @@ -296,9 +184,10 @@ poc-memory fact-mine-store PATH # Extract and store facts - **Neuro**: Spectral embedding, consolidation scoring, replay queues, interference detection, hub differentiation. - **Daemon (memory)**: jobkit-based task scheduling with - resource-gated LLM access. + resource-gated LLM access. See [docs/daemon.md](docs/daemon.md). - **Daemon (notify)**: Cap'n Proto RPC over Unix socket, tokio - LocalSet with native async IRC and Telegram modules. + LocalSet with native async IRC and Telegram modules. See + [docs/notifications.md](docs/notifications.md). ## For AI assistants diff --git a/docs/daemon.md b/docs/daemon.md new file mode 100644 index 0000000..6a5852f --- /dev/null +++ b/docs/daemon.md @@ -0,0 +1,106 @@ +# Memory daemon + +The background daemon (`poc-memory daemon`) automatically processes +session transcripts through a multi-stage pipeline, extracting +experiences and facts into the knowledge graph. + +## Starting + +```bash +poc-memory daemon # Start foreground +poc-memory daemon install # Install systemd service + hooks +``` + +## Pipeline stages + +Each session file goes through these stages in order: + +1. **find_stale_sessions** — stat-only scan for JSONL files >100KB, + older than SESSION_STALE_SECS (default 120s). No file reads. + +2. 
**segment splitting** — files with multiple compaction boundaries + (`"This session is being continued"`) are split into segments. + Each segment gets its own LLM job. Segment counts are cached in + a `seg_cache` HashMap to avoid re-parsing large files every tick. + +3. **experience-mine** — LLM extracts journal entries, observations, + and experiences from each segment. Writes results to the store. + Dedup key: `_mined-transcripts.md#f-{uuid}` (single-segment) or + `_mined-transcripts.md#f-{uuid}.{N}` (multi-segment). + +4. **fact-mine** — LLM extracts structured facts (names, dates, + decisions, preferences). Only starts when all experience-mine + work is done. Dedup key: `_facts-{uuid}`. + +5. **whole-file key** — for multi-segment files, once all segments + complete, a whole-file key is written so future ticks skip + re-parsing. + +## Resource management + +LLM calls are gated by a jobkit resource pool (default 1 slot). +This serializes API access and prevents memory pressure from +concurrent store loads. MAX_NEW_PER_TICK (10) limits how many +tasks are spawned per 60s watcher tick. + +## Diagnostics + +### Log + +```bash +tail -f ~/.claude/memory/daemon.log +``` + +JSON lines with `ts`, `job`, `event`, and `detail` fields. + +### Understanding the tick line + +``` +{"job":"session-watcher","event":"tick", + "detail":"277 stale, 219 mined, 4 extract, 0 fact, 0 open"} +``` + +| Field | Meaning | +|---------|---------| +| stale | Total session files on disk matching age+size criteria. This is a filesystem count — it does NOT decrease as sessions are mined. | +| mined | Sessions with both experience-mine AND fact-mine complete. | +| extract | Segments currently queued/running for experience-mine. | +| fact | Sessions queued/running for fact-mine. | +| open | Sessions still being written to (skipped). | + +Progress = mined / stale. When mined equals stale, the backlog is clear. 
+ +### Checking pipeline health + +```bash +# Experience-mine completions (logged as "experience-mine", not "extract") +grep "experience-mine.*completed" ~/.claude/memory/daemon.log | wc -l + +# Errors +grep "experience-mine.*failed" ~/.claude/memory/daemon.log | wc -l + +# Store size and node count +poc-memory status +wc -c ~/.claude/memory/nodes.capnp +``` + +## Common issues + +**stale count never decreases**: Normal. It's a raw file count, not a +backlog counter. Compare `mined` to `stale` for actual progress. + +**Early failures ("claude exited exit status: 1")**: Oversized segments +hitting the LLM context limit. The 150k-token size guard and segmented +mining should prevent this. If it recurs, check segment sizes. + +**Memory pressure (OOM)**: Each job loads the full capnp store. At +200MB+ store size, concurrent jobs can spike to ~5GB. The resource pool +serializes access, but if the pool size is increased, watch RSS. + +**Segments not progressing**: The watcher memoizes segment counts in +`seg_cache`. If a file is modified after caching (e.g., session resumed), +the daemon won't see new segments until restarted. + +**Extract jobs queued but 0 completed in log**: Completion events are +logged under the `experience-mine` job name, not `extract`. The `extract` +label is only used for queue events. diff --git a/docs/notifications.md b/docs/notifications.md new file mode 100644 index 0000000..f2479da --- /dev/null +++ b/docs/notifications.md @@ -0,0 +1,103 @@ +# Notification daemon + +`poc-daemon` routes messages from communication modules and internal +events through a hierarchical, activity-aware delivery system. 
+ +## Architecture + +``` + Communication modules Hooks + +-----------------+ +-------------+ + | IRC (native) |--+ | poc-hook | + | Telegram | | mpsc | (all events)| + +-----------------+ +------+ +------+------+ + | | | + v | capnp-rpc + +----------+ | + | poc-daemon | + | | + | NotifyState <---------+ + | +-- type registry + | +-- pending queue + | +-- threshold lookup + | +-- activity-aware delivery + | + | idle::State + | +-- presence detection + | +-- sleep/wake/dream modes + | +-- tmux prompt injection + +-------------------------- +``` + +## Notification types and urgency + +Types are free-form hierarchical strings: `irc.mention.nick`, +`irc.channel.bcachefs`, `telegram.kent`. Each has an urgency level: + +| Level | Name | Meaning | +|-------|---------|--------------------------------------| +| 0 | ambient | Include in idle context only | +| 1 | low | Deliver on next check | +| 2 | normal | Deliver on next user interaction | +| 3 | urgent | Interrupt immediately | + +Per-type thresholds walk up the hierarchy: `irc.channel.bcachefs-ai` +-> `irc.channel` -> `irc` -> default. Effective thresholds adjust by +activity state: raised when focused, lowered when idle, only urgent +when sleeping. + +## Communication modules + +**IRC** — native async TLS connection (tokio-rustls). Connects, +joins channels, parses messages, generates notifications. Runtime +commands: join, leave, send, status, log, nick. Per-channel logs +at `~/.claude/irc/logs/`. + +**Telegram** — native async HTTP long-polling (reqwest). Downloads +media (photos, voice, documents). Chat ID filtering for security. +Runtime commands: send, status, log. + +Both modules persist config changes to `~/.claude/daemon.toml` — +channel joins and nick changes survive restarts.
+ +## Commands + +```bash +poc-daemon # Start daemon +poc-daemon status # State summary +poc-daemon irc status # IRC module status +poc-daemon irc send TARGET MSG # Send IRC message +poc-daemon irc join CHANNEL # Join (persists to config) +poc-daemon irc leave CHANNEL # Leave +poc-daemon irc log [N] # Last N messages +poc-daemon telegram status # Telegram module status +poc-daemon telegram send MSG # Send Telegram message +poc-daemon telegram log [N] # Last N messages +poc-daemon notify TYPE URG MSG # Submit notification +poc-daemon notifications [URG] # Get + drain pending +poc-daemon notify-types # List all types +poc-daemon notify-threshold T L # Set per-type threshold +poc-daemon sleep / wake / quiet # Session management +poc-daemon stop # Shut down +``` + +## Configuration + +Config: `~/.claude/daemon.toml` + +```toml +[irc] +enabled = true +server = "irc.oftc.net" +port = 6697 +tls = true +nick = "MyBot" +user = "bot" +realname = "My Bot" +channels = ["#mychannel"] + +[telegram] +enabled = true +token = "bot-token-here" +chat_id = 123456789 +```