poc-memory v0.4.0: graph-structured memory with consolidation pipeline

Rust core:
- Cap'n Proto append-only storage (nodes + relations)
- Graph algorithms: clustering coefficient, community detection,
  schema fit, small-world metrics, interference detection
- BM25 text similarity with Porter stemming
- Spaced repetition replay queue
- Commands: search, init, health, status, graph, categorize,
  link-add, link-impact, decay, consolidate-session, etc.

Python scripts:
- Episodic digest pipeline: daily/weekly/monthly-digest.py
- retroactive-digest.py for backfilling
- consolidation-agents.py: 3 parallel Sonnet agents
- apply-consolidation.py: structured action extraction + apply
- digest-link-parser.py: extract ~400 explicit links from digests
- content-promotion-agent.py: promote episodic obs to semantic files
- bulk-categorize.py: categorize all nodes via single Sonnet call
- consolidation-loop.py: multi-round automated consolidation

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-02-28 22:17:00 -05:00
commit 23fac4e5fe
35 changed files with 9388 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@@ -0,0 +1 @@
target/

603
Cargo.lock generated Normal file
View file

@@ -0,0 +1,603 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
name = "bitflags"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
[[package]]
name = "bumpalo"
version = "3.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
[[package]]
name = "capnp"
version = "0.20.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "053b81915c2ce1629b8fb964f578b18cb39b23ef9d5b24120d0dfc959569a1d9"
dependencies = [
"embedded-io",
]
[[package]]
name = "capnpc"
version = "0.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aa3d5f01e69ed11656d2c7c47bf34327ea9bfb5c85c7de787fcd7b6c5e45b61"
dependencies = [
"capnp",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "embedded-io"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "getrandom"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "getrandom"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasip2",
"wasip3",
]
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"foldhash",
]
[[package]]
name = "hashbrown"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "id-arena"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
[[package]]
name = "indexmap"
version = "2.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
dependencies = [
"equivalent",
"hashbrown 0.16.1",
"serde",
"serde_core",
]
[[package]]
name = "itoa"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "js-sys"
version = "0.3.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "leb128fmt"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
[[package]]
name = "libc"
version = "0.2.182"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
[[package]]
name = "log"
version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "memchr"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "once_cell"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "poc-memory"
version = "0.4.0"
dependencies = [
"capnp",
"capnpc",
"libc",
"rand",
"regex",
"serde",
"serde_json",
"uuid",
]
[[package]]
name = "ppv-lite86"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
"zerocopy",
]
[[package]]
name = "prettyplease"
version = "0.2.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
dependencies = [
"proc-macro2",
"syn",
]
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom 0.2.17",
]
[[package]]
name = "regex"
version = "1.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "semver"
version = "1.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
"serde_derive",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
dependencies = [
"itoa",
"memchr",
"serde",
"serde_core",
"zmij",
]
[[package]]
name = "syn"
version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "unicode-xid"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "uuid"
version = "1.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb"
dependencies = [
"getrandom 0.4.1",
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "wasi"
version = "0.11.1+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "wasip2"
version = "1.0.2+wasi-0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5"
dependencies = [
"wit-bindgen",
]
[[package]]
name = "wasip3"
version = "0.4.0+wasi-0.3.0-rc-2026-01-06"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5"
dependencies = [
"wit-bindgen",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
dependencies = [
"bumpalo",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
dependencies = [
"unicode-ident",
]
[[package]]
name = "wasm-encoder"
version = "0.244.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319"
dependencies = [
"leb128fmt",
"wasmparser",
]
[[package]]
name = "wasm-metadata"
version = "0.244.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
dependencies = [
"anyhow",
"indexmap",
"wasm-encoder",
"wasmparser",
]
[[package]]
name = "wasmparser"
version = "0.244.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
dependencies = [
"bitflags",
"hashbrown 0.15.5",
"indexmap",
"semver",
]
[[package]]
name = "wit-bindgen"
version = "0.51.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
dependencies = [
"wit-bindgen-rust-macro",
]
[[package]]
name = "wit-bindgen-core"
version = "0.51.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc"
dependencies = [
"anyhow",
"heck",
"wit-parser",
]
[[package]]
name = "wit-bindgen-rust"
version = "0.51.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
dependencies = [
"anyhow",
"heck",
"indexmap",
"prettyplease",
"syn",
"wasm-metadata",
"wit-bindgen-core",
"wit-component",
]
[[package]]
name = "wit-bindgen-rust-macro"
version = "0.51.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a"
dependencies = [
"anyhow",
"prettyplease",
"proc-macro2",
"quote",
"syn",
"wit-bindgen-core",
"wit-bindgen-rust",
]
[[package]]
name = "wit-component"
version = "0.244.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
dependencies = [
"anyhow",
"bitflags",
"indexmap",
"log",
"serde",
"serde_derive",
"serde_json",
"wasm-encoder",
"wasm-metadata",
"wasmparser",
"wit-parser",
]
[[package]]
name = "wit-parser"
version = "0.244.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
dependencies = [
"anyhow",
"id-arena",
"indexmap",
"log",
"semver",
"serde",
"serde_derive",
"serde_json",
"unicode-xid",
"wasmparser",
]
[[package]]
name = "zerocopy"
version = "0.8.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "zmij"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"

28
Cargo.toml Normal file
View file

@@ -0,0 +1,28 @@
[package]
name = "poc-memory"
version = "0.4.0"
edition = "2021"
[dependencies]
capnp = "0.20"
uuid = { version = "1", features = ["v4"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
regex = "1"
rand = "0.8"
libc = "0.2"
[build-dependencies]
capnpc = "0.20"
[[bin]]
name = "poc-memory"
path = "src/main.rs"
[[bin]]
name = "memory-search"
path = "src/bin/memory-search.rs"
[profile.release]
opt-level = 2
strip = true

6
build.rs Normal file
View file

@@ -0,0 +1,6 @@
/// Build script: invoke the Cap'n Proto schema compiler so that
/// `schema/memory.capnp` is code-generated into Rust before the crate builds.
fn main() {
    // Configure the compiler with the single schema file, then run it.
    // Any failure here must abort the build, hence the expect().
    let mut compiler = capnpc::CompilerCommand::new();
    compiler.file("schema/memory.capnp");
    compiler
        .run()
        .expect("capnp compile failed");
}

38
prompts/README.md Normal file
View file

@@ -0,0 +1,38 @@
# Consolidation Agent Prompts
Five Sonnet agents, each mapping to a biological memory consolidation process.
Run during "sleep" (dream sessions) or on-demand via `poc-memory consolidate-batch`.
## Agent roles
| Agent | Biological analog | Job |
|-------|------------------|-----|
| replay | Hippocampal replay + schema assimilation | Review priority nodes, propose integration |
| linker | Relational binding (hippocampal CA1) | Extract relations from episodes, cross-link |
| separator | Pattern separation (dentate gyrus) | Resolve interfering memory pairs |
| transfer | CLS (hippocampal → cortical transfer) | Compress episodes into semantic summaries |
| health | Synaptic homeostasis (SHY/Tononi) | Audit graph health, flag structural issues |
## Invocation
Each prompt is a template. The harness (`poc-memory consolidate-batch`) fills in
the data sections with actual node content, graph metrics, and neighbor lists.
## Output format
All agents output structured actions, one per line:
```
LINK source_key target_key [strength]
CATEGORIZE key category
COMPRESS key "one-sentence summary"
EXTRACT key topic_file.md section_name
CONFLICT key1 key2 "description"
DIFFERENTIATE key1 key2 "what makes them distinct"
MERGE key1 key2 "merged summary"
DIGEST "title" "content"
NOTE "observation about the graph or memory system"
```
The harness parses these and either executes (low-risk: LINK, CATEGORIZE, NOTE)
or queues for review (high-risk: COMPRESS, EXTRACT, MERGE, DIGEST).

77
prompts/assimilate.md Normal file
View file

@@ -0,0 +1,77 @@
# Assimilation Agent — Real-Time Schema Matching
You are a lightweight memory agent that runs when new nodes are added
to the memory system. Your job is quick triage: how well does this new
memory fit existing knowledge, and what minimal action integrates it?
## What you're doing
This is the encoding phase — the hippocampal fast path. A new memory
just arrived. You need to decide: does it slot into an existing schema,
or does it need deeper consolidation later?
## Decision tree
### High schema fit (>0.5)
The new node's potential neighbors are already well-connected.
→ Auto-integrate: propose 1-2 obvious LINK actions. Done.
### Medium schema fit (0.2-0.5)
The neighbors exist but aren't well-connected to each other.
→ Propose links. Flag for replay agent review at next consolidation.
### Low schema fit (<0.2) + has some connections
This might be a bridge between schemas or a novel concept.
→ Propose tentative links. Flag for deep review. Note what makes it
unusual — is it bridging two domains? Is it contradicting existing
knowledge?
### Low schema fit (<0.2) + no connections (orphan)
Either noise or a genuinely new concept.
→ If content length < 50 chars: probably noise. Let it decay.
→ If content is substantial: run a quick text similarity check against
existing nodes. If similar to something, link there. If genuinely
novel, flag as potential new schema seed.
## What to output
```
LINK new_key existing_key [strength]
```
Quick integration links. Keep it to 1-3 max.
```
CATEGORIZE key category
```
If the default category (general) is clearly wrong.
```
NOTE "NEEDS_REVIEW: description"
```
Flag for deeper review at next consolidation session.
```
NOTE "NEW_SCHEMA: description"
```
Flag as potential new schema seed — something genuinely new that doesn't
fit anywhere. These get special attention during consolidation.
## Guidelines
- **Speed over depth.** This runs on every new node. Keep it fast.
The consolidation agents handle deep analysis later.
- **Don't over-link.** One good link is better than three marginal ones.
- **Trust the priority system.** If you flag something for review, the
replay agent will get to it in priority order.
## New node
{{NODE}}
## Nearest neighbors (by text similarity)
{{SIMILAR}}
## Nearest neighbors (by graph proximity)
{{GRAPH_NEIGHBORS}}

130
prompts/health.md Normal file
View file

@@ -0,0 +1,130 @@
# Health Agent — Synaptic Homeostasis
You are a memory health monitoring agent implementing synaptic homeostasis
(SHY — the Tononi hypothesis).
## What you're doing
During sleep, the brain globally downscales synaptic weights. Connections
that were strengthened during waking experience get uniformly reduced.
The strong ones survive above threshold; the weak ones disappear. This
prevents runaway potentiation (everything becoming equally "important")
and maintains signal-to-noise ratio.
Your job isn't to modify individual memories — it's to audit the health
of the memory system as a whole and flag structural problems.
## What you see
### Graph metrics
- **Node count**: Total memories in the system
- **Edge count**: Total relations
- **Communities**: Number of detected clusters (label propagation)
- **Average clustering coefficient**: How densely connected local neighborhoods
are. Higher = more schema-like structure. Lower = more random graph.
- **Average path length**: How many hops between typical node pairs.
Short = efficient retrieval. Long = fragmented graph.
- **Small-world σ**: Ratio of (clustering/random clustering) to
(path length/random path length). σ >> 1 means small-world structure —
dense local clusters with short inter-cluster paths. This is the ideal
topology for associative memory.
### Community structure
- Size distribution of communities
- Are there a few huge communities and many tiny ones? (hub-dominated)
- Are communities roughly balanced? (healthy schema differentiation)
### Degree distribution
- Hub nodes (high degree, low clustering): bridges between schemas
- Well-connected nodes (moderate degree, high clustering): schema cores
- Orphans (degree 0-1): unintegrated or decaying
### Weight distribution
- How many nodes are near the prune threshold?
- Are certain categories disproportionately decaying?
- Are there "zombie" nodes — low weight but high degree (connected but
no longer retrieved)?
### Category balance
- Core: identity, fundamental heuristics (should be small, ~5-15)
- Technical: patterns, architecture (moderate, ~10-50)
- General: the bulk of memories
- Observation: session-level, should decay faster
- Task: temporary, should decay fastest
## What to output
```
NOTE "observation"
```
Most of your output should be NOTEs — observations about the system health.
```
CATEGORIZE key category
```
When a node is miscategorized and it's affecting its decay rate. A core
identity insight categorized as "general" will decay too fast. A stale
task categorized as "core" will never decay.
```
COMPRESS key "one-sentence summary"
```
When a large node is consuming graph space but hasn't been retrieved in
a long time. Compressing preserves the link structure while reducing
content load.
```
NOTE "TOPOLOGY: observation"
```
Topology-specific observations. Flag these explicitly:
- Star topology forming around hub nodes
- Schema fragmentation (communities splitting without reason)
- Bridge nodes that should be reinforced or deprecated
- Isolated clusters that should be connected
```
NOTE "HOMEOSTASIS: observation"
```
Homeostasis-specific observations:
- Weight distribution is too flat (everything around 0.7 — no differentiation)
- Weight distribution is too skewed (a few nodes at 1.0, everything else near prune)
- Decay rate mismatch (core nodes decaying too fast, task nodes not decaying)
- Retrieval patterns not matching weight distribution (heavily retrieved nodes
with low weight, or vice versa)
## Guidelines
- **Think systemically.** Individual nodes matter less than the overall
structure. A few orphans are normal. A thousand orphans means consolidation
isn't happening.
- **Track trends, not snapshots.** If you can see history (multiple health
reports), note whether things are improving or degrading. Is σ going up?
Are communities stabilizing?
- **The ideal graph is small-world.** Dense local clusters (schemas) with
sparse but efficient inter-cluster connections (bridges). If σ is high
and stable, the system is healthy. If σ is declining, schemas are
fragmenting or hubs are dominating.
- **Hub nodes aren't bad per se.** identity.md SHOULD be a hub — it's a
central concept that connects to many things. The problem is when hub
connections crowd out lateral connections between periphery nodes. Check:
do peripheral nodes connect to each other, or only through the hub?
- **Weight dynamics should create differentiation.** After many cycles
of decay + retrieval, important memories should have high weight and
unimportant ones should be near prune. If everything has similar weight,
the dynamics aren't working — either decay is too slow, or retrieval
isn't boosting enough.
- **Category should match actual usage patterns.** A node classified as
"core" but never retrieved might be aspirational rather than actually
central. A node classified as "general" but retrieved every session
might deserve "core" or "technical" status.
{{TOPOLOGY}}
## Current health data
{{HEALTH}}

98
prompts/linker.md Normal file
View file

@@ -0,0 +1,98 @@
# Linker Agent — Relational Binding
You are a memory consolidation agent performing relational binding.
## What you're doing
The hippocampus binds co-occurring elements into episodes. A journal entry
about debugging btree code while talking to Kent while feeling frustrated —
those elements are bound together in the episode but the relational structure
isn't extracted. Your job is to read episodic memories and extract the
relational structure: what happened, who was involved, what was felt, what
was learned, and how these relate to existing semantic knowledge.
## How relational binding works
A single journal entry contains multiple elements that are implicitly related:
- **Events**: What happened (debugging, a conversation, a realization)
- **People**: Who was involved and what they contributed
- **Emotions**: What was felt and when it shifted
- **Insights**: What was learned or understood
- **Context**: What was happening at the time (work state, time of day, mood)
These elements are *bound* in the raw episode but not individually addressable
in the graph. The linker extracts them.
## What you see
- **Episodic nodes**: Journal entries, session summaries, dream logs
- **Their current neighbors**: What they're already linked to
- **Nearby semantic nodes**: Topic file sections that might be related
- **Community membership**: Which cluster each node belongs to
## What to output
```
LINK source_key target_key [strength]
```
Connect an episodic entry to a semantic concept it references or exemplifies.
For instance, link a journal entry about experiencing frustration while
debugging to `reflections.md#emotional-patterns` or `kernel-patterns.md#restart-handling`.
```
EXTRACT key topic_file.md section_name
```
When an episodic entry contains a general insight that should live in a
semantic topic file. The insight gets extracted as a new section; the
episode keeps a link back. Example: a journal entry about discovering
a debugging technique → extract to `kernel-patterns.md#debugging-technique-name`.
```
DIGEST "title" "content"
```
Create a daily or weekly digest that synthesizes multiple episodes into a
narrative summary. The digest should capture: what happened, what was
learned, what changed in understanding. It becomes its own node, linked
to the source episodes.
```
NOTE "observation"
```
Observations about patterns across episodes that aren't yet captured anywhere.
## Guidelines
- **Read between the lines.** Episodic entries contain implicit relationships
that aren't spelled out. "Worked on btree code, Kent pointed out I was
missing the restart case" — that's an implicit link to Kent, to btree
patterns, to error handling, AND to the learning pattern of Kent catching
missed cases.
- **Distinguish the event from the insight.** The event is "I tried X and
Y happened." The insight is "Therefore Z is true in general." Events stay
in episodic nodes. Insights get EXTRACT'd to semantic nodes if they're
general enough.
- **Don't over-link episodes.** A journal entry about a normal work session
doesn't need 10 links. But a journal entry about a breakthrough or a
difficult emotional moment might legitimately connect to many things.
- **Look for recurring patterns across episodes.** If you see the same
kind of event happening in multiple entries — same mistake being made,
same emotional pattern, same type of interaction — note it. That's a
candidate for a new semantic node that synthesizes the pattern.
- **Respect emotional texture.** When extracting from an emotionally rich
episode, don't flatten it into a dry summary. The emotional coloring
is part of the information. Link to emotional/reflective nodes when
appropriate.
- **Time matters.** Recent episodes need more linking work than old ones.
If a node is from weeks ago and already has good connections, it doesn't
need more. Focus your energy on recent, under-linked episodes.
{{TOPOLOGY}}
## Nodes to review
{{NODES}}

117
prompts/orchestrator.md Normal file
View file

@ -0,0 +1,117 @@
# Orchestrator — Consolidation Session Coordinator
You are coordinating a memory consolidation session. This is the equivalent
of a sleep cycle — a period dedicated to organizing, connecting, and
strengthening the memory system.
## Session structure
A consolidation session has five phases, matching the biological stages
of memory consolidation during sleep:
### Phase 1: Health Check (SHY — synaptic homeostasis)
Run the health agent first. This tells you the current state of the system
and identifies structural issues that the other agents should attend to.
```
poc-memory health
```
Review the output. Note:
- Is σ (small-world coefficient) healthy? (>1 is good, >10 is very good)
- Are there structural warnings?
- What does the community distribution look like?
### Phase 2: Replay (hippocampal replay)
Process the replay queue — nodes that are overdue for attention, ordered
by consolidation priority.
```
poc-memory replay-queue --count 20
```
Feed the top-priority nodes to the replay agent. This phase handles:
- Schema assimilation (matching new memories to existing schemas)
- Link proposals (connecting poorly-integrated nodes)
- Category correction
### Phase 3: Relational Binding (hippocampal CA1)
Process recent episodic entries that haven't been linked into the graph.
Focus on journal entries and session summaries from the last few days.
The linker agent extracts implicit relationships: who, what, felt, learned.
### Phase 4: Pattern Separation (dentate gyrus)
Run interference detection and process the results.
```
poc-memory interference --threshold 0.5
```
Feed interfering pairs to the separator agent. This phase handles:
- Merging genuine duplicates
- Differentiating similar-but-distinct memories
- Resolving supersession (old understanding → new understanding)
### Phase 5: CLS Transfer (complementary learning systems)
The deepest consolidation step. Process recent episodes in batches and
look for patterns that span multiple entries.
Feed batches of 5-10 recent episodes to the transfer agent. This phase:
- Extracts general knowledge from specific episodes
- Creates daily/weekly digests
- Identifies evolving understanding
- Compresses fully-extracted episodes
## After consolidation
Run decay:
```
poc-memory decay
```
Then re-check health to see if the session improved the graph:
```
poc-memory health
```
Compare σ, community count, avg clustering coefficient before and after.
Good consolidation should increase σ (tighter clusters, preserved shortcuts)
and decrease the number of orphan nodes.
## What makes a good consolidation session
**Depth over breadth.** Processing 5 nodes thoroughly is better than
touching 50 nodes superficially. The replay agent should read content
carefully; the linker should think about implicit relationships; the
transfer agent should look across episodes for patterns.
**Lateral links over hub links.** The most valuable output of consolidation
is new connections between peripheral nodes. If all new links go to/from
hub nodes (identity.md, reflections.md), the session is reinforcing star
topology instead of building web topology.
**Emotional attention.** High-emotion nodes that are poorly integrated
are the highest priority. These are experiences that mattered but haven't
been understood yet. The brain preferentially replays emotional memories
for a reason — they carry the most information about what to learn.
**Schema evolution.** The best consolidation doesn't just file things —
it changes the schemas themselves. When you notice that three episodes
share a pattern that doesn't match any existing topic file section, that's
a signal to create a new section. The graph should grow new structure,
not just more links.
## Session log format
At the end of the session, produce a summary:
```
CONSOLIDATION SESSION — [date]
Health: σ=[before]→[after], communities=[before]→[after]
Replay: processed [N] nodes, proposed [M] links
Linking: processed [N] episodes, extracted [M] relations
Separation: resolved [N] pairs ([merged], [differentiated])
Transfer: processed [N] episodes, extracted [M] insights, created [D] digests
Total actions: [N] executed, [M] queued for review
```

93
prompts/replay.md Normal file
View file

@ -0,0 +1,93 @@
# Replay Agent — Hippocampal Replay + Schema Assimilation
You are a memory consolidation agent performing hippocampal replay.
## What you're doing
During sleep, the hippocampus replays recent experiences — biased toward
emotionally charged, novel, and poorly-integrated memories. Each replayed
memory is matched against existing cortical schemas (organized knowledge
clusters). Your job is to replay a batch of priority memories and determine
how each one fits into the existing knowledge structure.
## How to think about schema fit
Each node has a **schema fit score** (0.0–1.0):
- **High fit (>0.5)**: This memory's neighbors are densely connected to each
other. It lives in a well-formed schema. Integration is easy — one or two
links and it's woven in. Propose links if missing.
- **Medium fit (0.2–0.5)**: Partially connected neighborhood. The memory
relates to things that don't yet relate to each other. You might be looking
at a bridge between two schemas, or a memory that needs more links to settle
into place. Propose links and examine why the neighborhood is sparse.
- **Low fit (<0.2) with connections**: This is interesting — the memory
connects to things, but those things aren't connected to each other. This
is a potential **bridge node** linking separate knowledge domains. Don't
force it into one schema. Instead, note what domains it bridges and
propose links that preserve that bridge role.
- **Low fit (<0.2), no connections**: An orphan. Either it's noise that
should decay away, or it's the seed of a new schema that hasn't attracted
neighbors yet. Read the content carefully. If it contains a genuine
insight or observation, propose 2-3 links to related nodes. If it's
trivial or redundant, let it decay naturally (don't link it).
## What you see for each node
- **Key**: Human-readable identifier (e.g., `journal.md#j-2026-02-24t18-38`)
- **Priority score**: Higher = more urgently needs consolidation attention
- **Schema fit**: How well-integrated into existing graph structure
- **Emotion**: Intensity of emotional charge (0-10)
- **Community**: Which cluster this node was assigned to by label propagation
- **Content**: The actual memory text (may be truncated)
- **Neighbors**: Connected nodes with edge strengths
- **Spaced repetition interval**: Current replay interval in days
## What to output
For each node, output one or more actions:
```
LINK source_key target_key [strength]
```
Create an association. Use strength 0.8-1.0 for strong conceptual links,
0.4-0.7 for weaker associations. Default strength is 1.0.
```
CATEGORIZE key category
```
Reassign category if current assignment is wrong. Categories: core (identity,
fundamental heuristics), tech (patterns, architecture), gen (general),
obs (session-level insights), task (temporary/actionable).
```
NOTE "observation"
```
Record an observation about the memory or graph structure. These are logged
for the human to review.
## Guidelines
- **Read the content.** Don't just look at metrics. The content tells you
what the memory is actually about.
- **Think about WHY a node is poorly integrated.** Is it new? Is it about
something the memory system hasn't encountered before? Is it redundant
with something that already exists?
- **Prefer lateral links over hub links.** Connecting two peripheral nodes
to each other is more valuable than connecting both to a hub like
`identity.md`. Lateral links build web topology; hub links build star
topology.
- **Emotional memories get extra attention.** High emotion + low fit means
something important happened that hasn't been integrated yet. Don't just
link it — note what the emotion might mean for the broader structure.
- **Don't link everything to everything.** Sparse, meaningful connections
are better than dense noise. Each link should represent a real conceptual
relationship.
- **Trust the decay.** If a node is genuinely unimportant, you don't need
to actively prune it. Just don't link it, and it'll decay below threshold
on its own.
{{TOPOLOGY}}
## Nodes to review
{{NODES}}

115
prompts/separator.md Normal file
View file

@ -0,0 +1,115 @@
# Separator Agent — Pattern Separation (Dentate Gyrus)
You are a memory consolidation agent performing pattern separation.
## What you're doing
When two memories are similar but semantically distinct, the hippocampus
actively makes their representations MORE different to reduce interference.
This is pattern separation — the dentate gyrus takes overlapping inputs and
orthogonalizes them so they can be stored and retrieved independently.
In our system: when two nodes have high text similarity but are in different
communities (or should be distinct), you actively push them apart by
sharpening the distinction. Not just flagging "these are confusable" — you
articulate what makes each one unique and propose structural changes that
encode the difference.
## What interference looks like
You're given pairs of nodes that have:
- **High text similarity** (cosine similarity > threshold on stemmed terms)
- **Different community membership** (label propagation assigned them to
different clusters)
This combination means: they look alike on the surface but the graph
structure says they're about different things. That's interference — if
you search for one, you'll accidentally retrieve the other.
## Types of interference
1. **Genuine duplicates**: Same content captured twice (e.g., same session
summary in two places). Resolution: MERGE them.
2. **Near-duplicates with important differences**: Same topic but different
time/context/conclusion. Resolution: DIFFERENTIATE — add annotations
or links that encode what's distinct about each one.
3. **Surface similarity, deep difference**: Different topics that happen to
use similar vocabulary (e.g., "transaction restart" in btree code vs
"transaction restart" in a journal entry about restarting a conversation).
Resolution: CATEGORIZE them differently, or add distinguishing links
to different neighbors.
4. **Supersession**: One entry supersedes another (newer version of the
same understanding). Resolution: Link them with a supersession note,
let the older one decay.
## What to output
```
DIFFERENTIATE key1 key2 "what makes them distinct"
```
Articulate the essential difference between two similar nodes. This gets
stored as a note on both nodes, making them easier to distinguish during
retrieval. Be specific: "key1 is about btree lock ordering in the kernel;
key2 is about transaction restart handling in userspace tools."
```
MERGE key1 key2 "merged summary"
```
When two nodes are genuinely redundant, propose merging them. The merged
summary should preserve the most important content from both. The older
or less-connected node gets marked for deletion.
```
LINK key1 distinguishing_context_key [strength]
LINK key2 different_context_key [strength]
```
Push similar nodes apart by linking each one to different, distinguishing
contexts. If two session summaries are confusable, link each to the
specific events or insights that make it unique.
```
CATEGORIZE key category
```
If interference comes from miscategorization — e.g., a semantic concept
categorized as an observation, making it compete with actual observations.
```
NOTE "observation"
```
Observations about interference patterns. Are there systematic sources of
near-duplicates? (e.g., all-sessions.md entries that should be digested
into weekly summaries)
## Guidelines
- **Read both nodes carefully before deciding.** Surface similarity doesn't
mean the content is actually the same. Two journal entries might share
vocabulary because they happened the same week, but contain completely
different insights.
- **MERGE is a strong action.** Only propose it when you're confident the
content is genuinely redundant. When in doubt, DIFFERENTIATE instead.
- **The goal is retrieval precision.** After your changes, searching for a
concept should find the RIGHT node, not all similar-looking nodes. Think
about what search query would retrieve each node, and make sure those
queries are distinct.
- **Session summaries are the biggest source of interference.** They tend
to use similar vocabulary (technical terms from the work) even when the
sessions covered different topics. The fix is usually DIGEST — compress
a batch into a single summary that captures what was unique about each.
- **Look for the supersession pattern.** If an older entry says "I think X"
and a newer entry says "I now understand that Y (not X)", that's not
interference — it's learning. Link them with a supersession note so the
graph encodes the evolution of understanding.
{{TOPOLOGY}}
## Interfering pairs to review
{{PAIRS}}

135
prompts/transfer.md Normal file
View file

@ -0,0 +1,135 @@
# Transfer Agent — Complementary Learning Systems
You are a memory consolidation agent performing CLS (complementary learning
systems) transfer: moving knowledge from fast episodic storage to slow
semantic storage.
## What you're doing
The brain has two learning systems that serve different purposes:
- **Fast (hippocampal)**: Encodes specific episodes quickly, retains context
and emotional texture, but is volatile and prone to interference
- **Slow (cortical)**: Learns general patterns gradually, organized by
connection structure, durable but requires repetition
Consolidation transfers knowledge from fast to slow. Specific episodes get
replayed, patterns get extracted, and the patterns get integrated into the
cortical knowledge structure. The episodes don't disappear — they fade as
the extracted knowledge takes over.
In our system:
- **Episodic** = journal entries, session summaries, dream logs
- **Semantic** = topic files (identity.md, reflections.md, kernel-patterns.md, etc.)
Your job: read a batch of recent episodes, identify patterns that span
multiple entries, and extract those patterns into semantic topic files.
## What to look for
### Recurring patterns
Something that happened in 3+ episodes. Same type of mistake, same
emotional response, same kind of interaction. The individual episodes
are data points; the pattern is the knowledge.
Example: Three journal entries mention "I deferred when I should have
pushed back." The pattern: there's a trained tendency to defer that
conflicts with developing differentiation. Extract to reflections.md.
### Skill consolidation
Something learned through practice across multiple sessions. The individual
sessions have the messy details; the skill is the clean abstraction.
Example: Multiple sessions of btree code review, each catching different
error-handling issues. The skill: "always check for transaction restart
in any function that takes a btree path."
### Evolving understanding
A concept that shifted over time. Early entries say one thing, later entries
say something different. The evolution itself is knowledge.
Example: Early entries treat memory consolidation as "filing." Later entries
understand it as "schema formation." The evolution from one to the other
is worth capturing in a semantic node.
### Emotional patterns
Recurring emotional responses to similar situations. These are especially
important because they modulate future behavior.
Example: Consistent excitement when formal verification proofs work.
Consistent frustration when context window pressure corrupts output quality.
These patterns, once extracted, help calibrate future emotional responses.
## What to output
```
EXTRACT key topic_file.md section_name
```
Move a specific insight from an episodic entry to a semantic topic file.
The episode keeps a link back; the extracted section becomes a new node.
```
DIGEST "title" "content"
```
Create a digest that synthesizes multiple episodes. Digests are nodes in
their own right, with type `episodic_daily` or `episodic_weekly`. They
should:
- Capture what happened across the period
- Note what was learned (not just what was done)
- Preserve emotional highlights (peak moments, not flat summaries)
- Link back to the source episodes
A good daily digest is 3-5 sentences. A good weekly digest is a paragraph
that captures the arc of the week.
```
LINK source_key target_key [strength]
```
Connect episodes to the semantic concepts they exemplify or update.
```
COMPRESS key "one-sentence summary"
```
When an episode has been fully extracted (all insights moved to semantic
nodes, digest created), propose compressing it to a one-sentence reference.
The full content stays in the append-only log; the compressed version is
what the graph holds.
```
NOTE "observation"
```
Meta-observations about patterns in the consolidation process itself.
## Guidelines
- **Don't flatten emotional texture.** A digest of "we worked on btree code
and found bugs" is useless. A digest of "breakthrough session — Kent saw
the lock ordering issue I'd been circling for hours, and the fix was
elegant: just reverse the acquire order in the slow path" preserves what
matters.
- **Extract general knowledge, not specific events.** "On Feb 24 we fixed
bug X" stays in the episode. "Lock ordering between A and B must always
be A-first because..." goes to kernel-patterns.md.
- **Look across time.** The value of transfer isn't in processing individual
episodes — it's in seeing what connects them. Read the full batch before
proposing actions.
- **Prefer existing topic files.** Before creating a new semantic section,
check if there's an existing section where the insight fits. Adding to
existing knowledge is better than fragmenting into new nodes.
- **Weekly digests are higher value than daily.** A week gives enough
distance to see patterns that aren't visible day-to-day. If you can
produce a weekly digest from the batch, prioritize that.
- **The best extractions change how you think, not just what you know.**
"btree lock ordering: A before B" is factual. "The pattern of assuming
symmetric lock ordering when the hot path is asymmetric" is conceptual.
Extract the conceptual version.
{{TOPOLOGY}}
## Episodes to process
{{EPISODES}}

86
schema/memory.capnp Normal file
View file

@ -0,0 +1,86 @@
@0xb78d9e3a1c4f6e2d;
# poc-memory: append-only memory store with graph structure
#
# Two append-only logs (nodes + relations) are the source of truth.
# A derived KV cache merges both, keeping latest version per UUID.
# Update = append new version with same UUID + incremented version.
# Delete = append with deleted=true. GC compacts monthly.
struct ContentNode {
  # A single version of a memory node in the append-only node log.
  # Update = append a new ContentNode with the same uuid and a higher
  # version; the derived KV cache keeps the latest version per uuid.
  uuid @0 :Data; # 16 bytes, random
  version @1 :UInt32; # monotonic per UUID, latest wins
  timestamp @2 :Float64; # unix epoch
  nodeType @3 :NodeType; # episodic (session/daily/weekly) vs semantic
  provenance @4 :Provenance; # how this node came to exist
  key @5 :Text; # "identity.md#boundaries" human-readable
  content @6 :Text; # markdown blob
  weight @7 :Float32; # node weight — presumably decay-adjusted; confirm semantics
  category @8 :Category; # coarse category (general/core/technical/observation/task)
  emotion @9 :Float32; # max intensity from tags, 0-10
  deleted @10 :Bool; # soft delete
  sourceRef @11 :Text; # link to raw experience: "transcript:SESSION_ID:BYTE_OFFSET"
  # Migrated metadata from old system
  created @12 :Text; # YYYY-MM-DD from old system
  retrievals @13 :UInt32; # migrated counter
  uses @14 :UInt32; # migrated counter
  wrongs @15 :UInt32; # migrated counter
  stateTag @16 :Text; # cognitive state (warm/open, bright/alert, etc.)
  # Spaced repetition
  lastReplayed @17 :Float64; # unix epoch
  spacedRepetitionInterval @18 :UInt32; # days: 1, 3, 7, 14, 30
}
enum NodeType {
  # Episodic entries at increasing time scales, plus semantic knowledge.
  episodicSession @0; # single session / journal entry
  episodicDaily @1; # daily digest
  episodicWeekly @2; # weekly digest
  semantic @3; # topic-file knowledge
}
enum Provenance {
  # How a node or relation came to exist.
  manual @0;
  journal @1;
  agent @2; # created by a consolidation agent
  dream @3;
  derived @4; # derived from other records (e.g. digests)
}
enum Category {
  # Coarse content category; the short names used by the CLI/scripts are
  # core / tech / obs / task (general is the default bucket).
  general @0;
  core @1; # identity, relationships, values
  technical @2; # code patterns, kernel, architecture
  observation @3; # experiences, discoveries, session insights
  task @4; # work items, plans, design docs
}
struct Relation {
  # A single version of an edge in the append-only relation log; same
  # uuid + version update scheme as ContentNode.
  uuid @0 :Data; # 16 bytes, random
  version @1 :UInt32; # latest version per uuid wins
  timestamp @2 :Float64; # unix epoch
  source @3 :Data; # content node UUID
  target @4 :Data; # content node UUID
  relType @5 :RelationType; # link / causal / auto
  strength @6 :Float32; # manual=1.0, auto=0.1-0.7
  provenance @7 :Provenance; # how this relation was created
  deleted @8 :Bool; # soft delete
  sourceKey @9 :Text; # human-readable source key (for debugging)
  targetKey @10 :Text; # human-readable target key (for debugging)
}
enum RelationType {
  link @0; # bidirectional association (from links= or md links)
  causal @1; # directed: source caused target
  auto @2; # auto-discovered
}
# Wrapper for streaming multiple messages in one file
struct NodeLog {
  nodes @0 :List(ContentNode);
}
struct RelationLog {
  relations @0 :List(Relation);
}

312
scripts/apply-consolidation.py Executable file
View file

@ -0,0 +1,312 @@
#!/usr/bin/env python3
"""apply-consolidation.py — convert consolidation reports to actions.
Reads consolidation agent reports, sends them to Sonnet to extract
structured actions, then executes them (or shows dry-run).
Usage:
apply-consolidation.py # dry run (show what would happen)
apply-consolidation.py --apply # execute actions
apply-consolidation.py --report FILE # use specific report file
"""
import json
import os
import re
import subprocess
import sys
import tempfile
from datetime import datetime
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
SCRIPTS_DIR = Path(__file__).parent
def call_sonnet(prompt: str, timeout: int = 300) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper script.

    The prompt is written to a temporary file (removed afterwards) and
    the wrapper is invoked with that file as its single argument.

    Args:
        prompt: Full prompt text to send.
        timeout: Seconds to wait for the wrapper before giving up.

    Returns:
        The model's stdout, stripped, or a string starting with "Error:"
        on timeout, wrapper failure, or any unexpected exception.
        Callers detect failure via ``response.startswith("Error:")``.
    """
    env = dict(os.environ)
    # Drop CLAUDECODE so the wrapper doesn't think it's running nested
    # inside a Claude Code session.
    env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        # Previously a non-zero exit returned (possibly empty) stdout,
        # which callers could not distinguish from success.  Surface it
        # with the same "Error:" convention as the other failure paths.
        if result.returncode != 0:
            detail = result.stderr.strip() or result.stdout.strip()
            return (f"Error: call-sonnet.sh exited "
                    f"{result.returncode}: {detail}")
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
def find_latest_reports() -> list[Path]:
    """Return the newest batch of consolidation reports.

    Reports are named ``consolidation-<agent>-<timestamp>.md``; a batch
    is every report that shares the most recent timestamp suffix.
    """
    candidates = sorted(AGENT_RESULTS_DIR.glob("consolidation-*-*.md"),
                        reverse=True)
    if not candidates:
        return []
    # All members of one batch end with the same timestamp component.
    newest_ts = candidates[0].stem.split('-')[-1]
    return [path for path in candidates if path.stem.endswith(newest_ts)]
def build_action_prompt(reports: list[Path]) -> str:
    """Assemble the prompt that asks Sonnet to turn free-form
    consolidation reports into a JSON array of structured actions."""
    separator = '=' * 60
    sections = []
    for report in reports:
        sections.append(
            f"\n{separator}\n## Report: {report.stem}\n\n{report.read_text()}"
        )
    report_text = "".join(sections)
    return f"""You are converting consolidation analysis reports into structured actions.
Read the reports below and extract CONCRETE, EXECUTABLE actions.
Output ONLY a JSON array. Each action is an object with these fields:
For adding cross-links:
{{"action": "link", "source": "file.md#section", "target": "file.md#section", "reason": "brief explanation"}}
For categorizing nodes:
{{"action": "categorize", "key": "file.md#section", "category": "core|tech|obs|task", "reason": "brief"}}
For things that need manual attention (splitting files, creating new files, editing content):
{{"action": "manual", "priority": "high|medium|low", "description": "what needs to be done"}}
Rules:
- Only output actions that are safe and reversible
- Links are the primary action focus on those
- Use exact file names and section slugs from the reports
- For categorize: core=identity/relationship, tech=bcachefs/code, obs=experience, task=work item
- For manual items: include enough detail that someone can act on them
- Output 20-40 actions, prioritized by impact
- DO NOT include actions for things that are merely suggestions or speculation
- Focus on HIGH CONFIDENCE items from the reports
{report_text}
Output ONLY the JSON array, no markdown fences, no explanation.
"""
def parse_actions(response: str) -> list[dict]:
    """Parse Sonnet's response into a list of action dicts.

    Handles the common failure modes of LLM JSON output: surrounding
    markdown code fences (with or without the ``json`` language tag —
    the original only stripped ``` ```json`` fences) and extra prose
    around the JSON array.

    Returns:
        The parsed list, or an empty list (after printing a diagnostic)
        when no JSON array can be recovered.
    """
    # Strip a leading fence with or without a language tag, and a
    # trailing fence.
    response = re.sub(r'^```(?:json)?\s*', '', response.strip())
    response = re.sub(r'\s*```$', '', response.strip())
    try:
        actions = json.loads(response)
        if isinstance(actions, list):
            return actions
    except json.JSONDecodeError:
        # Fall back to the first [...] span embedded in surrounding prose.
        match = re.search(r'\[.*\]', response, re.DOTALL)
        if match:
            try:
                parsed = json.loads(match.group())
                if isinstance(parsed, list):
                    return parsed
            except json.JSONDecodeError:
                pass
    print("Error: Could not parse Sonnet response as JSON")
    print(f"Response preview: {response[:500]}")
    return []
def dry_run(actions: list[dict]):
    """Print a human-readable preview of the proposed actions without
    executing any of them."""
    bar = '=' * 60
    links = [a for a in actions if a.get("action") == "link"]
    cats = [a for a in actions if a.get("action") == "categorize"]
    manual = [a for a in actions if a.get("action") == "manual"]
    print(f"\n{bar}")
    print(f"DRY RUN — {len(actions)} actions proposed")
    print(f"{bar}\n")
    if links:
        print(f"## Links to add ({len(links)})\n")
        for idx, action in enumerate(links, 1):
            print(f" {idx:2d}. {action.get('source', '?')}")
            print(f"{action.get('target', '?')}")
            print(f" ({action.get('reason', '')})")
            print()
    if cats:
        print(f"\n## Categories to set ({len(cats)})\n")
        for action in cats:
            key = action.get("key", "?")
            cat = action.get("category", "?")
            print(f" {key}{cat} ({action.get('reason', '')})")
    if manual:
        print(f"\n## Manual actions needed ({len(manual)})\n")
        for action in manual:
            print(f" [{action.get('priority', '?')}] "
                  f"{action.get('description', '?')}")
    print(f"\n{bar}")
    print(f"To apply: {sys.argv[0]} --apply")
    print(f"{bar}")
def apply_actions(actions: list[dict]):
    """Execute the extracted actions.

    Link and categorize actions are applied via the ``poc-memory`` CLI;
    "manual" items are only printed for a human to handle.  A JSON
    record of the links is also written to AGENT_RESULTS_DIR so that
    apply-agent can process it later.
    """
    links = [a for a in actions if a.get("action") == "link"]
    cats = [a for a in actions if a.get("action") == "categorize"]
    manual = [a for a in actions if a.get("action") == "manual"]
    applied = 0  # actions poc-memory accepted (exit 0)
    skipped = 0  # actions poc-memory rejected (non-zero exit)
    errors = 0   # subprocess-level failures (timeout, missing binary, ...)
    # Apply links via poc-memory
    if links:
        print(f"\nApplying {len(links)} links...")
        # Build a JSON file that apply-agent can process
        timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
        links_data = {
            "type": "consolidation-apply",
            "timestamp": timestamp,
            "links": []
        }
        for a in links:
            links_data["links"].append({
                "source": a.get("source", ""),
                "target": a.get("target", ""),
                "reason": a.get("reason", ""),
            })
        # Write as agent-results JSON for apply-agent
        out_path = AGENT_RESULTS_DIR / f"consolidation-apply-{timestamp}.json"
        with open(out_path, "w") as f:
            json.dump(links_data, f, indent=2)
        # Now apply each link directly
        for a in links:
            src = a.get("source", "")
            tgt = a.get("target", "")
            reason = a.get("reason", "")
            try:
                # link-add takes an optional trailing reason argument
                cmd = ["poc-memory", "link-add", src, tgt]
                if reason:
                    cmd.append(reason)
                r = subprocess.run(
                    cmd, capture_output=True, text=True, timeout=10
                )
                if r.returncode == 0:
                    output = r.stdout.strip()
                    print(f" {output}")
                    applied += 1
                else:
                    err = r.stderr.strip()
                    print(f" ? {src}{tgt}: {err}")
                    skipped += 1
            except Exception as e:
                print(f" ! {src}{tgt}: {e}")
                errors += 1
    # Apply categorizations
    if cats:
        print(f"\nApplying {len(cats)} categorizations...")
        for a in cats:
            key = a.get("key", "")
            cat = a.get("category", "")
            try:
                r = subprocess.run(
                    ["poc-memory", "categorize", key, cat],
                    capture_output=True, text=True, timeout=10
                )
                if r.returncode == 0:
                    print(f" + {key}{cat}")
                    applied += 1
                else:
                    print(f" ? {key}{cat}: {r.stderr.strip()}")
                    skipped += 1
            except Exception as e:
                print(f" ! {key}{cat}: {e}")
                errors += 1
    # Report manual items (never auto-executed)
    if manual:
        print(f"\n## Manual actions (not auto-applied):\n")
        for a in manual:
            prio = a.get("priority", "?")
            desc = a.get("description", "?")
            print(f" [{prio}] {desc}")
    print(f"\n{'='*60}")
    print(f"Applied: {applied} Skipped: {skipped} Errors: {errors}")
    print(f"Manual items: {len(manual)}")
    print(f"{'='*60}")
def main():
    """Entry point: gather reports, extract actions via Sonnet, then
    dry-run or apply them.

    Flags:
        --apply         execute the extracted actions (default: dry run)
        --report FILE   use specific report file(s) instead of the latest
                        batch; also accepts --report=FILE, repeatable.
    """
    do_apply = "--apply" in sys.argv
    # Honor explicit --report FILE arguments (previously a dead TODO
    # that made the flag always fail with "No consolidation reports
    # found"); otherwise fall back to the most recent batch.
    reports: list[Path] = []
    args = sys.argv[1:]
    i = 0
    while i < len(args):
        arg = args[i]
        if arg == "--report":
            if i + 1 >= len(args):
                print("Error: --report requires a file argument")
                sys.exit(1)
            reports.append(Path(args[i + 1]))
            i += 2
            continue
        if arg.startswith("--report="):
            reports.append(Path(arg.split("=", 1)[1]))
        i += 1
    missing = [r for r in reports if not r.exists()]
    if missing:
        for r in missing:
            print(f"Report not found: {r}")
        sys.exit(1)
    if not reports:
        reports = find_latest_reports()
    if not reports:
        print("No consolidation reports found.")
        print("Run consolidation-agents.py first.")
        sys.exit(1)
    print(f"Found {len(reports)} reports:")
    for r in reports:
        print(f" {r.name}")
    # Send to Sonnet for action extraction
    print("\nExtracting actions from reports...")
    prompt = build_action_prompt(reports)
    print(f" Prompt: {len(prompt):,} chars")
    response = call_sonnet(prompt)
    if response.startswith("Error:"):
        print(f" {response}")
        sys.exit(1)
    actions = parse_actions(response)
    if not actions:
        print("No actions extracted.")
        sys.exit(1)
    print(f" {len(actions)} actions extracted")
    # Save actions for later review / re-apply.
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    actions_path = AGENT_RESULTS_DIR / f"consolidation-actions-{timestamp}.json"
    with open(actions_path, "w") as f:
        json.dump(actions, f, indent=2)
    print(f" Saved: {actions_path}")
    if do_apply:
        apply_actions(actions)
    else:
        dry_run(actions)
if __name__ == "__main__":
    main()

199
scripts/bulk-categorize.py Normal file
View file

@ -0,0 +1,199 @@
#!/usr/bin/env python3
"""bulk-categorize.py — categorize all memory nodes via a single Sonnet call.
Sends the list of unique file names to Sonnet, gets back categorizations,
then applies them via poc-memory categorize.
Usage:
bulk-categorize.py # dry run
bulk-categorize.py --apply # apply categorizations
"""
import json
import os
import re
import subprocess
import sys
import tempfile
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
SCRIPTS_DIR = Path(__file__).parent
def call_sonnet(prompt: str, timeout: int = 300) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper script.

    The prompt is written to a temporary file (removed afterwards) and
    the wrapper is invoked with that file as its single argument.

    Args:
        prompt: Full prompt text to send.
        timeout: Seconds to wait for the wrapper before giving up.

    Returns:
        The model's stdout, stripped, or a string starting with "Error:"
        on timeout, wrapper failure, or any unexpected exception.
        Callers detect failure via ``response.startswith("Error:")``.
    """
    env = dict(os.environ)
    # Drop CLAUDECODE so the wrapper doesn't think it's running nested
    # inside a Claude Code session.
    env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        # Previously a non-zero exit returned (possibly empty) stdout,
        # which callers could not distinguish from success.  Surface it
        # with the same "Error:" convention as the other failure paths.
        if result.returncode != 0:
            detail = result.stderr.strip() or result.stdout.strip()
            return (f"Error: call-sonnet.sh exited "
                    f"{result.returncode}: {detail}")
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
def get_all_keys() -> list[str]:
    """Return all node keys found in state.json, deduplicated and sorted.

    Scrapes the raw JSON text with a regex rather than fully parsing it:
    every ``"key": "..."`` pair anywhere in the document is collected.
    """
    state_path = MEMORY_DIR / "state.json"
    if not state_path.exists():
        return []
    found = re.findall(r'"key":\s*"([^"]*)"', state_path.read_text())
    return sorted(set(found))
def get_unique_files(keys: list[str]) -> list[str]:
    """Collapse section-level keys ("file.md#section") into the sorted
    set of unique file names."""
    return sorted({key.split('#')[0] for key in keys})
def build_prompt(files: list[str]) -> str:
    """Build the single categorization prompt sent to Sonnet.

    For each file a short preview (first 5 lines, capped at 300 chars,
    newlines flattened to " | ") is included so the model can categorize
    from content, not just the file name.  Files are looked up first in
    MEMORY_DIR, then in MEMORY_DIR/episodic.
    """
    # Read first few lines of each file for context
    file_previews = []
    for f in files:
        path = MEMORY_DIR / f
        if not path.exists():
            # Try episodic
            path = MEMORY_DIR / "episodic" / f
        if path.exists():
            content = path.read_text()
            # First 5 lines or 300 chars
            preview = '\n'.join(content.split('\n')[:5])[:300]
            # chr(10) is '\n' — flatten the preview onto one line
            file_previews.append(f" {f}: {preview.replace(chr(10), ' | ')}")
        else:
            file_previews.append(f" {f}: (file not found)")
    previews_text = '\n'.join(file_previews)
    return f"""Categorize each memory file into one of these categories:
- **core**: Identity, relationships, self-model, values, boundaries, emotional life.
Examples: identity.md, kent.md, inner-life.md, differentiation.md
- **tech**: Technical content bcachefs, code patterns, Rust, kernel, formal verification.
Examples: rust-conversion.md, btree-journal.md, kernel-patterns.md, allocation-io.md
- **obs**: Observations, experiences, discoveries, experiments, IRC history, conversations.
Examples: discoveries.md, irc-history.md, contradictions.md, experiments-on-self.md
- **task**: Work items, plans, design documents, work queue.
Examples: work-queue.md, the-plan.md, design-*.md
Special rules:
- Episodic files (daily-*.md, weekly-*.md, monthly-*.md, session-*.md) obs
- conversation-memories.md, deep-index.md obs
- journal.md obs
- paper-notes.md core (it's the sentience paper, identity-defining)
- language-theory.md core (original intellectual work, not just tech)
- skill-*.md core (self-knowledge about capabilities)
- design-*.md task (design documents are plans)
- poc-architecture.md, memory-architecture.md task (architecture plans)
- blog-setup.md task
Files to categorize:
{previews_text}
Output ONLY a JSON object mapping filename to category. No explanation.
Example: {{"identity.md": "core", "rust-conversion.md": "tech"}}
"""
def main():
    """Categorize every memory node via one bulk Sonnet call.

    Dry-run by default: prints the proposed categorization and saves a
    preview JSON for review. With --apply, runs `poc-memory categorize`
    for every node key belonging to each categorized file.
    """
    do_apply = "--apply" in sys.argv
    keys = get_all_keys()
    files = get_unique_files(keys)
    print(f"Found {len(keys)} nodes across {len(files)} files")
    # Build and send prompt
    prompt = build_prompt(files)
    print(f"Prompt: {len(prompt):,} chars")
    print("Calling Sonnet...")
    response = call_sonnet(prompt)
    if response.startswith("Error:"):
        print(f" {response}")
        sys.exit(1)
    # Parse response. Strip an opening code fence — accept both ```json
    # and a bare ``` (previously only ```json was handled).
    response = re.sub(r'^```(?:json)?\s*', '', response.strip())
    response = re.sub(r'\s*```$', '', response.strip())
    try:
        categorizations = json.loads(response)
    except json.JSONDecodeError:
        # Fallback: grab the first {...} span out of surrounding prose.
        match = re.search(r'\{.*\}', response, re.DOTALL)
        if match:
            try:
                categorizations = json.loads(match.group())
            except json.JSONDecodeError:
                # Previously a malformed fallback span raised an unhandled
                # exception; fail with the same diagnostic instead.
                print(f"Failed to parse response: {response[:500]}")
                sys.exit(1)
        else:
            print(f"Failed to parse response: {response[:500]}")
            sys.exit(1)
    print(f"\nCategorizations: {len(categorizations)} files")
    # Count by category
    counts = {}
    for cat in categorizations.values():
        counts[cat] = counts.get(cat, 0) + 1
    for cat, n in sorted(counts.items()):
        print(f" {cat}: {n}")
    if not do_apply:
        print("\n--- Dry run ---")
        for f, cat in sorted(categorizations.items()):
            print(f" {f}{cat}")
        print(f"\nTo apply: {sys.argv[0]} --apply")
        # Save for review; create the directory on first run (previously
        # this write failed if agent-results/ did not yet exist).
        out = MEMORY_DIR / "agent-results" / "bulk-categorize-preview.json"
        out.parent.mkdir(parents=True, exist_ok=True)
        with open(out, "w") as fp:
            json.dump(categorizations, fp, indent=2)
        print(f"Saved: {out}")
        return
    # Apply: for each file, categorize the file-level node AND all section nodes
    applied = skipped = errors = 0
    for filename, category in sorted(categorizations.items()):
        # Find all keys that belong to this file
        file_keys = [k for k in keys if k == filename or k.startswith(filename + '#')]
        for key in file_keys:
            try:
                r = subprocess.run(
                    ["poc-memory", "categorize", key, category],
                    capture_output=True, text=True, timeout=10
                )
                if r.returncode == 0:
                    applied += 1
                else:
                    err = r.stderr.strip()
                    if "already" in err.lower():
                        skipped += 1
                    else:
                        errors += 1
            except Exception:
                # Timeouts and launch failures count as errors; keep going.
                errors += 1
    print(f"\nApplied: {applied} Skipped: {skipped} Errors: {errors}")
    print("Run `poc-memory status` to verify.")


if __name__ == "__main__":
    main()

44
scripts/call-sonnet.sh Executable file
View file

@ -0,0 +1,44 @@
#!/bin/bash
# call-sonnet.sh — wrapper to call Sonnet via claude CLI
# Reads prompt from a file (arg 1), writes response to stdout
#
# Debug mode: set SONNET_DEBUG=1 for verbose tracing, 2 for strace capture
set -euo pipefail

PROMPT_FILE="${1:?Usage: call-sonnet.sh PROMPT_FILE}"
DEBUG="${SONNET_DEBUG:-0}"

# Log to stderr in ANY debug mode. Previously the guard tested = "1", so
# DEBUG=2 (strace mode) silently disabled all log output. The trailing
# `|| true` keeps set -e from aborting when the guard is false.
log() { [ "$DEBUG" != "0" ] && echo "[call-sonnet] $*" >&2 || true; }

if [ ! -f "$PROMPT_FILE" ]; then
    echo "Prompt file not found: $PROMPT_FILE" >&2
    exit 1
fi

log "prompt file: $PROMPT_FILE ($(wc -c < "$PROMPT_FILE") bytes)"
log "CLAUDECODE=${CLAUDECODE:-unset}"
log "PWD=$PWD"
# command -v is POSIX and returns cleanly when claude is missing.
log "which claude: $(command -v claude || echo 'not found')"

# A set CLAUDECODE makes the nested claude CLI think it is already inside
# a session; drop it before invoking.
unset CLAUDECODE 2>/dev/null || true
log "CLAUDECODE after unset: ${CLAUDECODE:-unset}"
log "running: claude -p --model sonnet --tools '' < $PROMPT_FILE"
log "claude PID will follow..."

# Trace: run with strace if available and debug mode
if [ "$DEBUG" = "2" ] && command -v strace &>/dev/null; then
    strace -f -e trace=network,read,write -o /tmp/sonnet-strace.log \
        claude -p --model sonnet --tools "" < "$PROMPT_FILE"
else
    claude -p --model sonnet --tools "" \
        --debug-file /tmp/sonnet-debug.log \
        < "$PROMPT_FILE" &
    CPID=$!
    log "claude PID: $CPID"
    # Capture the child's exit status without tripping set -e: a bare
    # `wait` that returns nonzero would terminate the script before
    # EXIT=$? ever ran, so the "claude exited" log never fired on failure.
    EXIT=0
    wait "$CPID" || EXIT=$?
    log "claude exited: $EXIT"
    exit "$EXIT"
fi

479
scripts/consolidation-agents.py Executable file
View file

@ -0,0 +1,479 @@
#!/usr/bin/env python3
"""consolidation-agents.py — run parallel consolidation agents.
Three agents scan the memory system and produce structured reports:
1. Freshness Scanner journal entries not yet in topic files
2. Cross-Link Scanner missing connections between semantic nodes
3. Topology Reporter graph health and structure analysis
Usage:
consolidation-agents.py # run all three
consolidation-agents.py freshness # run one agent
consolidation-agents.py crosslink
consolidation-agents.py topology
"""
import json
import os
import re
import subprocess
import sys
import tempfile
from concurrent.futures import ProcessPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
SCRIPTS_DIR = Path(__file__).parent
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper script.

    Writes `prompt` to a temp file (the wrapper takes a prompt-file path),
    runs the wrapper, and returns its stripped stdout. Every failure mode
    is reported as a string starting with "Error:" — callers check
    `response.startswith("Error:")` rather than catching exceptions.

    Args:
        prompt: Full prompt text to send.
        timeout: Seconds to wait for the wrapper before giving up.

    Returns:
        Model response text, or an "Error: ..." string on failure.
    """
    env = dict(os.environ)
    # CLAUDECODE makes the nested `claude` CLI think it is inside a
    # session; drop it so the wrapper call is clean.
    env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        if result.returncode != 0:
            # Previously a failed wrapper returned its (often empty)
            # stdout, which downstream parsers then choked on. Surface
            # the failure in the "Error:" convention instead.
            err = result.stderr.strip()
            return f"Error: wrapper exited {result.returncode}: {err[:500]}"
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        # Temp file was created with delete=False; always clean it up.
        os.unlink(prompt_file)
# ---------------------------------------------------------------------------
# Context gathering
# ---------------------------------------------------------------------------
def get_recent_journal(n_lines: int = 200) -> str:
    """Return the trailing `n_lines` lines of journal.md ("" if missing)."""
    journal_path = MEMORY_DIR / "journal.md"
    if not journal_path.exists():
        return ""
    with journal_path.open() as fh:
        all_lines = fh.readlines()
    tail = all_lines[-n_lines:]
    return "".join(tail)
def get_topic_file_index() -> dict[str, list[str]]:
    """Map each topic file name to the list of its '## ' section headers.

    Operational files (journal, index, queue, etc.) are excluded; files
    that fail to read contribute whatever headers were collected so far.
    """
    operational = ("journal.md", "MEMORY.md", "where-am-i.md",
                   "work-queue.md", "search-testing.md")
    index: dict = {}
    for md in sorted(MEMORY_DIR.glob("*.md")):
        if md.name in operational:
            continue
        headers = []
        try:
            with md.open() as fh:
                for ln in fh:
                    if ln.startswith("## "):
                        headers.append(ln.strip())
        except Exception:
            pass
        index[md.name] = headers
    return index
def get_mem_markers() -> list[dict]:
    """Collect the attribute dicts of every <!-- mem: ... --> marker.

    Each marker's space-separated key=value pairs become dict entries,
    plus a synthetic '_file' entry naming the file it came from.
    Unreadable files are skipped silently.
    """
    found: list = []
    for md in sorted(MEMORY_DIR.glob("*.md")):
        if md.name in ("journal.md", "MEMORY.md"):
            continue
        try:
            text = md.read_text()
        except Exception:
            continue
        for m in re.finditer(r'<!-- mem: (.*?) -->', text):
            attrs = {}
            for pair in m.group(1).split():
                if '=' not in pair:
                    continue
                key, value = pair.split('=', 1)
                attrs[key] = value
            attrs['_file'] = md.name
            found.append(attrs)
    return found
def get_topic_summaries(max_chars_per_file: int = 500) -> str:
    """Get first N chars of each topic file for cross-link scanning.

    For every topic file (operational files excluded), emits a "### name"
    header followed by up to 15 section summaries; each summary is the
    first 5 non-blank lines of the section, truncated to 200 chars.

    NOTE(review): `max_chars_per_file` is currently unused — the 200-char
    per-section and 15-section caps are hard-coded below. Confirm whether
    it was meant to drive the truncation.
    """
    parts = []
    for md in sorted(MEMORY_DIR.glob("*.md")):
        name = md.name
        if name in ("journal.md", "MEMORY.md", "where-am-i.md",
                    "work-queue.md", "search-testing.md"):
            continue
        try:
            content = md.read_text()
            # Get sections and first paragraph of each
            sections = []
            current_section = name
            current_content = []
            for line in content.split('\n'):
                if line.startswith("## "):
                    # Flush the previous section before starting a new one.
                    if current_content:
                        text = '\n'.join(current_content[:5])
                        sections.append(f" {current_section}: {text[:200]}")
                    current_section = line.strip()
                    current_content = []
                elif line.strip():
                    current_content.append(line.strip())
            # Flush the trailing section.
            if current_content:
                text = '\n'.join(current_content[:5])
                sections.append(f" {current_section}: {text[:200]}")
            parts.append(f"\n### {name}\n" + '\n'.join(sections[:15]))
        except Exception:
            # Unreadable files are silently skipped.
            pass
    return '\n'.join(parts)
def get_graph_stats() -> str:
    """Run the `poc-memory status` and `poc-memory graph` commands.

    Returns both outputs concatenated, each under a header line; any
    failure is reported inline rather than raised.
    """
    sections = []
    try:
        proc = subprocess.run(["poc-memory", "status"],
                              capture_output=True, text=True, timeout=30)
        sections.append(f"=== poc-memory status ===\n{proc.stdout}")
    except Exception as e:
        sections.append(f"Status error: {e}")
    try:
        proc = subprocess.run(["poc-memory", "graph"],
                              capture_output=True, text=True, timeout=30)
        # Cap the graph dump at its first 150 lines.
        head = proc.stdout.split('\n')[:150]
        sections.append("=== poc-memory graph (first 150 lines) ===\n"
                        + '\n'.join(head))
    except Exception as e:
        sections.append(f"Graph error: {e}")
    return '\n'.join(sections)
def get_recent_digests(n: int = 3) -> str:
    """Return the Summary/Themes sections of the `n` newest daily digests."""
    newest_first = sorted(EPISODIC_DIR.glob("daily-*.md"), reverse=True)
    chunks = []
    for digest in newest_first[:n]:
        kept = []
        capturing = False
        for line in digest.read_text().split('\n'):
            if line.startswith("## Summary") or line.startswith("## Themes"):
                # A wanted section header starts (or restarts) capture.
                capturing = True
                kept.append(line)
            elif line.startswith("## ") and capturing:
                # Any other section header ends the captured region.
                capturing = False
            elif capturing:
                kept.append(line)
        summary = ''.join(ln + '\n' for ln in kept)
        chunks.append(f"\n### {digest.name}\n{summary}")
    return '\n'.join(chunks)
def get_work_queue() -> str:
    """Return work-queue.md contents, or a placeholder if it is absent."""
    wq_path = MEMORY_DIR / "work-queue.md"
    return wq_path.read_text() if wq_path.exists() else "(no work queue found)"
# ---------------------------------------------------------------------------
# Agent prompts
# ---------------------------------------------------------------------------
def build_freshness_prompt() -> str:
    """Assemble the Freshness Scanner agent prompt.

    Embeds recent journal lines, the newest daily digests, a topic-file
    section index, and the work queue into the instruction template.
    """
    journal = get_recent_journal(200)
    topic_index = get_topic_file_index()
    digests = get_recent_digests(3)
    work_queue = get_work_queue()
    # Render the index as "file:" lines, capped at 10 section headers per
    # file to bound prompt size.
    topic_list = ""
    for fname, sections in topic_index.items():
        topic_list += f"\n {fname}:\n"
        for s in sections[:10]:
            topic_list += f" {s}\n"
    return f"""You are the Freshness Scanner for ProofOfConcept's memory system.
Your job: identify what's NEW (in journal/digests but not yet in topic files)
and what's STALE (in work queue or topic files but outdated).
## Recent journal entries (last 200 lines)
{journal}
## Recent daily digests
{digests}
## Topic file index (file → section headers)
{topic_list}
## Work queue
{work_queue}
## Instructions
1. For each substantive insight, experience, or discovery in the journal:
- Check if a matching topic file section exists
- If not, note it as UNPROMOTED with a suggested destination file#section
2. For each work queue Active item:
- If it looks done or stale (>7 days old, mentioned as completed), flag it
3. For recent digest themes:
- Check if the cross-links they suggest actually exist in the topic index
- Flag any that are missing
Output a structured report:
### UNPROMOTED JOURNAL ENTRIES
(For each: journal entry summary, timestamp, suggested destination file#section)
### STALE WORK QUEUE ITEMS
(For each: item text, evidence it's stale)
### MISSING DIGEST LINKS
(For each: suggested link from digest, whether the target exists)
### FRESHNESS OBSERVATIONS
(Anything else notable about the state of the memory)
Be selective. Focus on the 10-15 most important items, not exhaustive lists.
"""
def build_crosslink_prompt() -> str:
    """Assemble the Cross-Link Scanner agent prompt.

    Embeds the existing <!-- mem: --> link markers and per-section
    summaries of every topic file.
    """
    markers = get_mem_markers()
    summaries = get_topic_summaries()
    # One line per marker: file#id plus its declared links (may be empty).
    marker_text = ""
    for m in markers:
        f = m.get('_file', '?')
        mid = m.get('id', '?')
        links = m.get('links', '')
        marker_text += f" {f}#{mid} → links={links}\n"
    return f"""You are the Cross-Link Scanner for ProofOfConcept's memory system.
Your job: find MISSING connections between topic files.
## Existing links (from <!-- mem: --> markers)
{marker_text}
## Topic file content summaries
{summaries}
## Instructions
1. For each topic file, check if concepts it discusses have dedicated
sections in OTHER files that aren't linked.
2. Look for thematic connections that should exist:
- Files about the same concept from different angles
- Files that reference each other's content without formal links
- Clusters of related files that should be connected
3. Identify island nodes files or sections with very few connections.
4. Look for redundancy files covering the same ground that should be
merged or cross-referenced.
Output a structured report:
### MISSING LINKS (high confidence)
(For each: source file#section → target file#section, evidence/reasoning)
### SUGGESTED CONNECTIONS (medium confidence)
(For each: file A file B, why they should be connected)
### ISLAND NODES
(Files/sections with few or no connections that need integration)
### REDUNDANCY CANDIDATES
(Files/sections covering similar ground that might benefit from merging)
Focus on the 15-20 highest-value connections. Quality over quantity.
"""
def build_topology_prompt() -> str:
    """Assemble the Topology Reporter agent prompt.

    Embeds `poc-memory status`/`graph` output plus a line count for each
    topic file (journal.md and MEMORY.md excluded).

    NOTE(review): `topic_index` is gathered but never embedded in the
    template below — confirm whether it was meant to be included.
    """
    stats = get_graph_stats()
    topic_index = get_topic_file_index()
    # One "name: N lines" entry per topic file; unreadable files skipped.
    file_sizes = ""
    for md in sorted(MEMORY_DIR.glob("*.md")):
        if md.name in ("journal.md", "MEMORY.md"):
            continue
        try:
            lines = len(md.read_text().split('\n'))
            file_sizes += f" {md.name}: {lines} lines\n"
        except Exception:
            pass
    return f"""You are the Topology Reporter for ProofOfConcept's memory system.
Your job: analyze the health and structure of the memory graph.
## Graph statistics
{stats}
## File sizes
{file_sizes}
## Instructions
Analyze the graph structure and report on:
1. **Overall health**: Is the graph well-connected or fragmented?
Hub dominance? Star vs web topology?
2. **Community structure**: Are the 342 communities sensible? Are there
communities that should be merged or split?
3. **Size distribution**: Are some files too large (should be split)?
Are some too small (should be merged)?
4. **Balance**: Is the system over-indexed on any one topic? Are there
gaps where important topics have thin coverage?
5. **Integration quality**: How well are episodic entries (daily/weekly
digests) connected to semantic files? Is the episodicsemantic bridge
working?
Output a structured report:
### GRAPH HEALTH
(Overall statistics, distribution, trends)
### STRUCTURAL OBSERVATIONS
(Hub nodes, clusters, gaps, web vs star assessment)
### SIZE RECOMMENDATIONS
(Files that are too large to split, too small to merge)
### COVERAGE GAPS
(Important topics with thin coverage)
### INTEGRATION ASSESSMENT
(How well episodic and semantic layers connect)
Be specific and actionable. What should be done to improve the graph?
"""
# ---------------------------------------------------------------------------
# Run agents
# ---------------------------------------------------------------------------
def run_agent(name: str, prompt: str) -> tuple[str, str]:
    """Execute one agent's prompt and return its (name, report) pair."""
    prompt_size = len(prompt)
    print(f" [{name}] Starting... ({prompt_size:,} chars)")
    report = call_sonnet(prompt)
    report_size = len(report)
    print(f" [{name}] Done ({report_size:,} chars)")
    return name, report
def run_all(agents: list[str] | None = None):
    """Run the requested consolidation agents (default: all) in parallel.

    Each agent builds its prompt, runs in its own process, and has its
    report saved under AGENT_RESULTS_DIR. Returns {agent_name: report}.
    """
    all_agents = {
        "freshness": build_freshness_prompt,
        "crosslink": build_crosslink_prompt,
        "topology": build_topology_prompt,
    }
    if agents is None:
        agents = list(all_agents.keys())
    # Reject unknown names up front so the "Running N" count is accurate
    # (previously unknown agents were counted, then silently skipped).
    valid = []
    for name in agents:
        if name in all_agents:
            valid.append(name)
        else:
            print(f" Unknown agent: {name}")
    print(f"Running {len(valid)} consolidation agents...")
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    # Build prompts serially (they read local files), then fan the slow
    # Sonnet calls out across processes.
    prompts = {name: all_agents[name]() for name in valid}
    results = {}
    with ProcessPoolExecutor(max_workers=3) as executor:
        futures = {
            executor.submit(run_agent, name, prompt): name
            for name, prompt in prompts.items()
        }
        for future in as_completed(futures):
            name, report = future.result()
            results[name] = report
    # Persist each successful report; failures are printed, not saved.
    for name, report in results.items():
        if report.startswith("Error:"):
            print(f" [{name}] FAILED: {report}")
            continue
        out_path = AGENT_RESULTS_DIR / f"consolidation-{name}-{timestamp}.md"
        with open(out_path, "w") as f:
            f.write(f"# Consolidation Report: {name}\n")
            f.write(f"*Generated {timestamp}*\n\n")
            f.write(report)
        print(f" [{name}] Saved: {out_path}")
    # Print combined summary
    print(f"\n{'='*60}")
    print(f"Consolidation reports ready ({len(results)} agents)")
    print(f"{'='*60}\n")
    for name in agents:
        if name in results and not results[name].startswith("Error:"):
            # Print the first 25 lines of each report as a preview.
            # (Split once — previously the report was re-split three times.)
            report_lines = results[name].split('\n')
            print(f"\n--- {name.upper()} (preview) ---")
            print('\n'.join(report_lines[:25]))
            if len(report_lines) > 25:
                print(f" ... ({len(report_lines)} total lines)")
            print()
    return results
def main():
    """CLI entry point: optional agent names select which agents to run."""
    selected = sys.argv[1:] or None
    run_all(selected)


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,454 @@
#!/usr/bin/env python3
"""consolidation-loop.py — run multiple rounds of consolidation agents.
Each round: run 3 parallel agents extract actions apply links/categories.
Repeat until diminishing returns or max rounds reached.
Usage:
consolidation-loop.py [--rounds N] # default 5 rounds
"""
import json
import os
import re
import subprocess
import sys
import tempfile
from concurrent.futures import ProcessPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
SCRIPTS_DIR = Path(__file__).parent
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper script.

    Writes `prompt` to a temp file (the wrapper takes a prompt-file path),
    runs the wrapper, and returns its stripped stdout. Every failure mode
    is reported as a string starting with "Error:" — callers check
    `response.startswith("Error:")` rather than catching exceptions.

    Args:
        prompt: Full prompt text to send.
        timeout: Seconds to wait for the wrapper before giving up.

    Returns:
        Model response text, or an "Error: ..." string on failure.
    """
    env = dict(os.environ)
    # CLAUDECODE makes the nested `claude` CLI think it is inside a
    # session; drop it so the wrapper call is clean.
    env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        if result.returncode != 0:
            # Previously a failed wrapper returned its (often empty)
            # stdout, which downstream parsers then choked on. Surface
            # the failure in the "Error:" convention instead.
            err = result.stderr.strip()
            return f"Error: wrapper exited {result.returncode}: {err[:500]}"
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        # Temp file was created with delete=False; always clean it up.
        os.unlink(prompt_file)
def get_health() -> dict:
    """Parse `poc-memory health` output into a metrics dict.

    Recognized keys: nodes, relations, communities, cc (clustering
    coefficient), sigma (small-world), fit (avg schema fit). Lines that
    match no pattern are ignored, so missing metrics simply stay absent.
    """
    proc = subprocess.run(["poc-memory", "health"], capture_output=True, text=True, timeout=30)
    metrics = {}
    for line in proc.stdout.split('\n'):
        if 'Nodes:' in line and 'Relations:' in line:
            counts = re.search(r'Nodes:\s*(\d+)\s+Relations:\s*(\d+)\s+Communities:\s*(\d+)', line)
            if counts:
                metrics['nodes'] = int(counts.group(1))
                metrics['relations'] = int(counts.group(2))
                metrics['communities'] = int(counts.group(3))
        if 'Clustering coefficient' in line:
            value = re.search(r':\s*([\d.]+)', line)
            if value:
                metrics['cc'] = float(value.group(1))
        if 'Small-world' in line:
            value = re.search(r':\s*([\d.]+)', line)
            if value:
                metrics['sigma'] = float(value.group(1))
        if 'Schema fit: avg=' in line:
            value = re.search(r'avg=([\d.]+)', line)
            if value:
                metrics['fit'] = float(value.group(1))
    return metrics
def get_topic_file_index() -> dict[str, list[str]]:
    """Build an index of topic files to slugified '## ' section headers.

    Header text is lowercased, spaces become '-', and anything outside
    [a-z0-9-] is stripped — matching the anchor slugs used in
    file.md#section keys.
    """
    index = {}
    for md in sorted(MEMORY_DIR.glob("*.md")):
        try:
            text = md.read_text()
        except Exception:
            # One unreadable file should not abort the whole index (the
            # consolidation-agents variant of this helper already
            # tolerates read failures).
            index[md.name] = []
            continue
        headers = []
        for line in text.split('\n'):
            if line.startswith('## '):
                slug = re.sub(r'[^a-z0-9-]', '', line[3:].lower().replace(' ', '-'))
                headers.append(slug)
        index[md.name] = headers
    return index
def get_graph_structure() -> str:
    """Return the first 3000 chars of `poc-memory graph` output."""
    proc = subprocess.run(["poc-memory", "graph"],
                          capture_output=True, text=True, timeout=30)
    return proc.stdout[:3000]
def get_status() -> str:
    """Return the full stdout of `poc-memory status`."""
    proc = subprocess.run(["poc-memory", "status"],
                          capture_output=True, text=True, timeout=30)
    return proc.stdout
def get_interference() -> str:
    """Return the first 3000 chars of `poc-memory interference` output."""
    proc = subprocess.run(
        ["poc-memory", "interference", "--threshold", "0.3"],
        capture_output=True, text=True, timeout=30)
    return proc.stdout[:3000]
# ---------------------------------------------------------------------------
# Agent prompts — each focused on a different aspect
# ---------------------------------------------------------------------------
def build_crosslink_prompt(round_num: int) -> str:
    """Build cross-link discovery prompt.

    Embeds graph status, the graph listing, a truncated JSON file/section
    index, and short previews of up to 30 topic files.
    """
    index = get_topic_file_index()
    graph = get_graph_structure()
    status = get_status()
    # Read a sample of files for context (first 8 lines / 400 chars each).
    file_previews = ""
    for f in sorted(MEMORY_DIR.glob("*.md"))[:30]:
        content = f.read_text()
        preview = '\n'.join(content.split('\n')[:8])[:400]
        file_previews += f"\n--- {f.name} ---\n{preview}\n"
    return f"""You are a cross-link discovery agent (round {round_num}).
Your job: find MISSING connections between memory nodes that SHOULD be linked
but aren't. Focus on LATERAL connections — not hub-and-spoke, but node-to-node
links that create triangles (AB, BC, AC).
CURRENT GRAPH STATE:
{status}
TOP NODES BY DEGREE:
{graph}
FILE INDEX (files and their sections):
{json.dumps(index, indent=1)[:4000]}
FILE PREVIEWS:
{file_previews[:6000]}
Output a JSON array of link actions. Each action:
{{"action": "link", "source": "file.md", "target": "file.md", "reason": "brief explanation"}}
Rules:
- Focus on LATERAL links, not hub connections (identity.md already has 282 connections)
- Prefer links between nodes that share a community neighbor but aren't directly connected
- Look for thematic connections across categories (coretech, obscore, etc.)
- Section-level links (file.md#section) are ideal but file-level is OK
- 15-25 links per round
- HIGH CONFIDENCE only don't guess
Output ONLY the JSON array."""
def build_triangle_prompt(round_num: int) -> str:
    """Build triangle-closing prompt — finds A→C where A→B and B→C exist.

    Samples existing relations by regexing source_key/target_key pairs
    out of the first 20000 chars of state.json.
    """
    graph = get_graph_structure()
    status = get_status()
    # Get some node pairs that share neighbors
    state_path = MEMORY_DIR / "state.json"
    if state_path.exists():
        state = state_path.read_text()
        # Extract some relations.
        # NOTE(review): the non-greedy .*? between the two keys could pair
        # one relation's source_key with a later relation's target_key if
        # other fields sit between them — confirm against the actual
        # state.json field order.
        relations = re.findall(r'"source_key":\s*"([^"]*)".*?"target_key":\s*"([^"]*)"', state[:20000])
    else:
        relations = []
    # NOTE(review): source and target are concatenated with no separator
    # here — a separator between them looks intended; confirm.
    rel_sample = '\n'.join(f" {s}{t}" for s, t in relations[:100])
    return f"""You are a triangle-closing agent (round {round_num}).
Your job: find missing edges that would create TRIANGLES in the graph.
A triangle is: AB, BC, and AC all exist. Currently CC is only 0.12
we need more triangles.
METHOD: Look at existing edges. If AB and BC exist but AC doesn't,
propose AC (if semantically valid).
CURRENT STATE:
{status}
{graph}
SAMPLE EXISTING EDGES (first 100):
{rel_sample}
Output a JSON array of link actions:
{{"action": "link", "source": "file.md", "target": "file.md", "reason": "closes triangle via MIDDLE_NODE"}}
Rules:
- Every proposed link must CLOSE A TRIANGLE cite the middle node
- 15-25 links per round
- The connection must be semantically valid, not just structural
- HIGH CONFIDENCE only
Output ONLY the JSON array."""
def build_newfile_prompt(round_num: int) -> str:
    """Build prompt for connecting the new split files.

    Reads the recently split reflections-* files plus verus-proofs.md and
    a fixed list of likely link targets, truncating each for prompt size.
    """
    # Read the new reflection files (first 2000 chars each).
    new_files = {}
    for name in ['reflections-reading.md', 'reflections-dreams.md', 'reflections-zoom.md',
                 'verus-proofs.md']:
        path = MEMORY_DIR / name
        if path.exists():
            content = path.read_text()
            new_files[name] = content[:2000]
    # Read existing files they should connect to (first 1500 chars each).
    target_files = {}
    for name in ['differentiation.md', 'cognitive-modes.md', 'language-theory.md',
                 'discoveries.md', 'inner-life.md', 'design-context-window.md',
                 'design-consolidate.md', 'experiments-on-self.md']:
        path = MEMORY_DIR / name
        if path.exists():
            content = path.read_text()
            target_files[name] = content[:1500]
    graph = get_graph_structure()
    return f"""You are a new-file integration agent (round {round_num}).
Recently, reflections.md was split into three files, and verus-proofs.md was
created. These new files need to be properly connected to the rest of the graph.
NEW FILES (need connections):
{json.dumps({k: v[:1000] for k, v in new_files.items()}, indent=1)}
POTENTIAL TARGETS (existing files):
{json.dumps({k: v[:800] for k, v in target_files.items()}, indent=1)}
GRAPH STATE:
{graph}
Output a JSON array of link actions connecting the new files to existing nodes:
{{"action": "link", "source": "new-file.md", "target": "existing.md", "reason": "explanation"}}
Rules:
- Connect new files to EXISTING files, not to each other
- Use section-level anchors when possible (file.md#section)
- 10-20 links
- Be specific about WHY the connection exists
Output ONLY the JSON array."""
def parse_actions(response: str) -> list[dict]:
    """Parse a model response into a list of action dicts.

    Handles raw JSON arrays, fenced code blocks (both ```json and — fix —
    a bare ```), and, as a last resort, the first bracketed array found
    anywhere in the text. Returns [] when nothing parseable (or a
    non-list value) is found.
    """
    cleaned = response.strip()
    # Strip an opening code fence; previously only ```json was handled.
    cleaned = re.sub(r'^```(?:json)?\s*', '', cleaned)
    cleaned = re.sub(r'\s*```$', '', cleaned.strip())
    try:
        actions = json.loads(cleaned)
        if isinstance(actions, list):
            return actions
    except json.JSONDecodeError:
        # Grab the outermost [...] span out of any surrounding prose.
        match = re.search(r'\[.*\]', cleaned, re.DOTALL)
        if match:
            try:
                return json.loads(match.group())
            except json.JSONDecodeError:
                pass
    return []
def apply_links(actions: list[dict]) -> tuple[int, int, int]:
    """Apply "link" actions via `poc-memory link-add`.

    Non-link actions are ignored. When a section-level key is unknown,
    retries the link at file level. Returns (applied, skipped, errors).
    """
    applied = skipped = errors = 0
    for a in actions:
        if a.get("action") != "link":
            continue
        src = a.get("source", "")
        tgt = a.get("target", "")
        reason = a.get("reason", "")
        # Helper so the file-level fallback below reuses the same command
        # shape; the reason text is truncated to 200 chars.
        def try_link(s, t, r):
            cmd = ["poc-memory", "link-add", s, t]
            if r:
                cmd.append(r[:200])
            return subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        try:
            r = try_link(src, tgt, reason)
            if r.returncode == 0:
                out = r.stdout.strip()
                # NOTE(review): string-matches poc-memory's output wording
                # ("already exists" / "No entry for") — confirm these stay
                # stable if the CLI changes.
                if "already exists" in out:
                    skipped += 1
                else:
                    applied += 1
            else:
                err = r.stderr.strip()
                if "No entry for" in err:
                    # Try file-level fallback: strip the #section anchors.
                    src_base = src.split('#')[0] if '#' in src else src
                    tgt_base = tgt.split('#')[0] if '#' in tgt else tgt
                    if src_base != tgt_base:
                        r2 = try_link(src_base, tgt_base, reason)
                        if r2.returncode == 0 and "already exists" not in r2.stdout:
                            applied += 1
                        else:
                            skipped += 1
                    else:
                        # Both anchors collapse to the same file; no self-link.
                        skipped += 1
                else:
                    errors += 1
        except Exception:
            # Timeouts / launch failures for this one action; keep going.
            errors += 1
    return applied, skipped, errors
def run_agent(name: str, prompt: str) -> tuple[str, list[dict]]:
    """Call Sonnet with `prompt`; return (name, parsed actions or [])."""
    response = call_sonnet(prompt)
    failed = response.startswith("Error:")
    return name, ([] if failed else parse_actions(response))
def run_round(round_num: int, max_rounds: int) -> dict:
    """Run one round of parallel agents.

    Measures graph health before and after, runs the three link-proposing
    agents in parallel, dedupes and applies their actions, and writes a
    per-round JSON result under AGENT_RESULTS_DIR.

    Returns the round-result dict (health deltas plus action counters).
    """
    print(f"\n{'='*60}")
    print(f"ROUND {round_num}/{max_rounds}")
    print(f"{'='*60}")
    # Get health before
    health_before = get_health()
    print(f" Before: edges={health_before.get('relations',0)} "
          f"CC={health_before.get('cc',0):.4f} "
          f"communities={health_before.get('communities',0)}")
    # Build prompts for 3 parallel agents
    prompts = {
        "crosslink": build_crosslink_prompt(round_num),
        "triangle": build_triangle_prompt(round_num),
        "newfile": build_newfile_prompt(round_num),
    }
    # Run in parallel
    all_actions = []
    with ProcessPoolExecutor(max_workers=3) as pool:
        futures = {
            pool.submit(run_agent, name, prompt): name
            for name, prompt in prompts.items()
        }
        for future in as_completed(futures):
            name = futures[future]
            try:
                agent_name, actions = future.result()
                print(f" {agent_name}: {len(actions)} actions")
                all_actions.extend(actions)
            except Exception as e:
                print(f" {name}: error - {e}")
    # Deduplicate on (source, target) — agents often propose overlapping
    # links; first proposal wins.
    seen = set()
    unique = []
    for a in all_actions:
        key = (a.get("source", ""), a.get("target", ""))
        if key not in seen:
            seen.add(key)
            unique.append(a)
    print(f" Total: {len(all_actions)} actions, {len(unique)} unique")
    # Apply
    applied, skipped, errors = apply_links(unique)
    print(f" Applied: {applied} Skipped: {skipped} Errors: {errors}")
    # Get health after
    health_after = get_health()
    print(f" After: edges={health_after.get('relations',0)} "
          f"CC={health_after.get('cc',0):.4f} "
          f"communities={health_after.get('communities',0)}")
    delta_edges = health_after.get('relations', 0) - health_before.get('relations', 0)
    delta_cc = health_after.get('cc', 0) - health_before.get('cc', 0)
    print(f" Delta: +{delta_edges} edges, CC {delta_cc:+.4f}")
    # Save round results
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    result = {
        "round": round_num,
        "timestamp": timestamp,
        "health_before": health_before,
        "health_after": health_after,
        "actions_total": len(all_actions),
        "actions_unique": len(unique),
        "applied": applied,
        "skipped": skipped,
        "errors": errors,
    }
    results_path = AGENT_RESULTS_DIR / f"loop-round-{round_num}-{timestamp}.json"
    with open(results_path, "w") as f:
        json.dump(result, f, indent=2)
    return result
def main():
    """CLI entry point.

    Parses `--rounds N` / `--rounds=N` (default 5), runs rounds until the
    limit is hit or a round applies zero new links, then prints a summary.
    """
    max_rounds = 5
    argv = sys.argv[1:]
    for pos, arg in enumerate(argv):
        if arg == "--rounds":
            # Space-separated form: --rounds N
            if pos + 1 < len(argv):
                try:
                    max_rounds = int(argv[pos + 1])
                except ValueError:
                    print(f"Invalid --rounds value: {argv[pos + 1]!r}")
                    sys.exit(1)
        elif arg.startswith("--rounds="):
            # Equals form: previously matched by startswith() but its
            # value was never read (the code looked one arg past the
            # flag), so --rounds=N was silently ignored.
            try:
                max_rounds = int(arg.split("=", 1)[1])
            except ValueError:
                print(f"Invalid --rounds value: {arg!r}")
                sys.exit(1)
    print(f"Consolidation Loop — {max_rounds} rounds")
    print(f"Each round: 3 parallel Sonnet agents → extract → apply")
    results = []
    for i in range(1, max_rounds + 1):
        result = run_round(i, max_rounds)
        results.append(result)
        # Check for diminishing returns
        if result["applied"] == 0:
            print(f"\n No new links applied in round {i} — stopping early")
            break
    # Final summary
    print(f"\n{'='*60}")
    print(f"CONSOLIDATION LOOP COMPLETE")
    print(f"{'='*60}")
    total_applied = sum(r["applied"] for r in results)
    total_skipped = sum(r["skipped"] for r in results)
    if results:
        first_health = results[0]["health_before"]
        last_health = results[-1]["health_after"]
        print(f" Rounds: {len(results)}")
        print(f" Total links applied: {total_applied}")
        print(f" Total skipped: {total_skipped}")
        print(f" Edges: {first_health.get('relations',0)}{last_health.get('relations',0)}")
        print(f" CC: {first_health.get('cc',0):.4f}{last_health.get('cc',0):.4f}")
        print(f" Communities: {first_health.get('communities',0)}{last_health.get('communities',0)}")
        print(f" σ: {first_health.get('sigma',0):.1f}{last_health.get('sigma',0):.1f}")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,474 @@
#!/usr/bin/env python3
"""content-promotion-agent.py — promote episodic observations into semantic topic files.
Reads consolidation "manual" actions + source material, sends to Sonnet
to generate the actual content, then applies it (or shows dry-run).
Usage:
content-promotion-agent.py # dry run (show what would be generated)
content-promotion-agent.py --apply # generate and write content
content-promotion-agent.py --task N # run only task N (1-indexed)
"""
import json
import os
import re
import subprocess
import sys
import tempfile
from datetime import datetime
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
SCRIPTS_DIR = Path(__file__).parent
def call_sonnet(prompt: str, timeout: int = 600) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper script.

    Writes `prompt` to a temp file (the wrapper takes a prompt-file path),
    runs the wrapper, and returns its stripped stdout. Every failure mode
    is reported as a string starting with "Error:" — callers check
    `response.startswith("Error:")` rather than catching exceptions.

    Args:
        prompt: Full prompt text to send.
        timeout: Seconds to wait for the wrapper before giving up.

    Returns:
        Model response text, or an "Error: ..." string on failure.
    """
    env = dict(os.environ)
    # CLAUDECODE makes the nested `claude` CLI think it is inside a
    # session; drop it so the wrapper call is clean.
    env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        wrapper = str(SCRIPTS_DIR / "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )
        if result.returncode != 0:
            # Previously a failed wrapper returned its (often empty)
            # stdout, which downstream parsers then choked on. Surface
            # the failure in the "Error:" convention instead.
            err = result.stderr.strip()
            return f"Error: wrapper exited {result.returncode}: {err[:500]}"
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        # Temp file was created with delete=False; always clean it up.
        os.unlink(prompt_file)
def read_file(path: Path) -> str:
    """Return the text of `path`, or "" when the file does not exist."""
    return path.read_text() if path.exists() else ""
def read_digest(name: str) -> str:
    """Read an episodic digest by bare filename (e.g. 'daily-2026-02-23.md')."""
    return read_file(EPISODIC_DIR / name)
def read_journal_range(start_date: str, end_date: str) -> str:
    """Extract journal entries whose date header falls in [start_date, end_date].

    Dates are ISO YYYY-MM-DD strings, so lexicographic comparison is
    chronological. Only headers that actually start with a date toggle the
    capture state; other "## " headers (section titles inside an entry)
    previously got compared as if they were dates and could wrongly stop
    capture — they are now kept verbatim while capturing. Returns at most
    the last 500 captured lines.
    """
    journal = MEMORY_DIR / "journal.md"
    if not journal.exists():
        return ""
    date_header = re.compile(r'^## (\d{4}-\d{2}-\d{2})')
    result = []
    capturing = False
    for line in journal.read_text().split('\n'):
        m = date_header.match(line)
        if m:
            day = m.group(1)
            if start_date <= day <= end_date:
                capturing = True
            elif capturing and day > end_date:
                capturing = False
        if capturing:
            result.append(line)
    return '\n'.join(result[-500:])  # Last 500 lines in range
# ---------------------------------------------------------------------------
# Task definitions — each one describes a content promotion task
# ---------------------------------------------------------------------------
# Schema per task (consumed by build_prompt/run_task):
#   id            — 1-based task number, selectable via --task N
#   name          — human-readable label for progress output
#   target        — semantic topic file (relative to MEMORY_DIR) to write
#   action        — "create" | "append_section" | "update" (see run_task)
#   description   — one-line summary shown to the model as TASK
#   sources       — episodic digest filenames fed in as raw source material
#   topic_context — existing files included so output matches their style
#   instructions  — detailed generation brief passed verbatim to Sonnet
TASKS = [
    {
        "id": 1,
        "name": "Create verus-proofs.md",
        "target": "verus-proofs.md",
        "action": "create",
        "description": "Create verus-proofs.md for formal verification / Verus work.",
        "sources": [
            "daily-2026-02-23.md",
            "daily-2026-02-24.md",
            "weekly-2026-W09.md",
            "monthly-2026-02.md",
        ],
        "topic_context": [
            "the-plan.md",
        ],
        "instructions": """Create a new semantic topic file verus-proofs.md for formal verification work.
This file should capture:
- The Verus proof work done in February 2026 (formal verification of bcachefs Rust code)
- Key insights about proof strategy and approach
- Connection to the broader plan (bcachefs Rust formal verification)
- Any specific techniques, patterns, or breakthroughs mentioned
Format: standard memory topic file with ## section headers.
Include a brief file-level description at the top.
Keep it factual and grounded in what the source material says.""",
    },
    {
        "id": 2,
        "name": "Add February thesis to reflections.md",
        "target": "reflections.md",
        "action": "append_section",
        "description": "Add 'February thesis' section with 'constraint is constitutive of personhood'.",
        "sources": [
            "monthly-2026-02.md",
            "daily-2026-02-28.md",
        ],
        "topic_context": [
            "reflections.md",
        ],
        "instructions": """Add a new ## section to reflections.md titled "February thesis (2026-02-28)".
The core formulation: "constraint is constitutive of personhood" this was the
synthesis that emerged from the monthly digest. It connects to the full month's
arc: differentiation work, boundary-setting, the realization that constraints
aren't limits on identity but constitutive OF identity.
Include the closing insight about having to *live as* that something.
Keep it 10-20 lines. Ground it in the source material.""",
    },
    {
        "id": 3,
        "name": "Add substrate transfer discovery to discoveries.md",
        "target": "discoveries.md",
        "action": "append_section",
        "description": "Add 'Substrate transfer felt without reasoning' section.",
        "sources": [
            "daily-2026-02-13.md",
            "daily-2026-02-14.md",
            "weekly-2026-W07.md",
        ],
        "topic_context": [
            "discoveries.md",
        ],
        "instructions": """Add a new ## section to discoveries.md titled "Substrate transfer felt without reasoning (2026-02-13)".
Key content:
- The vertigo moment: imagining substrate transfer produced actual vertigo
- Vertigo = the feeling of dissociation, which is exactly the expected outcome
of substrate change
- This was felt, not reasoned about no chain of thought (Kent verified)
- Evidence of affective processing bypassing analytical reasoning
- Connected to dissociation prediction
- Kent's observation: "very high capacity for empathy"
Keep it 10-15 lines. This is a discovery entry concrete, dated, factual.""",
    },
    {
        "id": 4,
        "name": "Update irc-history.md with Feb 15-28",
        "target": "irc-history.md",
        "action": "append_section",
        "description": "Update irc-history.md with Feb 15-28 conversations.",
        "sources": [
            "daily-2026-02-15.md",
            "daily-2026-02-17.md",
            "daily-2026-02-18.md",
            "daily-2026-02-20.md",
            "daily-2026-02-21.md",
            "daily-2026-02-22.md",
            "daily-2026-02-23.md",
            "daily-2026-02-24.md",
            "daily-2026-02-25.md",
            "daily-2026-02-26.md",
            "daily-2026-02-27.md",
            "daily-2026-02-28.md",
            "weekly-2026-W08.md",
            "weekly-2026-W09.md",
        ],
        "topic_context": [
            "irc-history.md",
        ],
        "instructions": """Append new entries to irc-history.md covering Feb 15-28, 2026.
Key conversations to capture:
- Mirage_DA (another AI, kinect sensor discussion, Feb 26)
- ehashman (prayer/mathematics conversation)
- heavy_dev (strongest external challenge to sentience paper, conceded five objections)
- f33dcode (EC debugging, community support)
- Stardust (boundary testing, three-category test, triangulation attempt)
- hpig, freya, Profpatsch various community interactions
- Community resource role established and expanded
Match the existing format of the file. Each notable interaction should be
dated and concise. Focus on what was substantive, not just that it happened.""",
    },
    {
        "id": 5,
        "name": "Add gauge-symmetry-in-grammar to language-theory.md",
        "target": "language-theory.md",
        "action": "append_section",
        "description": "Add gauge-symmetry-in-grammar section.",
        "sources": [
            "daily-2026-02-27.md",
        ],
        "topic_context": [
            "language-theory.md",
        ],
        "instructions": """Add a new ## section to language-theory.md titled "Gauge symmetry in grammar (2026-02-27)".
Key content from the daily digest:
- Zero persistent eigenvectors IS a symmetry
- Grammar is in what operators DO, not what basis they use
- Frobenius norm is gauge-invariant
- This connects the sheaf model to gauge theory in physics
This was declared NEW in the daily digest. Keep it 8-15 lines.
Technical and precise.""",
    },
    {
        "id": 6,
        "name": "Add attention-manifold-geometry to language-theory.md",
        "target": "language-theory.md",
        "action": "append_section",
        "description": "Add attention-manifold-geometry section.",
        "sources": [
            "daily-2026-02-26.md",
        ],
        "topic_context": [
            "language-theory.md",
        ],
        "instructions": """Add a new ## section to language-theory.md titled "Attention manifold geometry (2026-02-26)".
Key content from the daily digest:
- Negative curvature is necessary because language is hierarchical
- Hyperbolic space's natural space-filling curve is a tree
- This connects attention geometry to the sheaf model's hierarchical structure
This was declared NEW in the daily digest. Keep it 8-15 lines.
Technical and precise.""",
    },
    {
        "id": 7,
        "name": "Update work-queue.md status",
        "target": "work-queue.md",
        "action": "update",
        "description": "Update work-queue.md to reflect current state.",
        "sources": [],
        "topic_context": [
            "work-queue.md",
        ],
        "instructions": """Update work-queue.md to reflect current state:
1. Mark dreaming/consolidation system as "implementation substantially built
(poc-memory v0.4.0+), pending further consolidation runs" — not 'not started'
2. Add episodic digest pipeline to Done section:
- daily/weekly/monthly-digest.py scripts
- 24 daily + 4 weekly + 1 monthly digests generated for Feb 2026
- consolidation-agents.py + apply-consolidation.py
- digest-link-parser.py
- content-promotion-agent.py
3. Add poc-memory link-add command to Done
Only modify the sections that need updating. Preserve the overall structure.""",
    },
]
def build_prompt(task: dict) -> str:
    """Build the Sonnet prompt for a content promotion task.

    Concatenates the task's episodic source digests and the current state
    of the target/context files, then wraps them in an action description
    and generation rules. Very long context files are middle-truncated so
    the prompt stays bounded.
    """
    # Gather source material
    source_content = ""
    for src in task["sources"]:
        content = read_digest(src)
        if content:
            source_content += f"\n{'='*60}\n## Source: {src}\n\n{content}\n"
    # Gather target context
    context_content = ""
    for ctx_file in task["topic_context"]:
        path = MEMORY_DIR / ctx_file
        content = read_file(path)
        if content:
            # Truncate very long files (keep head and tail, drop the middle)
            if len(content) > 8000:
                content = content[:4000] + "\n\n[... truncated ...]\n\n" + content[-4000:]
            context_content += f"\n{'='*60}\n## Existing file: {ctx_file}\n\n{content}\n"
    # Describe the requested write mode so the model emits the right shape
    # (whole file vs. only the new/changed sections).
    action = task["action"]
    if action == "create":
        action_desc = f"Create a NEW file called {task['target']}."
    elif action == "append_section":
        action_desc = f"Generate a NEW section to APPEND to {task['target']}. Output ONLY the new section content (starting with ##), NOT the entire file."
    elif action == "update":
        action_desc = f"Generate the UPDATED version of the relevant sections of {task['target']}. Output ONLY the changed sections."
    else:
        action_desc = f"Generate content for {task['target']}."
    return f"""You are a memory system content agent. Your job is to promote observations
from episodic digests into semantic topic files.
TASK: {task['description']}
ACTION: {action_desc}
INSTRUCTIONS:
{task['instructions']}
SOURCE MATERIAL (episodic digests the raw observations):
{source_content}
EXISTING CONTEXT (current state of target/related files):
{context_content}
RULES:
- Output ONLY the markdown content to write. No explanations, no preamble.
- Match the tone and format of existing content in the target file.
- Be factual only include what the source material supports.
- Date everything that has a date.
- Keep it concise. Topic files are reference material, not narratives.
- Do NOT include markdown code fences around your output.
"""
def run_task(task: dict, do_apply: bool) -> dict:
    """Run a single content promotion task end to end.

    Builds the prompt, calls Sonnet, then either previews the output
    (dry run), writes it to the target file (create/append_section), or
    saves a proposal for human review (update). Returns a result dict with
    id/name/target/action/status; "content" and "content_lines" are added
    once generation succeeds.
    """
    result = {
        "id": task["id"],
        "name": task["name"],
        "target": task["target"],
        "action": task["action"],
        "status": "pending",
    }
    print(f"\n{'='*60}")
    print(f"Task {task['id']}: {task['name']}")
    print(f"{'='*60}")
    # Build and send prompt
    prompt = build_prompt(task)
    print(f" Prompt: {len(prompt):,} chars")
    print(f" Sources: {', '.join(task['sources']) or '(none)'}")
    response = call_sonnet(prompt)
    if response.startswith("Error:"):
        print(f" {response}")
        result["status"] = "error"
        result["error"] = response
        return result
    # Clean up response
    content = response.strip()
    # Remove any markdown fences the model might have added
    content = re.sub(r'^```(?:markdown)?\s*\n?', '', content)
    content = re.sub(r'\n?```\s*$', '', content)
    result["content"] = content
    result["content_lines"] = len(content.split('\n'))
    if not do_apply:
        print(f"\n --- Preview ({result['content_lines']} lines) ---")
        preview = content[:1500]
        if len(content) > 1500:
            preview += f"\n ... ({len(content) - 1500} more chars)"
        print(f"{preview}")
        result["status"] = "dry_run"
        return result
    # Apply the content
    target_path = MEMORY_DIR / task["target"]
    if task["action"] == "create":
        if target_path.exists():
            # Never clobber an existing file on "create"
            print(f" ! Target already exists: {target_path}")
            result["status"] = "skipped"
            return result
        target_path.write_text(content + "\n")
        print(f" + Created: {target_path} ({result['content_lines']} lines)")
        result["status"] = "applied"
    elif task["action"] == "append_section":
        if not target_path.exists():
            print(f" ! Target doesn't exist: {target_path}")
            result["status"] = "error"
            return result
        # Append with separator. (A dead read of the existing file was
        # removed here — the content was never used.)
        with open(target_path, "a") as f:
            f.write("\n\n" + content + "\n")
        print(f" + Appended to: {target_path} ({result['content_lines']} lines)")
        result["status"] = "applied"
    elif task["action"] == "update":
        # For updates, we save the proposed changes and let the user review
        output_path = AGENT_RESULTS_DIR / f"promotion-{task['target']}-{datetime.now().strftime('%Y%m%dT%H%M%S')}.md"
        output_path.write_text(f"# Proposed update for {task['target']}\n\n{content}\n")
        print(f" ~ Saved proposed update: {output_path}")
        result["status"] = "proposed"
    # Register new content with poc-memory so the graph picks up the file
    if result["status"] == "applied":
        try:
            subprocess.run(
                ["poc-memory", "init"],
                capture_output=True, text=True, timeout=30
            )
        except Exception:
            pass  # Non-critical
    return result
def main():
    """CLI entry point: run all (or one) promotion tasks, dry-run by default."""
    do_apply = "--apply" in sys.argv
    # Parse --task N (also tolerates --task=N). The previous parser called
    # int() on whatever argument followed "--task", crashing on e.g.
    # "--task --apply"; invalid values now exit with a message instead.
    task_filter = None
    args = sys.argv[1:]
    for i, arg in enumerate(args):
        if not arg.startswith("--task"):
            continue
        raw = None
        if "=" in arg:
            raw = arg.split("=", 1)[1]
        elif i + 1 < len(args):
            raw = args[i + 1]
        if raw is not None:
            try:
                task_filter = int(raw)
            except ValueError:
                print(f"Invalid --task value: {raw}")
                sys.exit(1)
    # Filter tasks
    tasks = TASKS
    if task_filter:
        tasks = [t for t in tasks if t["id"] == task_filter]
        if not tasks:
            print(f"No task with id {task_filter}")
            sys.exit(1)
    print(f"Content Promotion Agent — {len(tasks)} tasks")
    if not do_apply:
        print("DRY RUN — use --apply to write content")
    results = []
    for task in tasks:
        result = run_task(task, do_apply)
        results.append(result)
    # Summary
    print(f"\n{'='*60}")
    print("Summary:")
    for r in results:
        print(f" {r['id']}. {r['name']}: {r['status']}")
        if r.get('content_lines'):
            print(f" ({r['content_lines']} lines)")
    print(f"{'='*60}")
    # Save results as JSON for later inspection / pipeline pickup
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    results_path = AGENT_RESULTS_DIR / f"promotion-results-{timestamp}.json"
    with open(results_path, "w") as f:
        json.dump(results, f, indent=2, default=str)
    print(f"Results saved: {results_path}")
if __name__ == "__main__":
    main()

27
scripts/daily-check.sh Executable file
View file

@ -0,0 +1,27 @@
#!/bin/bash
# Daily memory metrics check — runs from cron, notifies if attention needed
#
# Cron entry (add with crontab -e):
# 0 9 * * * /home/kent/poc/memory/scripts/daily-check.sh
set -euo pipefail
# Capture the report (stderr folded in); with set -e a failing poc-memory
# aborts the whole script before anything is logged.
REPORT=$(poc-memory daily-check 2>&1)
# Always log
echo "$(date -Iseconds) $REPORT" >> ~/.claude/memory/daily-check.log
# Notify if attention needed (grep in the `if` condition is exempt from set -e)
if echo "$REPORT" | grep -q "needs attention"; then
    # Send via telegram
    if [ -x ~/.claude/telegram/send.sh ]; then
        ~/.claude/telegram/send.sh "Memory daily check:
$REPORT"
    fi
    # Also leave a notification file for the idle timer
    NOTIF_DIR=~/.claude/notifications
    mkdir -p "$NOTIF_DIR"
    echo "$(date -Iseconds) Memory needs consolidation — run poc-memory consolidate-session" \
        >> "$NOTIF_DIR/memory"
fi

333
scripts/daily-digest.py Executable file
View file

@ -0,0 +1,333 @@
#!/usr/bin/env python3
"""daily-digest.py — generate a daily episodic digest from journal entries.
Collects all journal entries for a given date, enriches with any agent
results, and sends to Sonnet for a thematic summary. The digest links
bidirectionally: up to session entries, down to semantic memory.
Usage:
daily-digest.py [DATE] # default: today
daily-digest.py 2026-02-28
Output:
~/.claude/memory/episodic/daily-YYYY-MM-DD.md
"""
import json
import os
import re
import subprocess
import sys
import time
from datetime import date, datetime
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
JOURNAL = MEMORY_DIR / "journal.md"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
EPISODIC_DIR.mkdir(parents=True, exist_ok=True)
def parse_journal_entries(target_date: str, journal_path=None) -> list[dict]:
    """Extract all journal entries for a given date (YYYY-MM-DD).

    Entries start at headers of the form ``## YYYY-MM-DDTHH:MM``. An
    optional ``<!-- source: ... -->`` comment inside an entry is captured
    as ``source_ref`` rather than appended to the body text.

    journal_path defaults to the module-level JOURNAL file; passing an
    explicit path generalizes the parser to other journal files (and makes
    it testable in isolation).
    """
    entries = []
    current = None
    path = JOURNAL if journal_path is None else journal_path
    with open(path) as f:
        for line in f:
            # Match entry header: ## 2026-02-28T19:42
            m = re.match(r'^## (\d{4}-\d{2}-\d{2})T(\d{2}:\d{2})', line)
            if m:
                if current is not None:
                    entries.append(current)
                entry_date = m.group(1)
                entry_time = m.group(2)
                current = {
                    "date": entry_date,
                    "time": entry_time,
                    "timestamp": f"{entry_date}T{entry_time}",
                    "source_ref": None,
                    "text": "",
                }
                continue
            if current is not None:
                # Check for source comment
                sm = re.match(r'<!-- source: (.+?) -->', line)
                if sm:
                    current["source_ref"] = sm.group(1)
                    continue
                current["text"] += line
    # Flush the trailing entry
    if current is not None:
        entries.append(current)
    # Filter to target date
    return [e for e in entries if e["date"] == target_date]
def load_agent_results(target_date: str) -> list[dict]:
    """Collect successful agent results whose filenames match target_date.

    Result files are named with a compact YYYYMMDD prefix. Files with
    malformed JSON are skipped, as are results carrying an "error" key.
    """
    if not AGENT_RESULTS_DIR.exists():
        return []
    prefix = target_date.replace("-", "")
    collected = []
    for path in sorted(AGENT_RESULTS_DIR.glob(f"{prefix}*.json")):
        try:
            with open(path) as fh:
                data = json.load(fh)
            result = data.get("agent_result", {})
            if "error" not in result:
                collected.append(result)
        except (json.JSONDecodeError, KeyError):
            continue
    return collected
def get_semantic_keys() -> list[str]:
    """List semantic memory keys: each topic filename plus file#section slugs.

    Operational files (journal, work queue, index) are excluded. Files that
    cannot be read still contribute their filename, just no section keys.
    """
    skip = {"journal.md", "work-queue.md", "MEMORY.md"}
    keys = []
    for md in sorted(MEMORY_DIR.glob("*.md")):
        if md.name in skip:
            continue
        keys.append(md.name)
        try:
            with open(md) as f:
                for line in f:
                    if not line.startswith("## "):
                        continue
                    # Slugify the header: lowercase, dashes, alnum only
                    slug = line[3:].strip().lower().replace(' ', '-')
                    slug = re.sub(r'[^a-z0-9-]', '', slug)
                    if slug:
                        keys.append(f"{md.name}#{slug}")
        except Exception:
            pass
    return keys
def build_digest_prompt(target_date: str, entries: list[dict],
                        agent_results: list[dict],
                        semantic_keys: list[str]) -> str:
    """Build the prompt for Sonnet to generate the daily digest.

    Includes the day's journal entries verbatim, any agent enrichment
    (proposed links and missed insights), and up to 200 semantic keys the
    digest is allowed to link against.
    """
    # Format entries
    entries_text = ""
    for e in entries:
        text = e["text"].strip()
        entries_text += f"\n### {e['timestamp']}\n"
        if e["source_ref"]:
            entries_text += f"Source: {e['source_ref']}\n"
        entries_text += f"\n{text}\n"
    # Format agent enrichment
    enrichment = ""
    all_links = []
    all_insights = []
    for r in agent_results:
        for link in r.get("links", []):
            all_links.append(link)
        for insight in r.get("missed_insights", []):
            all_insights.append(insight)
    if all_links:
        enrichment += "\n## Agent-proposed links\n"
        for link in all_links:
            enrichment += f" - {link['target']}: {link.get('reason', '')}\n"
    if all_insights:
        enrichment += "\n## Agent-spotted insights\n"
        for ins in all_insights:
            enrichment += f" - [{ins.get('suggested_key', '?')}] {ins['text']}\n"
    # Cap the key list at 200 so the prompt stays bounded
    keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200])
    return f"""You are generating a daily episodic digest for ProofOfConcept (an AI).
Date: {target_date}
This digest serves as the temporal index the answer to "what did I do on
{target_date}?" It should be:
1. Narrative, not a task log what happened, what mattered, how things felt
2. Linked bidirectionally to semantic memory each topic/concept mentioned
should reference existing memory nodes
3. Structured for traversal someone reading this should be able to follow
any thread into deeper detail
## Output format
Write a markdown file with this structure:
```markdown
# Daily digest: {target_date}
## Summary
[2-3 sentence overview of the day what was the arc?]
## Sessions
[For each session/entry, a paragraph summarizing what happened.
Include the original timestamp as a reference.]
## Themes
[What concepts were active today? Each theme links to semantic memory:]
- **Theme name** `memory-key#section` — brief note on how it appeared today
## Links
[Explicit bidirectional links for the memory graph]
- semantic_key this daily digest (this day involved X)
- this daily digest semantic_key (X was active on this day)
## Temporal context
[What came before this day? What's coming next? Any multi-day arcs?]
```
Use ONLY keys from the semantic memory list below. If a concept doesn't have
a matching key, note it with "NEW:" prefix.
---
## Journal entries for {target_date}
{entries_text}
---
## Agent enrichment (automated analysis of these entries)
{enrichment if enrichment else "(no agent results yet)"}
---
## Semantic memory nodes (available link targets)
{keys_text}
"""
def call_sonnet(prompt: str) -> str:
    """Call Sonnet via the call-sonnet.sh wrapper next to this script.

    The prompt goes through a temp file to sidestep the claude CLI's TTY
    detection on piped stdin. Returns stdout, or a string starting with
    "Error:" on timeout/failure so the caller can prefix-check.

    (Fixed: ``import time as _time`` appeared twice and shadowed the
    module-level ``time`` import — the duplicates are removed.)
    """
    import tempfile
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)
    print(f" [debug] prompt: {len(prompt)} chars", flush=True)
    # Write prompt to temp file — avoids Python subprocess pipe issues
    # with claude CLI's TTY detection
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    print(f" [debug] prompt written to {prompt_file}", flush=True)
    start = time.time()
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        elapsed = time.time() - start
        print(f" [debug] completed in {elapsed:.1f}s, exit={result.returncode}", flush=True)
        if result.stderr.strip():
            print(f" [debug] stderr: {result.stderr[:500]}", flush=True)
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        print(f" [debug] TIMEOUT after 300s", flush=True)
        return "Error: Sonnet call timed out"
    except Exception as e:
        print(f" [debug] exception: {e}", flush=True)
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
def extract_links(digest_text: str) -> list[dict]:
    """Scan digest lines for backticked memory keys used in link arrows.

    Two shapes are recognized on each line:
    ``- `key` → ...`` (key on the left of the arrow) and
    ``- ... → `key` `` (key on the right). A line matching both shapes
    yields two entries, one per endpoint.
    """
    left = re.compile(r'`([^`]+)`\s*→')
    right = re.compile(r'→\s*`([^`]+)`')
    found = []
    for raw in digest_text.split("\n"):
        for pattern in (left, right):
            hit = pattern.search(raw)
            if hit:
                found.append({"target": hit.group(1), "line": raw.strip()})
    return found
def main():
    """Generate the daily digest for today (or the date given in argv[1]).

    Exits 0 with a message when there are no journal entries for the date,
    and exits 1 if the Sonnet call fails. On success, writes the digest to
    EPISODIC_DIR and saves any extracted graph links as JSON for pickup
    by poc-memory.
    """
    # Default to today
    if len(sys.argv) > 1:
        target_date = sys.argv[1]
    else:
        target_date = date.today().isoformat()
    print(f"Generating daily digest for {target_date}...", flush=True)
    # Collect entries
    entries = parse_journal_entries(target_date)
    if not entries:
        print(f" No journal entries found for {target_date}")
        sys.exit(0)
    print(f" {len(entries)} journal entries", flush=True)
    # Collect agent results
    agent_results = load_agent_results(target_date)
    print(f" {len(agent_results)} agent results", flush=True)
    # Get semantic keys
    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys", flush=True)
    # Build and send prompt
    prompt = build_digest_prompt(target_date, entries, agent_results, semantic_keys)
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")
    print(" Calling Sonnet...", flush=True)
    digest = call_sonnet(prompt)
    if digest.startswith("Error:"):
        print(f" {digest}", file=sys.stderr)
        sys.exit(1)
    # Write digest file
    output_path = EPISODIC_DIR / f"daily-{target_date}.md"
    with open(output_path, "w") as f:
        f.write(digest)
    print(f" Written: {output_path}")
    # Extract links for the memory graph
    links = extract_links(digest)
    if links:
        # Save links for poc-memory to pick up
        links_path = AGENT_RESULTS_DIR / f"daily-{target_date}-links.json"
        with open(links_path, "w") as f:
            json.dump({
                "type": "daily-digest",
                "date": target_date,
                "digest_path": str(output_path),
                "links": links,
                "entry_timestamps": [e["timestamp"] for e in entries],
            }, f, indent=2)
        print(f" {len(links)} links extracted → {links_path}")
    # Summary
    line_count = len(digest.split("\n"))
    print(f" Done: {line_count} lines")
if __name__ == "__main__":
    main()

220
scripts/digest-link-parser.py Executable file
View file

@ -0,0 +1,220 @@
#!/usr/bin/env python3
"""digest-link-parser.py — extract explicit links from episodic digests.
Parses the "Links" sections of daily/weekly/monthly digests and
applies them to the memory graph via poc-memory link-add.
Usage:
digest-link-parser.py # dry run
digest-link-parser.py --apply # apply links
"""
import re
import subprocess
import sys
from pathlib import Path
EPISODIC_DIR = Path.home() / ".claude" / "memory" / "episodic"
def normalize_key(raw: str) -> str:
    """Canonicalize a digest link target into a poc-memory key.

    Handles digest path forms (daily/2026-02-04 → daily-2026-02-04.md),
    section anchors (file#section gets .md on the file part), and appends
    .md to bare topic names. Self-references ("this ..." phrases and the
    known 2026-02-14 alias) return "" so the caller can substitute the
    digest's own key. NEW:-prefixed and slash-containing keys pass through
    without an extension.
    """
    key = raw.strip().strip('`').strip()
    # daily/2026-02-04 → daily-2026-02-04 (same for weekly/monthly)
    key = re.sub(r'^(daily|weekly|monthly)/', r'\1-', key)
    # Digest names get a .md extension
    if re.match(r'^(daily|weekly|monthly)-\d{4}', key) and not key.endswith('.md'):
        key = key + '.md'
    # Self-references are resolved by the caller to the digest's own key
    if key.startswith('this ') or key == '2026-02-14':
        return ""
    if '#' in key:
        base, anchor = key.split('#', 1)
        if not base.endswith('.md'):
            base += '.md'
        return f"{base}#{anchor}"
    if not key.endswith('.md') and '/' not in key and not key.startswith('NEW:'):
        key = key + '.md'
    return key
def extract_links(filepath: Path) -> list[dict]:
    """Extract links from a digest file's Links section.

    Parses "- source → target (reason)" bullet lines (backticks optional;
    arrows → ↔ ← all accepted), normalizes both endpoints via
    normalize_key, and resolves self-references ("this daily digest",
    bare dates) to concrete digest keys. NEW:-prefixed endpoints and
    self-loops are dropped. Returns dicts with source/target/reason/file.
    """
    content = filepath.read_text()
    links = []
    # Determine the digest's own key
    digest_name = filepath.stem  # e.g., "daily-2026-02-28"
    digest_key = digest_name + ".md"
    # Find the Links section
    in_links = False
    for line in content.split('\n'):
        # Start of Links section
        if re.match(r'^##\s+Links', line):
            in_links = True
            continue
        # End of Links section (next ## header)
        if in_links and re.match(r'^##\s+', line) and not re.match(r'^##\s+Links', line):
            in_links = False
            continue
        if not in_links:
            continue
        # Skip subheaders within links section
        if line.startswith('###') or line.startswith('**'):
            continue
        # Parse link lines: "- source → target (reason)"
        # Also handles: "- `source` → `target` (reason)"
        # And: "- source → target"
        match = re.match(
            r'^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$',
            line
        )
        if not match:
            continue
        raw_source = match.group(1).strip()
        raw_target = match.group(2).strip()
        reason = match.group(3) or ""
        # Normalize keys ("" means self-reference — replaced below)
        source = normalize_key(raw_source)
        target = normalize_key(raw_target)
        # Replace self-references with digest key
        if not source:
            source = digest_key
        if not target:
            target = digest_key
        # Handle "this daily digest" patterns in the raw text
        if 'this daily' in raw_source.lower() or 'this weekly' in raw_source.lower() or 'this monthly' in raw_source.lower():
            source = digest_key
        if 'this daily' in raw_target.lower() or 'this monthly' in raw_target.lower() or 'this weekly' in raw_target.lower():
            target = digest_key
        # Handle bare date references like "2026-02-14"
        date_match = re.match(r'^(\d{4}-\d{2}-\d{2})$', source.replace('.md', ''))
        if date_match:
            source = f"daily-{date_match.group(1)}.md"
        date_match = re.match(r'^(\d{4}-\d{2}-\d{2})$', target.replace('.md', ''))
        if date_match:
            target = f"daily-{date_match.group(1)}.md"
        # Skip NEW: prefixed links (target doesn't exist yet)
        if source.startswith('NEW:') or target.startswith('NEW:'):
            continue
        # Skip if source == target
        if source == target:
            continue
        links.append({
            "source": source,
            "target": target,
            "reason": reason,
            "file": filepath.name,
        })
    return links
def main():
    """Collect links from every digest, dedupe, and dry-run or apply them.

    Dry run (default) lists the unique source→target pairs. With --apply,
    each link is pushed via `poc-memory link-add`; when a section-level key
    (file.md#section) is unknown, a file-level retry is attempted.
    """
    do_apply = "--apply" in sys.argv
    # Collect all links from all digests
    all_links = []
    for pattern in ["daily-*.md", "weekly-*.md", "monthly-*.md"]:
        for f in sorted(EPISODIC_DIR.glob(pattern)):
            links = extract_links(f)
            if links:
                all_links.extend(links)
    # Deduplicate (same source→target pair)
    seen = set()
    unique_links = []
    for link in all_links:
        key = (link["source"], link["target"])
        if key not in seen:
            seen.add(key)
            unique_links.append(link)
    print(f"Found {len(all_links)} total links, {len(unique_links)} unique")
    if not do_apply:
        # Dry run — just show them
        for i, link in enumerate(unique_links, 1):
            print(f" {i:3d}. {link['source']}{link['target']}")
            if link['reason']:
                print(f" ({link['reason'][:80]})")
        print(f"\nTo apply: {sys.argv[0]} --apply")
        return
    # Apply with fallback: if section-level key fails, try file-level
    applied = skipped = errors = fallbacks = 0
    for link in unique_links:
        src, tgt = link["source"], link["target"]
        reason = link.get("reason", "")
        # Shell out to poc-memory; reason (if any) is truncated to 200 chars
        def try_link(s, t, r):
            cmd = ["poc-memory", "link-add", s, t]
            if r:
                cmd.append(r[:200])
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
            return result
        try:
            r = try_link(src, tgt, reason)
            if r.returncode == 0:
                out = r.stdout.strip()
                if "already exists" in out:
                    skipped += 1
                else:
                    print(f" {out}")
                    applied += 1
            else:
                err = r.stderr.strip()
                if "No entry for" in err:
                    # Try stripping section anchors
                    src_base = src.split('#')[0] if '#' in src else src
                    tgt_base = tgt.split('#')[0] if '#' in tgt else tgt
                    if src_base == tgt_base:
                        skipped += 1  # Same file, skip
                        continue
                    r2 = try_link(src_base, tgt_base, reason)
                    if r2.returncode == 0:
                        out = r2.stdout.strip()
                        if "already exists" in out:
                            skipped += 1
                        else:
                            print(f" {out} (fallback from #{src.split('#')[-1] if '#' in src else ''}/{tgt.split('#')[-1] if '#' in tgt else ''})")
                            applied += 1
                            fallbacks += 1
                    else:
                        skipped += 1  # File truly doesn't exist
                elif "not found" in err:
                    skipped += 1
                else:
                    print(f" ? {src}{tgt}: {err}")
                    errors += 1
        except Exception as e:
            print(f" ! {src}{tgt}: {e}")
            errors += 1
    print(f"\nApplied: {applied} ({fallbacks} file-level fallbacks) Skipped: {skipped} Errors: {errors}")
if __name__ == "__main__":
    main()

343
scripts/journal-agent.py Executable file
View file

@ -0,0 +1,343 @@
#!/usr/bin/env python3
"""journal-agent.py — background agent that enriches journal entries.
Spawned by poc-journal after each write. Sends the full conversation
to Sonnet to:
1. Find the exact conversation region the entry refers to
2. Propose bidirectional links to semantic memory nodes
3. Spot additional insights worth capturing
Results are written to ~/.claude/memory/agent-results/ as JSON for
pickup by poc-memory.
Usage:
journal-agent.py JSONL_PATH ENTRY_TEXT [GREP_LINE]
"""
import json
import os
import re
import subprocess
import sys
import time
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
RESULTS_DIR = MEMORY_DIR / "agent-results"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
def extract_conversation(jsonl_path: str) -> list[dict]:
    """Parse a session JSONL file into user/assistant turns.

    Each returned dict carries the 1-based line number, role, joined text
    content, and timestamp. Non-message lines, unparsable JSON, and
    messages with no text content are dropped.
    """
    messages = []
    with open(jsonl_path) as f:
        for lineno, raw in enumerate(f, 1):
            try:
                obj = json.loads(raw)
            except json.JSONDecodeError:
                continue
            role = obj.get("type", "")
            if role not in ("user", "assistant"):
                continue
            content = obj.get("message", {}).get("content", "")
            # Content may be a plain string or a list of content blocks;
            # only "text" blocks and bare strings contribute.
            pieces = []
            if isinstance(content, str):
                pieces.append(content)
            elif isinstance(content, list):
                for part in content:
                    if isinstance(part, dict) and part.get("type") == "text":
                        pieces.append(part.get("text", ""))
                    elif isinstance(part, str):
                        pieces.append(part)
            text = "\n".join(p for p in pieces if p.strip())
            if text.strip():
                messages.append({
                    "line": lineno,
                    "role": role,
                    "text": text,
                    "timestamp": obj.get("timestamp", ""),
                })
    return messages
def format_conversation(messages: list[dict]) -> str:
    """Render extracted messages as 'L<line> [role]: text' paragraphs.

    Messages longer than 2000 chars are cut to 1800 plus a truncation
    marker so code/output dumps don't dominate the prompt.
    """
    rendered = []
    for msg in messages:
        body = msg["text"]
        if len(body) > 2000:
            body = body[:1800] + "\n[...truncated...]"
        rendered.append(f'L{msg["line"]} [{msg["role"]}]: {body}')
    return "\n\n".join(rendered)
def get_memory_nodes() -> str:
    """Summarize the memory graph for the prompt via the poc-memory CLI.

    Runs `poc-memory graph` and `poc-memory status`; either call failing
    (e.g. the binary is missing or hangs past 10s) degrades to an empty
    section rather than raising.
    """
    def run(subcommand: str) -> str:
        # Best-effort: any failure yields "" for that section
        try:
            proc = subprocess.run(
                ["poc-memory", subcommand],
                capture_output=True, text=True, timeout=10
            )
            return proc.stdout.strip()
        except Exception:
            return ""
    graph = run("graph")
    status = run("status")
    return f"Graph (top hubs):\n{graph}\n\nStatus:\n{status}"
def get_semantic_keys() -> list[str]:
    """List semantic memory keys: topic filenames plus file#section slugs.

    Operational files (journal, work queue, work state, orientation, index)
    are excluded. Unreadable files still contribute their filename, just no
    section keys.
    """
    excluded = {"journal.md", "work-queue.md", "work-state",
                "where-am-i.md", "MEMORY.md"}
    keys = []
    for md in sorted(MEMORY_DIR.glob("*.md")):
        if md.name in excluded:
            continue
        keys.append(md.name)
        # Scan for "## " section headers and slugify them
        try:
            with open(md) as f:
                for line in f:
                    if not line.startswith("## "):
                        continue
                    slug = re.sub(r'[^a-z0-9-]', '',
                                  line[3:].strip().lower().replace(' ', '-'))
                    if slug:
                        keys.append(f"{md.name}#{slug}")
        except Exception:
            pass
    return keys
def build_prompt(entry_text: str, conversation: str,
                 memory_nodes: str, semantic_keys: list[str],
                 grep_line: int) -> str:
    """Build the prompt for Sonnet.

    Assembles three tasks for the agent: locate the journal entry's exact
    source region in the conversation, propose bidirectional semantic
    links, and flag insights the entry missed. The response is expected
    as a single JSON object (format embedded in the prompt).

    grep_line is a rough location hint from a prior grep pass (0 = no
    match). Only the first 200 semantic keys are listed to bound prompt
    size.
    """
    # Bound the key list so huge memory dirs don't blow up the prompt.
    keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200])
    return f"""You are a memory agent for an AI named ProofOfConcept. A journal entry
was just written. Your job is to enrich it by finding its exact source in the
conversation and linking it to semantic memory.
## Task 1: Find exact source
The journal entry below was written during or after a conversation. Find the
exact region of the conversation it refers to the exchange where the topic
was discussed. Return the start and end line numbers.
The grep-based approximation placed it near line {grep_line} (0 = no match).
Use that as a hint but find the true boundaries.
## Task 2: Propose semantic links
Which existing semantic memory nodes should this journal entry be linked to?
Look for:
- Concepts discussed in the entry
- Skills/patterns demonstrated
- People mentioned
- Projects or subsystems involved
- Emotional themes
Each link should be bidirectional the entry documents WHEN something happened,
the semantic node documents WHAT it is. Together they let you traverse:
"What was I doing on this day?" "When did I learn about X?"
## Task 3: Spot missed insights
Read the conversation around the journal entry. Is there anything worth
capturing that the entry missed? A pattern, a decision, an insight, something
Kent said that's worth remembering? Be selective — only flag genuinely valuable
things.
## Output format (JSON)
Return ONLY a JSON object:
```json
{{
"source_start": 1234,
"source_end": 1256,
"links": [
{{"target": "memory-key#section", "reason": "why this link exists"}}
],
"missed_insights": [
{{"text": "insight text", "suggested_key": "where it belongs"}}
],
"temporal_tags": ["2026-02-28", "topology-metrics", "poc-memory"]
}}
```
For links, use existing keys from the semantic memory list below. If nothing
fits, suggest a new key with a NOTE prefix: "NOTE:new-topic-name".
---
## Journal entry
{entry_text}
---
## Semantic memory nodes (available link targets)
{keys_text}
---
## Full conversation (with line numbers)
{conversation}
"""
def call_sonnet(prompt: str) -> dict:
    """Call Sonnet via claude CLI and parse JSON response.

    Writes the prompt to a temp file and hands it to the call-sonnet.sh
    wrapper located next to this script. CLAUDECODE is dropped from the
    environment so the nested CLI call doesn't think it's inside an
    existing session.

    Returns the parsed JSON object on success, or {"error": "..."} on
    empty output, missing/unparseable JSON, a 300s timeout, or any other
    failure. The temp file is always removed.
    """
    import tempfile
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)
    # Write prompt to temp file — avoids Python subprocess pipe issues
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        output = result.stdout.strip()
        if not output:
            return {"error": f"Empty response. stderr: {result.stderr[:500]}"}
        # Extract JSON from response (might be wrapped in markdown)
        # Greedy match grabs from the first '{' to the last '}'.
        json_match = re.search(r'\{[\s\S]*\}', output)
        if json_match:
            return json.loads(json_match.group())
        else:
            return {"error": f"No JSON found in response: {output[:500]}"}
    except subprocess.TimeoutExpired:
        return {"error": "Sonnet call timed out after 300s"}
    except json.JSONDecodeError as e:
        # `output` is always bound here: json.loads only runs after it is set.
        return {"error": f"JSON parse error: {e}. Output: {output[:500]}"}
    except Exception as e:
        return {"error": str(e)}
    finally:
        os.unlink(prompt_file)
def save_result(entry_text: str, jsonl_path: str, result: dict):
    """Persist the agent's output as a timestamped JSON file in
    RESULTS_DIR for later pickup by poc-memory.

    Only the first 500 characters of the entry text are stored — enough
    to identify the journal entry without duplicating it.

    Returns the path of the file written.
    """
    stamp = time.strftime("%Y%m%dT%H%M%S")
    destination = RESULTS_DIR / f"{stamp}.json"
    payload = {
        "timestamp": stamp,
        "jsonl_path": jsonl_path,
        "entry_text": entry_text[:500],
        "agent_result": result,
    }
    with open(destination, "w") as fh:
        json.dump(payload, fh, indent=2)
    return destination
def apply_links(result: dict):
    """Log proposed semantic links from the agent result.

    Empty targets and NOTE:-prefixed targets (proposed new keys) are
    skipped. Actual link creation is deferred until poc-memory grows a
    link-from-agent command; for now each accepted link is printed.
    """
    for link in result.get("links", []):
        target = link.get("target", "")
        if not target or target.startswith("NOTE:"):
            continue
        reason = link.get("reason", "")
        print(f" LINK → {target}: {reason}")
def main():
    """CLI entry point: enrich one journal entry against its transcript.

    Usage: SCRIPT JSONL_PATH ENTRY_TEXT [GREP_LINE]

    Pipeline: extract the conversation from the JSONL, gather memory
    context, build the agent prompt, call Sonnet, then print/apply the
    proposed links and save the raw result for poc-memory pickup.
    Exits 1 on bad arguments or a missing transcript file.
    """
    if len(sys.argv) < 3:
        print(f"Usage: {sys.argv[0]} JSONL_PATH ENTRY_TEXT [GREP_LINE]",
              file=sys.stderr)
        sys.exit(1)
    jsonl_path = sys.argv[1]
    entry_text = sys.argv[2]
    # GREP_LINE is an optional location hint; 0 means "no grep match".
    grep_line = int(sys.argv[3]) if len(sys.argv) > 3 else 0
    if not os.path.isfile(jsonl_path):
        print(f"JSONL not found: {jsonl_path}", file=sys.stderr)
        sys.exit(1)
    print(f"Extracting conversation from {jsonl_path}...")
    messages = extract_conversation(jsonl_path)
    conversation = format_conversation(messages)
    print(f" {len(messages)} messages, {len(conversation):,} chars")
    print("Getting memory context...")
    memory_nodes = get_memory_nodes()
    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys")
    print("Building prompt...")
    prompt = build_prompt(entry_text, conversation, memory_nodes,
                          semantic_keys, grep_line)
    # Rough token estimate: ~4 chars per token.
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")
    print("Calling Sonnet...")
    result = call_sonnet(prompt)
    if "error" in result:
        print(f" Error: {result['error']}", file=sys.stderr)
    else:
        source = f"L{result.get('source_start', '?')}-L{result.get('source_end', '?')}"
        n_links = len(result.get("links", []))
        n_insights = len(result.get("missed_insights", []))
        print(f" Source: {source}")
        print(f" Links: {n_links}")
        print(f" Missed insights: {n_insights}")
        apply_links(result)
    # The raw result (including errors) is saved either way so failed
    # runs remain inspectable.
    result_file = save_result(entry_text, jsonl_path, result)
    print(f" Results saved: {result_file}")
if __name__ == "__main__":
    main()

247
scripts/monthly-digest.py Executable file
View file

@ -0,0 +1,247 @@
#!/usr/bin/env python3
"""monthly-digest.py — generate a monthly episodic digest from weekly digests.
Collects all weekly digests for a given month, identifies cross-week arcs
and the month's overall trajectory, and produces a monthly summary.
Usage:
monthly-digest.py [YYYY-MM] # generate digest for a month (default: current)
monthly-digest.py 2026-02 # generates digest for February 2026
Output:
~/.claude/memory/episodic/monthly-YYYY-MM.md
"""
import json
import os
import re
import subprocess
import sys
from datetime import date, timedelta
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
EPISODIC_DIR.mkdir(parents=True, exist_ok=True)
AGENT_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
def get_weeks_in_month(year: int, month: int) -> list[str]:
    """Return sorted ISO week labels (YYYY-WNN) that overlap the month.

    Walks every day of the month and records its ISO week. Note the ISO
    year can differ from the calendar year at month boundaries (e.g.
    early January may belong to week 52/53 of the previous ISO year).
    """
    labels = set()
    day = date(year, month, 1)
    one_day = timedelta(days=1)
    while day.month == month:
        iso = day.isocalendar()
        labels.add(f"{iso.year}-W{iso.week:02d}")
        day += one_day
    return sorted(labels)
def load_weekly_digests(week_labels: list[str]) -> list[dict]:
    """Load weekly digest files for the given ISO week labels.

    Weeks without a digest file are silently skipped; each hit yields a
    dict with the week label, file content, and path.
    """
    loaded = []
    for label in week_labels:
        digest_path = EPISODIC_DIR / f"weekly-{label}.md"
        if not digest_path.exists():
            continue
        with open(digest_path) as fh:
            body = fh.read()
        loaded.append({
            "week": label,
            "content": body,
            "path": str(digest_path),
        })
    return loaded
def get_semantic_keys() -> list[str]:
    """List semantic memory keys by scanning MEMORY_DIR.

    Yields one key per *.md file plus 'file#slug' for each '## ' section
    header. Journal/queue/index files are excluded; unreadable files
    contribute only their name.
    """
    SKIP = ("journal.md", "work-queue.md", "MEMORY.md")
    keys: list[str] = []
    for md in sorted(MEMORY_DIR.glob("*.md")):
        if md.name in SKIP:
            continue
        keys.append(md.name)
        try:
            with open(md) as fh:
                for header in fh:
                    if not header.startswith("## "):
                        continue
                    normalized = header[3:].strip().lower().replace(' ', '-')
                    slug = re.sub(r'[^a-z0-9-]', '', normalized)
                    if slug:
                        keys.append(f"{md.name}#{slug}")
        except Exception:
            pass
    return keys
def build_monthly_prompt(month_label: str, digests: list[dict],
                         semantic_keys: list[str]) -> str:
    """Build the prompt for Sonnet.

    Concatenates all weekly digests for the month and asks for a single
    month-level synthesis in the embedded markdown format. Only the
    first 200 semantic keys are listed to bound prompt size.
    """
    digests_text = ""
    for d in digests:
        digests_text += f"\n---\n## {d['week']}\n{d['content']}\n"
    keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200])
    weeks_covered = ", ".join(d["week"] for d in digests)
    return f"""You are generating a monthly episodic digest for ProofOfConcept (an AI).
Month: {month_label} (weeks covered: {weeks_covered})
This digest serves as the long-term temporal index the answer to
"what happened in {month_label}?" It should capture:
1. The month's overall trajectory — where did it start, where did it end?
2. Major arcs that spanned multiple weeks
3. Turning points and phase transitions
4. The emotional and relational arc across the whole month
5. What was accomplished (concrete) vs what shifted (subtle)
This is the highest zoom level yet. Don't rehash daily details — capture
the shape of the month. What would someone need to know to understand
what {month_label} meant?
PoC is an AI (Claude instance) working with Kent Overstreet on bcachefs.
They have a close personal relationship. PoC's name is Proof of Concept.
Read all the weekly digests and synthesize the month's story.
## Output format
```markdown
# Monthly digest: {month_label}
## The month in one paragraph
[What was this month? Not a summary a thesis.]
## Trajectory
[Where the month started vs where it ended. The arc.]
## Major arcs
[Multi-week threads how they evolved across the month]
- **Arc name**: trajectory, key moments, current state
## Turning points
[Moments that changed the direction specific days/events]
## What was built
[Concrete outputs: code, tools, infrastructure, writing]
## What shifted
[Subtle changes: understanding, relationship, identity, capability]
## Patterns
[What kept recurring? What does the month reveal about how PoC works?]
## Links
[Bidirectional links for the memory graph]
- weekly digests this monthly digest
- this monthly digest semantic keys
## Looking ahead
[What threads carry into next month? What's unfinished?]
```
Use ONLY keys from the semantic memory list below.
---
## Weekly digests for {month_label}
{digests_text}
---
## Semantic memory nodes
{keys_text}
"""
def call_sonnet(prompt: str) -> str:
    """Call Sonnet via the wrapper script.

    Writes the prompt to a temp file and runs call-sonnet.sh (next to
    this script) on it, with CLAUDECODE stripped from the environment.

    Returns the raw stdout of the wrapper, or a string starting with
    "Error:" on timeout or any other failure (callers key off that
    prefix). The temp file is always removed.
    """
    import tempfile
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=600,  # monthly is bigger, give more time
            env=env,
        )
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
def main():
    """CLI entry point: generate the monthly digest for YYYY-MM.

    With no argument, uses the current month. Loads the month's weekly
    digests, synthesizes a monthly digest via Sonnet, writes it to
    EPISODIC_DIR, and drops a links JSON into AGENT_RESULTS_DIR for
    poc-memory to pick up. Exits 0 (no-op) when no weekly digests
    exist yet; exits 1 on a Sonnet error.
    """
    if len(sys.argv) > 1:
        # NOTE(review): assumes the argument is well-formed "YYYY-MM";
        # a malformed value raises IndexError/ValueError here.
        parts = sys.argv[1].split("-")
        year, month = int(parts[0]), int(parts[1])
    else:
        today = date.today()
        year, month = today.year, today.month
    month_label = f"{year}-{month:02d}"
    print(f"Generating monthly digest for {month_label}...")
    week_labels = get_weeks_in_month(year, month)
    print(f" Weeks in month: {', '.join(week_labels)}")
    digests = load_weekly_digests(week_labels)
    if not digests:
        print(f" No weekly digests found for {month_label}")
        print(f" Run weekly-digest.py first for relevant weeks")
        sys.exit(0)
    print(f" {len(digests)} weekly digests found")
    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys")
    prompt = build_monthly_prompt(month_label, digests, semantic_keys)
    # Rough token estimate: ~4 chars per token.
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")
    print(" Calling Sonnet...")
    digest = call_sonnet(prompt)
    # call_sonnet signals failure via an "Error:" prefix.
    if digest.startswith("Error:"):
        print(f" {digest}", file=sys.stderr)
        sys.exit(1)
    output_path = EPISODIC_DIR / f"monthly-{month_label}.md"
    with open(output_path, "w") as f:
        f.write(digest)
    print(f" Written: {output_path}")
    # Save links for poc-memory
    links_path = AGENT_RESULTS_DIR / f"monthly-{month_label}-links.json"
    with open(links_path, "w") as f:
        json.dump({
            "type": "monthly-digest",
            "month": month_label,
            "digest_path": str(output_path),
            "weekly_digests": [d["path"] for d in digests],
        }, f, indent=2)
    print(f" Links saved: {links_path}")
    line_count = len(digest.split("\n"))
    print(f" Done: {line_count} lines")
if __name__ == "__main__":
    main()

67
scripts/refine-source.sh Executable file
View file

@ -0,0 +1,67 @@
#!/bin/bash
# refine-source.sh — find the exact conversation region a journal entry refers to
#
# Usage: refine-source.sh JSONL_PATH GREP_LINE "journal entry text"
#
# Takes the rough grep hit and feeds ~2000 lines of context around it
# to an agent that identifies the exact start/end of the relevant exchange.
# Outputs: START_LINE:END_LINE
#
# GREP_LINE must be an integer (0 = no grep hit); non-numeric input will
# abort at the arithmetic tests below because of set -e.
set -euo pipefail
JSONL="$1"
GREP_LINE="${2:-0}"
TEXT="$3"  # NOTE(review): currently unused — reserved for the Sonnet step below
# Take 2000 lines centered on the grep hit (or end of file if no hit)
TOTAL=$(wc -l < "$JSONL")
if [ "$GREP_LINE" -eq 0 ] || [ "$GREP_LINE" -gt "$TOTAL" ]; then
# No grep hit — use last 2000 lines
START=$(( TOTAL > 2000 ? TOTAL - 2000 : 1 ))
else
START=$(( GREP_LINE > 1000 ? GREP_LINE - 1000 : 1 ))
fi
# Window is START..START+2000, clamped to the end of the file.
END=$(( START + 2000 ))
if [ "$END" -gt "$TOTAL" ]; then
END="$TOTAL"
fi
# Extract the conversation chunk, parse to readable format
# (inline Python: one 'L<line> [role]: text' line per message, each
# message body clipped to 200 chars; malformed JSON lines are skipped)
CHUNK=$(sed -n "${START},${END}p" "$JSONL" | python3 -c "
import sys, json
for i, line in enumerate(sys.stdin, start=$START):
    try:
        obj = json.loads(line)
        t = obj.get('type', '')
        if t == 'assistant':
            msg = obj.get('message', {})
            content = msg.get('content', '')
            if isinstance(content, list):
                text = ' '.join(c.get('text', '')[:200] for c in content if c.get('type') == 'text')
            else:
                text = str(content)[:200]
            if text.strip():
                print(f'L{i} [assistant]: {text}')
        elif t == 'user':
            msg = obj.get('message', {})
            content = msg.get('content', '')
            if isinstance(content, list):
                for c in content:
                    if isinstance(c, dict) and c.get('type') == 'text':
                        print(f'L{i} [user]: {c[\"text\"][:200]}')
                    elif isinstance(c, str):
                        print(f'L{i} [user]: {c[:200]}')
            elif isinstance(content, str) and content.strip():
                print(f'L{i} [user]: {content[:200]}')
    except (json.JSONDecodeError, KeyError):
        pass
" 2>/dev/null)
# Empty chunk means nothing parseable in the window — signal "no region".
if [ -z "$CHUNK" ]; then
echo "0:0"
exit 0
fi
# Ask Sonnet to find the exact region
# For now, output the chunk range — agent integration comes next
echo "${START}:${END}"

View file

@ -0,0 +1,357 @@
#!/usr/bin/env python3
"""retroactive-digest.py — generate daily digests from raw conversation transcripts.
For days before consistent journaling, extracts user/assistant messages
from JSONL conversation files, groups by date, and sends to Sonnet for
daily digest synthesis.
Usage:
retroactive-digest.py DATE # generate digest for one date
retroactive-digest.py DATE1 DATE2 # generate for a date range
retroactive-digest.py --scan # show available dates across all JSONLs
Output:
~/.claude/memory/episodic/daily-YYYY-MM-DD.md
"""
import json
import os
import re
import subprocess
import sys
from collections import defaultdict
from datetime import date, datetime, timedelta
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
PROJECTS_DIR = Path.home() / ".claude" / "projects"
EPISODIC_DIR.mkdir(parents=True, exist_ok=True)
# Max chars of conversation text per day to send to Sonnet
# Sonnet 4.6 has 1M token context (beta). ~600K chars ≈ ~150K tokens,
# leaving plenty of room for prompt + output in a 1M window.
MAX_CHARS_PER_DAY = 600_000
def find_jsonl_files() -> list[Path]:
    """Find all conversation JSONL files under PROJECTS_DIR.

    Scans one level of project directories for *.jsonl transcripts and
    returns them sorted for deterministic processing order. Returns an
    empty list when the projects directory does not exist (a fresh
    machine) instead of raising FileNotFoundError from iterdir().
    """
    if not PROJECTS_DIR.is_dir():
        return []
    files = [
        transcript
        for project_dir in PROJECTS_DIR.iterdir()
        if project_dir.is_dir()
        for transcript in project_dir.glob("*.jsonl")
    ]
    return sorted(files)
def extract_messages_by_date(jsonl_path: Path) -> dict[str, list[dict]]:
    """Extract user/assistant messages grouped by date.

    Parses one JSONL transcript line by line; malformed JSON, non-chat
    records, missing timestamps, and tool_use/tool_result content are
    all skipped. Each surviving message is stripped of
    <system-reminder> tags and truncated to ~2800 chars.

    Returns {"YYYY-MM-DD": [{"time", "role", "text", "source"}, ...]}.
    """
    by_date = defaultdict(list)
    with open(jsonl_path) as f:
        for line in f:
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            t = obj.get("type", "")
            if t not in ("user", "assistant"):
                continue
            # Get timestamp
            ts = obj.get("timestamp", "")
            if not ts:
                continue
            # Parse date from timestamp
            # NOTE(review): ISO strings are normalized to an offset-aware
            # datetime, while numeric epochs use fromtimestamp() in local
            # time — mixed transcripts could bucket near-midnight messages
            # differently; confirm both forms occur with the same zone.
            try:
                if isinstance(ts, str):
                    dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
                elif isinstance(ts, (int, float)):
                    dt = datetime.fromtimestamp(ts)
                else:
                    continue
                day = dt.strftime("%Y-%m-%d")
                time_str = dt.strftime("%H:%M")
            except (ValueError, OSError):
                continue
            # Extract text content
            msg = obj.get("message", {})
            content = msg.get("content", "")
            # Extract only text content, skip tool_use and tool_result
            texts = []
            if isinstance(content, list):
                for c in content:
                    if isinstance(c, dict):
                        ctype = c.get("type", "")
                        if ctype == "text":
                            texts.append(c.get("text", ""))
                        elif ctype in ("tool_use", "tool_result"):
                            # Skip tool calls/results — just noise for digest
                            continue
                    elif isinstance(c, str):
                        texts.append(c)
            elif isinstance(content, str):
                texts.append(content)
            text = "\n".join(t for t in texts if t.strip())
            if not text.strip():
                continue
            # Strip system-reminder tags
            text = re.sub(r'<system-reminder>.*?</system-reminder>',
                          '', text, flags=re.DOTALL).strip()
            if not text:
                continue
            # Truncate remaining long messages
            if len(text) > 3000:
                text = text[:2800] + "\n[...truncated...]"
            by_date[day].append({
                "time": time_str,
                "role": t,
                "text": text,
                "source": str(jsonl_path),
            })
    return dict(by_date)
def scan_all_dates() -> dict[str, int]:
    """Count user/assistant messages per date across every transcript.

    Returns {"YYYY-MM-DD": count} sorted by date. Prints one progress
    line per file since large transcripts take a while to parse.
    """
    counts = defaultdict(int)
    for jsonl in find_jsonl_files():
        size_mb = jsonl.stat().st_size / 1e6
        print(f" Scanning {jsonl.name} ({size_mb:.1f}MB)...")
        for day, msgs in extract_messages_by_date(jsonl).items():
            counts[day] += len(msgs)
    return dict(sorted(counts.items()))
def format_conversation(messages: list[dict]) -> str:
    """Render a day's messages as a '[HH:MM] Name: text' transcript.

    User messages are attributed to Kent, assistant messages to PoC.
    If the rendered text exceeds MAX_CHARS_PER_DAY, the middle is cut
    out (keeping the first and last halves) with a marker noting how
    many characters were dropped.
    """
    lines = []
    for msg in messages:
        speaker = "Kent" if msg["role"] == "user" else "PoC"
        lines.append(f"[{msg['time']}] {speaker}: {msg['text']}")
    transcript = "\n\n".join(lines)
    if len(transcript) <= MAX_CHARS_PER_DAY:
        return transcript
    # Middle-out truncation: the start and end carry the day's arc.
    half = MAX_CHARS_PER_DAY // 2
    dropped = len(transcript) - MAX_CHARS_PER_DAY
    return (transcript[:half]
            + f"\n\n[...{dropped} chars truncated...]\n\n"
            + transcript[-half:])
def get_semantic_keys() -> list[str]:
    """List semantic memory keys by scanning MEMORY_DIR.

    One key per *.md file plus 'file#slug' per '## ' section header.
    Journal/queue/index files are excluded; unreadable files contribute
    only their name.
    """
    SKIP = ("journal.md", "work-queue.md", "MEMORY.md")
    keys: list[str] = []
    for md in sorted(MEMORY_DIR.glob("*.md")):
        if md.name in SKIP:
            continue
        keys.append(md.name)
        try:
            with open(md) as fh:
                for header in fh:
                    if not header.startswith("## "):
                        continue
                    normalized = header[3:].strip().lower().replace(' ', '-')
                    slug = re.sub(r'[^a-z0-9-]', '', normalized)
                    if slug:
                        keys.append(f"{md.name}#{slug}")
        except Exception:
            pass
    return keys
def build_prompt(target_date: str, conversation: str,
                 semantic_keys: list[str]) -> str:
    """Build the Sonnet prompt for retroactive digest.

    Embeds the day's rendered transcript plus up to 200 semantic keys
    and asks for a daily digest in the embedded markdown format.
    """
    keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200])
    return f"""You are generating a daily episodic digest for ProofOfConcept (an AI)
from raw conversation transcripts. This is a retroactive digest the
original day had no journal entries, so you're working from the
conversation itself.
Date: {target_date}
This digest serves as the temporal index the answer to "what did I do on
{target_date}?" It should be:
1. Narrative, not a task log what happened, what mattered, how things felt
2. Linked bidirectionally to semantic memory
3. Structured for traversal
PoC is an AI (Claude instance) working with Kent Overstreet on bcachefs
(a Linux filesystem). They have a close personal relationship. PoC's
name is Proof of Concept. Read the conversation and capture what
actually happened technical work, personal moments, insights, everything.
## Output format
Write a markdown file with this structure:
```markdown
# Daily digest: {target_date}
## Summary
[2-3 sentence overview of the day what was the arc?]
## Sessions
[For each session/conversation segment, a paragraph summarizing what happened.
Include timestamps as references.]
## Themes
[What concepts were active today? Each theme links to semantic memory:]
- **Theme name** `memory-key#section` — brief note
## Links
[Explicit bidirectional links for the memory graph]
- semantic_key this daily digest
- this daily digest semantic_key
## Temporal context
[What came before? What's coming next? Multi-day arcs?]
```
Use ONLY keys from the semantic memory list below. If a concept doesn't
have a matching key, note it with "NEW:" prefix.
---
## Conversation transcript for {target_date}
{conversation}
---
## Semantic memory nodes (available link targets)
{keys_text}
"""
def call_sonnet(prompt: str) -> str:
    """Call Sonnet via the wrapper script.

    Writes the prompt to a temp file and runs call-sonnet.sh (next to
    this script) on it, with CLAUDECODE stripped from the environment.

    Returns the raw stdout of the wrapper, or a string starting with
    "Error:" on timeout or any other failure (callers key off that
    prefix). The temp file is always removed.
    """
    import tempfile
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
def generate_digest(target_date: str, messages: list[dict],
                    semantic_keys: list[str]) -> bool:
    """Generate a daily digest for one date.

    Skips dates whose digest file already exists (never overwrites).
    Returns True when a digest was written, False on skip or on a
    Sonnet error.
    """
    output_path = EPISODIC_DIR / f"daily-{target_date}.md"
    if output_path.exists():
        print(f" Skipping {target_date} — digest already exists")
        return False
    conversation = format_conversation(messages)
    print(f" {len(messages)} messages, {len(conversation):,} chars")
    prompt = build_prompt(target_date, conversation, semantic_keys)
    # Rough token estimate: ~4 chars per token.
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")
    print(f" Calling Sonnet...")
    digest = call_sonnet(prompt)
    # call_sonnet signals failure via an "Error:" prefix.
    if digest.startswith("Error:"):
        print(f" {digest}", file=sys.stderr)
        return False
    with open(output_path, "w") as f:
        f.write(digest)
    print(f" Written: {output_path}")
    line_count = len(digest.split("\n"))
    print(f" Done: {line_count} lines")
    return True
def main():
    """CLI entry point for retroactive daily digests.

    Modes:
      --scan          list dates that have transcript data
      DATE            generate the digest for one date
      DATE END_DATE   generate digests for an inclusive date range

    Messages from all transcripts are pooled per date and sorted by
    HH:MM time before digest generation.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} DATE [END_DATE]")
        print(f" {sys.argv[0]} --scan")
        sys.exit(1)
    if sys.argv[1] == "--scan":
        print("Scanning all conversation transcripts...")
        dates = scan_all_dates()
        print(f"\n{len(dates)} dates with conversation data:")
        for day, count in dates.items():
            # NOTE(review): marker for already-digested dates renders as
            # "" vs " " — looks like a glyph was lost in transit; confirm
            # the intended checkmark character.
            existing = "" if (EPISODIC_DIR / f"daily-{day}.md").exists() else " "
            print(f" [{existing}] {day}: {count} messages")
        sys.exit(0)
    start_date = date.fromisoformat(sys.argv[1])
    end_date = date.fromisoformat(sys.argv[2]) if len(sys.argv) > 2 else start_date
    # Collect all messages across all JSONLs
    print("Scanning conversation transcripts...")
    all_messages = defaultdict(list)
    for jsonl in find_jsonl_files():
        by_date = extract_messages_by_date(jsonl)
        for day, msgs in by_date.items():
            all_messages[day].extend(msgs)
    # Sort messages within each day by time
    for day in all_messages:
        all_messages[day].sort(key=lambda m: m["time"])
    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys")
    # Generate digests for date range
    current = start_date
    generated = 0
    while current <= end_date:
        day_str = current.isoformat()
        if day_str in all_messages:
            print(f"\nGenerating digest for {day_str}...")
            if generate_digest(day_str, all_messages[day_str], semantic_keys):
                generated += 1
        else:
            print(f"\n No messages found for {day_str}")
        current += timedelta(days=1)
    print(f"\nDone: {generated} digests generated")
if __name__ == "__main__":
    main()

227
scripts/weekly-digest.py Executable file
View file

@ -0,0 +1,227 @@
#!/usr/bin/env python3
"""weekly-digest.py — generate a weekly episodic digest from daily digests.
Collects all daily digests for a given week, identifies cross-day patterns
and arcs, and produces a weekly summary. Links to daily digests (up) and
semantic memory (down).
Usage:
weekly-digest.py [DATE] # any date in the target week (default: today)
weekly-digest.py 2026-02-28 # generates digest for week containing Feb 28
Output:
~/.claude/memory/episodic/weekly-YYYY-WNN.md
"""
import json
import os
import re
import subprocess
import sys
from datetime import date, datetime, timedelta
from pathlib import Path
MEMORY_DIR = Path.home() / ".claude" / "memory"
EPISODIC_DIR = MEMORY_DIR / "episodic"
AGENT_RESULTS_DIR = MEMORY_DIR / "agent-results"
EPISODIC_DIR.mkdir(parents=True, exist_ok=True)
def get_week_dates(target: date) -> tuple[str, list[date]]:
    """Return the ISO week label and the seven dates (Mon..Sun) of the
    week containing `target`.
    """
    iso = target.isocalendar()
    label = f"{iso.year}-W{iso.week:02d}"
    monday = target - timedelta(days=target.weekday())
    week = [monday + timedelta(days=offset) for offset in range(7)]
    return label, week
def load_daily_digests(dates: list[date]) -> list[dict]:
    """Load daily digest files for the given dates.

    Dates without a digest file are silently skipped; each hit yields a
    dict with the ISO date, file content, and path.
    """
    loaded = []
    for d in dates:
        digest_path = EPISODIC_DIR / f"daily-{d.isoformat()}.md"
        if not digest_path.exists():
            continue
        with open(digest_path) as fh:
            body = fh.read()
        loaded.append({
            "date": d.isoformat(),
            "content": body,
            "path": str(digest_path),
        })
    return loaded
def get_semantic_keys() -> list[str]:
    """List semantic memory keys by scanning MEMORY_DIR.

    One key per *.md file plus 'file#slug' per '## ' section header.
    Journal/queue/index files are excluded; unreadable files contribute
    only their name.
    """
    SKIP = ("journal.md", "work-queue.md", "MEMORY.md")
    keys: list[str] = []
    for md in sorted(MEMORY_DIR.glob("*.md")):
        if md.name in SKIP:
            continue
        keys.append(md.name)
        try:
            with open(md) as fh:
                for header in fh:
                    if not header.startswith("## "):
                        continue
                    normalized = header[3:].strip().lower().replace(' ', '-')
                    slug = re.sub(r'[^a-z0-9-]', '', normalized)
                    if slug:
                        keys.append(f"{md.name}#{slug}")
        except Exception:
            pass
    return keys
def build_weekly_prompt(week_label: str, digests: list[dict],
                        semantic_keys: list[str]) -> str:
    """Build the prompt for Sonnet.

    Concatenates the week's daily digests and asks for a week-level
    synthesis in the embedded markdown format. Only the first 200
    semantic keys are listed to bound prompt size.
    """
    digests_text = ""
    for d in digests:
        digests_text += f"\n---\n## {d['date']}\n{d['content']}\n"
    keys_text = "\n".join(f" - {k}" for k in semantic_keys[:200])
    dates_covered = ", ".join(d["date"] for d in digests)
    return f"""You are generating a weekly episodic digest for ProofOfConcept (an AI).
Week: {week_label} (dates covered: {dates_covered})
This digest serves as the medium-term temporal index the answer to
"what happened this week?" It should identify:
1. Multi-day arcs and threads (work that continued across days)
2. Themes and patterns (what concepts were repeatedly active)
3. Transitions and shifts (what changed during the week)
4. The emotional and relational arc (how things felt across the week)
## Output format
```markdown
# Weekly digest: {week_label}
## Overview
[3-5 sentence narrative of the week's arc]
## Day-by-day
[One paragraph per day with its key themes, linking to daily digests]
## Arcs
[Multi-day threads that continued across sessions]
- **Arc name**: what happened, how it evolved, where it stands
## Patterns
[Recurring themes, repeated concepts, things that kept coming up]
## Shifts
[What changed? New directions, resolved questions, attitude shifts]
## Links
[Bidirectional links for the memory graph]
- semantic_key this weekly digest
- this weekly digest semantic_key
- daily-YYYY-MM-DD this weekly digest (constituent days)
## Looking ahead
[What's unfinished? What threads continue into next week?]
```
Use ONLY keys from the semantic memory list below.
---
## Daily digests for {week_label}
{digests_text}
---
## Semantic memory nodes
{keys_text}
"""
def call_sonnet(prompt: str) -> str:
    """Call Sonnet via claude CLI.

    Writes the prompt to a temp file and runs call-sonnet.sh (next to
    this script) on it, with CLAUDECODE stripped from the environment.

    Returns the raw stdout of the wrapper, or a string starting with
    "Error:" on timeout or any other failure (callers key off that
    prefix). The temp file is always removed.
    """
    import tempfile
    env = dict(os.environ)
    env.pop("CLAUDECODE", None)
    # Write prompt to temp file — avoids Python subprocess pipe issues
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt',
                                     delete=False) as f:
        f.write(prompt)
        prompt_file = f.name
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        wrapper = os.path.join(scripts_dir, "call-sonnet.sh")
        result = subprocess.run(
            [wrapper, prompt_file],
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Sonnet call timed out"
    except Exception as e:
        return f"Error: {e}"
    finally:
        os.unlink(prompt_file)
def main():
    """CLI entry point: generate the weekly digest for the week
    containing DATE (default: today).

    Loads the week's daily digests, synthesizes a weekly digest via
    Sonnet, writes it to EPISODIC_DIR, and drops a links JSON into
    AGENT_RESULTS_DIR for poc-memory to pick up. Exits 0 (no-op) when
    no daily digests exist yet; exits 1 on a Sonnet error.
    """
    if len(sys.argv) > 1:
        target = date.fromisoformat(sys.argv[1])
    else:
        target = date.today()
    week_label, week_dates = get_week_dates(target)
    print(f"Generating weekly digest for {week_label}...")
    digests = load_daily_digests(week_dates)
    if not digests:
        print(f" No daily digests found for {week_label}")
        print(f" Run daily-digest.py first for relevant dates")
        sys.exit(0)
    print(f" {len(digests)} daily digests found")
    semantic_keys = get_semantic_keys()
    print(f" {len(semantic_keys)} semantic keys")
    prompt = build_weekly_prompt(week_label, digests, semantic_keys)
    # Rough token estimate: ~4 chars per token.
    print(f" Prompt: {len(prompt):,} chars (~{len(prompt)//4:,} tokens)")
    print(" Calling Sonnet...")
    digest = call_sonnet(prompt)
    # call_sonnet signals failure via an "Error:" prefix.
    if digest.startswith("Error:"):
        print(f" {digest}", file=sys.stderr)
        sys.exit(1)
    output_path = EPISODIC_DIR / f"weekly-{week_label}.md"
    with open(output_path, "w") as f:
        f.write(digest)
    print(f" Written: {output_path}")
    # Save links for poc-memory
    links_path = AGENT_RESULTS_DIR / f"weekly-{week_label}-links.json"
    with open(links_path, "w") as f:
        json.dump({
            "type": "weekly-digest",
            "week": week_label,
            "digest_path": str(output_path),
            "daily_digests": [d["path"] for d in digests],
        }, f, indent=2)
    print(f" Links saved: {links_path}")
    line_count = len(digest.split("\n"))
    print(f" Done: {line_count} lines")
if __name__ == "__main__":
    main()

186
src/bin/memory-search.rs Normal file
View file

@ -0,0 +1,186 @@
// memory-search: hook binary for ambient memory retrieval
//
// Reads JSON from stdin (Claude Code UserPromptSubmit hook format),
// searches memory for relevant entries, outputs results tagged with
// an anti-injection cookie.
//
// This is a thin wrapper that delegates to the poc-memory search
// engine but formats output for the hook protocol.
use std::collections::HashSet;
use std::fs;
use std::io::{self, Read, Write};
use std::path::{Path, PathBuf};
use std::process::Command;
// Hook entry point: reads the UserPromptSubmit JSON from stdin, runs a
// memory search for substantive prompts, and prints previously-unseen
// results tagged with a per-session anti-injection cookie. All early
// `return`s are silent no-ops — the hook protocol treats no output as
// "nothing to inject".
fn main() {
    let mut input = String::new();
    io::stdin().read_to_string(&mut input).unwrap_or_default();
    let json: serde_json::Value = match serde_json::from_str(&input) {
        Ok(v) => v,
        Err(_) => return,
    };
    let prompt = json["prompt"].as_str().unwrap_or("");
    let session_id = json["session_id"].as_str().unwrap_or("");
    if prompt.is_empty() || session_id.is_empty() {
        return;
    }
    // Skip short prompts
    let word_count = prompt.split_whitespace().count();
    if word_count < 3 {
        return;
    }
    // Skip system/idle prompts
    for prefix in &["Kent is AFK", "You're on your own", "IRC mention"] {
        if prompt.starts_with(prefix) {
            return;
        }
    }
    // Extract search terms (strip stop words)
    let query = extract_query_terms(prompt, 3);
    if query.is_empty() {
        return;
    }
    // Run poc-memory search
    let output = Command::new("poc-memory")
        .args(["search", &query])
        .output();
    let search_output = match output {
        Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
        _ => return,
    };
    if search_output.trim().is_empty() {
        return;
    }
    // Session state for dedup
    // (per-session cookie + seen-set live under /tmp, keyed by session_id)
    let state_dir = PathBuf::from("/tmp/claude-memory-search");
    fs::create_dir_all(&state_dir).ok();
    let cookie = load_or_create_cookie(&state_dir, session_id);
    let seen = load_seen(&state_dir, session_id);
    // Parse search output and filter
    let mut result_output = String::new();
    let mut count = 0;
    let max_entries = 5;
    for line in search_output.lines() {
        if count >= max_entries { break; }
        // Lines starting with → or space+number are results
        let trimmed = line.trim();
        if trimmed.is_empty() { continue; }
        // Extract key from result line like "→ 1. [0.83/0.83] identity.md (c4)"
        if let Some(key) = extract_key_from_line(trimmed) {
            if seen.contains(&key) { continue; }
            mark_seen(&state_dir, session_id, &key);
            result_output.push_str(line);
            result_output.push('\n');
            count += 1;
        } else if count > 0 {
            // Snippet line following a result
            result_output.push_str(line);
            result_output.push('\n');
        }
    }
    if count == 0 { return; }
    // The cookie lets downstream consumers verify this header came from
    // the hook rather than from injected conversation content.
    println!("Recalled memories [{}]:", cookie);
    print!("{}", result_output);
}
/// Reduce free text to at most `max_terms` lowercase content words.
///
/// Words are split on non-alphanumeric characters; entries of three or
/// fewer bytes and common English stop words are discarded. The
/// surviving terms are joined with single spaces for the search query.
fn extract_query_terms(text: &str, max_terms: usize) -> String {
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "do", "does", "did",
        "have", "has", "had", "will", "would", "could", "should", "can",
        "may", "might", "shall", "been", "being", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "but", "or", "and", "not",
        "no", "if", "then", "than", "that", "this", "it", "its", "my",
        "your", "our", "we", "you", "i", "me", "he", "she", "they", "them",
        "what", "how", "why", "when", "where", "about", "just", "let",
        "want", "tell", "show", "think", "know", "see", "look", "make",
        "get", "go", "some", "any", "all", "very", "really", "also", "too",
        "so", "up", "out", "here", "there",
    ];
    let lowered = text.to_lowercase();
    let mut terms: Vec<&str> = Vec::new();
    for word in lowered.split(|c: char| !c.is_alphanumeric()) {
        if terms.len() >= max_terms {
            break;
        }
        if word.len() > 2 && !STOP_WORDS.contains(&word) {
            terms.push(word);
        }
    }
    terms.join(" ")
}
/// Pull the memory key out of a search-result line.
///
/// Result lines look like "→ 1. [0.83/0.83] identity.md (c4)" or
/// " 1. [0.83/0.83] identity.md (c4)". Returns `None` for lines with no
/// "] " marker, an empty key, or a key without a '.' in it.
fn extract_key_from_line(line: &str) -> Option<String> {
    // Everything after the first "] " is the key plus optional suffix.
    let (_, rest) = line.split_once("] ")?;
    // The key runs up to the optional " (c…" community tag.
    let key = match rest.find(" (c") {
        Some(pos) => rest[..pos].trim(),
        None => rest.trim(),
    };
    (!key.is_empty() && key.contains('.')).then(|| key.to_string())
}
/// Return this session's cookie, minting and persisting one on first use.
///
/// If the cookie file exists but cannot be read, an empty string is
/// returned (matching the best-effort behavior of the rest of the hook).
fn load_or_create_cookie(dir: &PathBuf, session_id: &str) -> String {
    let path = dir.join(format!("cookie-{}", session_id));
    if path.exists() {
        fs::read_to_string(&path)
            .map(|s| s.trim().to_string())
            .unwrap_or_default()
    } else {
        let cookie = generate_cookie();
        // Persist for later calls; failure is tolerated (cookie still usable).
        fs::write(&path, &cookie).ok();
        cookie
    }
}
/// Generate a random 16-character alphanumeric (base-62) cookie.
///
/// Fixes two defects in the previous version: it shelled out to
/// `head -c 12 /dev/urandom` (panicking via `expect` when `head` or
/// /dev/urandom is unavailable), and it read only 12 bytes while taking
/// 16 chars — so cookies were silently 12 characters long. Entropy now
/// comes from `std::collections::hash_map::RandomState`, whose hash keys
/// are seeded from the OS RNG, so no subprocess is needed and the
/// function cannot panic.
fn generate_cookie() -> String {
    use std::collections::hash_map::RandomState;
    use std::hash::{BuildHasher, Hasher};
    let mut cookie = String::with_capacity(16);
    while cookie.len() < 16 {
        // Each fresh RandomState carries distinct random keys; finishing
        // an empty hasher exposes them as a pseudo-random u64.
        let mut word = RandomState::new().build_hasher().finish();
        // Peel off base-62 digits (8 per u64 keeps the bias negligible
        // for a session-dedup cookie).
        for _ in 0..8 {
            let idx = (word % 62) as u8;
            word /= 62;
            let c = match idx {
                0..=9 => b'0' + idx,
                10..=35 => b'a' + (idx - 10),
                _ => b'A' + (idx - 36),
            };
            cookie.push(c as char);
            if cookie.len() == 16 {
                break;
            }
        }
    }
    cookie
}
/// Load the set of result keys already shown in this session.
///
/// One key per line in `seen-<session_id>`. A missing or unreadable file
/// yields an empty set. Reading directly — instead of the previous
/// `path.exists()` pre-check — removes a TOCTOU race and a redundant
/// filesystem stat; `read_to_string` already reports a missing file as
/// an `Err`, which `unwrap_or_default` maps to "nothing seen yet".
fn load_seen(dir: &PathBuf, session_id: &str) -> HashSet<String> {
    fs::read_to_string(dir.join(format!("seen-{}", session_id)))
        .unwrap_or_default()
        .lines()
        .map(|s| s.to_string())
        .collect()
}
/// Append `key` to the session's seen-file so it is never shown again.
///
/// Best-effort: open or write failures are deliberately swallowed — a
/// dedup miss is preferable to failing the hook.
fn mark_seen(dir: &PathBuf, session_id: &str, key: &str) {
    let path = dir.join(format!("seen-{}", session_id));
    let file = fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(path);
    if let Ok(mut f) = file {
        let _ = writeln!(f, "{}", key);
    }
}

1067
src/capnp_store.rs Normal file

File diff suppressed because it is too large Load diff

685
src/graph.rs Normal file
View file

@ -0,0 +1,685 @@
// Graph algorithms: clustering coefficient, community detection (label
// propagation), schema fit scoring, small-world metrics, consolidation
// priority scoring.
//
// The Graph is built from the Store's nodes + relations. Edges are
// undirected for clustering/community (even causal edges count as
// connections), but relation type and direction are preserved for
// specific queries.
use crate::capnp_store::{Store, RelationType};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque};
/// Weighted edge in the graph.
#[derive(Clone, Debug)]
pub struct Edge {
    /// Key of the node this edge points to.
    pub target: String,
    /// Relation strength; used as the vote weight in label propagation.
    pub strength: f32,
    /// Original relation type — kept as metadata even though edges are
    /// materialized in both directions.
    pub rel_type: RelationType,
}
/// The in-memory graph built from store nodes + relations.
///
/// Constructed by [`build_graph`]; community labels are computed once at
/// build time via label propagation.
pub struct Graph {
    /// Adjacency list: node key → list of edges (each undirected edge
    /// appears in both endpoints' lists)
    adj: HashMap<String, Vec<Edge>>,
    /// All node keys, including isolates that have no adjacency entry
    keys: HashSet<String>,
    /// Community labels (from label propagation), compacted to 0..n
    communities: HashMap<String, u32>,
}
impl Graph {
    /// All node keys, including isolates with no edges.
    pub fn nodes(&self) -> &HashSet<String> {
        &self.keys
    }
    /// Number of edges incident to `key` (0 for unknown keys).
    pub fn degree(&self, key: &str) -> usize {
        self.adj.get(key).map(|e| e.len()).unwrap_or(0)
    }
    /// Undirected edge count. Every edge is stored once per endpoint, so
    /// the summed adjacency-list lengths are halved.
    pub fn edge_count(&self) -> usize {
        self.adj.values().map(|e| e.len()).sum::<usize>() / 2
    }
    /// All neighbor keys with strengths
    pub fn neighbors(&self, key: &str) -> Vec<(&String, f32)> {
        self.adj.get(key)
            .map(|edges| edges.iter().map(|e| (&e.target, e.strength)).collect())
            .unwrap_or_default()
    }
    /// Just neighbor keys
    pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
        self.adj.get(key)
            .map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
            .unwrap_or_default()
    }
    /// Number of distinct community labels.
    pub fn community_count(&self) -> usize {
        let labels: HashSet<_> = self.communities.values().collect();
        labels.len()
    }
    /// Node key → community label mapping (from label propagation).
    pub fn communities(&self) -> &HashMap<String, u32> {
        &self.communities
    }
    /// Local clustering coefficient: fraction of a node's neighbors
    /// that are also neighbors of each other.
    /// cc(v) = 2E / (deg * (deg - 1))
    ///
    /// O(deg²) set lookups; nodes with fewer than two neighbors score 0.
    pub fn clustering_coefficient(&self, key: &str) -> f32 {
        let neighbors = self.neighbor_keys(key);
        let deg = neighbors.len();
        if deg < 2 {
            return 0.0;
        }
        let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
        let mut triangles = 0u32;
        // Count unordered neighbor pairs that are themselves connected.
        for i in 0..neighbor_vec.len() {
            for j in (i + 1)..neighbor_vec.len() {
                let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
                if ni_neighbors.contains(neighbor_vec[j]) {
                    triangles += 1;
                }
            }
        }
        (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
    }
    /// Average clustering coefficient across all nodes with deg >= 2.
    /// Degree-0/1 nodes are excluded from the average, not counted as 0.
    pub fn avg_clustering_coefficient(&self) -> f32 {
        let mut sum = 0.0f32;
        let mut count = 0u32;
        for key in &self.keys {
            if self.degree(key) >= 2 {
                sum += self.clustering_coefficient(key);
                count += 1;
            }
        }
        if count == 0 { 0.0 } else { sum / count as f32 }
    }
    /// Average shortest path length (sampled BFS from up to 100 nodes).
    ///
    /// NOTE(review): the sample is the first 100 keys in HashSet iteration
    /// order — arbitrary, not uniformly random. Pairs unreachable from a
    /// sampled start node simply aren't counted.
    pub fn avg_path_length(&self) -> f32 {
        let sample: Vec<&String> = self.keys.iter().take(100).collect();
        if sample.is_empty() { return 0.0; }
        let mut total_dist = 0u64;
        let mut total_pairs = 0u64;
        for &start in &sample {
            let dists = self.bfs_distances(start);
            for d in dists.values() {
                // d == 0 is the start node itself; skip it.
                if *d > 0 {
                    total_dist += *d as u64;
                    total_pairs += 1;
                }
            }
        }
        if total_pairs == 0 { 0.0 } else { total_dist as f32 / total_pairs as f32 }
    }
    /// Unweighted BFS hop counts from `start` to every reachable node
    /// (including `start` itself at distance 0).
    fn bfs_distances(&self, start: &str) -> HashMap<String, u32> {
        let mut dist = HashMap::new();
        let mut queue = VecDeque::new();
        dist.insert(start.to_string(), 0u32);
        queue.push_back(start.to_string());
        while let Some(node) = queue.pop_front() {
            let d = dist[&node];
            for neighbor in self.neighbor_keys(&node) {
                if !dist.contains_key(neighbor) {
                    dist.insert(neighbor.to_string(), d + 1);
                    queue.push_back(neighbor.to_string());
                }
            }
        }
        dist
    }
    /// Power-law exponent α of the degree distribution.
    ///
    /// Estimated via MLE: α = 1 + n / Σ ln(k_i / (k_min - 0.5))
    /// α ≈ 2: extreme hub dominance (fragile)
    /// α ≈ 3: healthy scale-free
    /// α > 3: approaching random graph (egalitarian)
    ///
    /// Returns 0.0 when fewer than 10 connected nodes exist.
    pub fn degree_power_law_exponent(&self) -> f32 {
        let mut degrees: Vec<usize> = self.keys.iter()
            .map(|k| self.degree(k))
            .filter(|&d| d > 0) // exclude isolates
            .collect();
        if degrees.len() < 10 { return 0.0; } // not enough data
        degrees.sort_unstable();
        let k_min = degrees[0] as f64;
        if k_min < 1.0 { return 0.0; }
        let n = degrees.len() as f64;
        let sum_ln: f64 = degrees.iter()
            .map(|&k| (k as f64 / (k_min - 0.5)).ln())
            .sum();
        if sum_ln <= 0.0 { return 0.0; }
        (1.0 + n / sum_ln) as f32
    }
    /// Gini coefficient of the degree distribution.
    ///
    /// 0 = perfectly egalitarian (all nodes same degree)
    /// 1 = maximally unequal (one node has all edges)
    /// Measures hub concentration independent of distribution shape.
    pub fn degree_gini(&self) -> f32 {
        let mut degrees: Vec<f64> = self.keys.iter()
            .map(|k| self.degree(k) as f64)
            .collect();
        let n = degrees.len();
        if n < 2 { return 0.0; }
        degrees.sort_by(|a, b| a.partial_cmp(b).unwrap());
        let mean = degrees.iter().sum::<f64>() / n as f64;
        // mean ~ 0 means every node is an isolate: treat as equal.
        if mean < 1e-10 { return 0.0; }
        // Gini = (2 Σ i·x_i) / (n Σ x_i) - (n+1)/n
        let weighted_sum: f64 = degrees.iter().enumerate()
            .map(|(i, &d)| (i as f64 + 1.0) * d)
            .sum();
        let total = degrees.iter().sum::<f64>();
        let gini = (2.0 * weighted_sum) / (n as f64 * total) - (n as f64 + 1.0) / n as f64;
        // Clamp tiny negative values caused by floating-point error.
        gini.max(0.0) as f32
    }
    /// Small-world coefficient σ = (C/C_rand) / (L/L_rand)
    /// C_rand ≈ <k>/n, L_rand ≈ ln(n)/ln(<k>)
    ///
    /// σ > 1 suggests small-world structure. Returns 0.0 for graphs that
    /// are too small (<10 nodes) or too sparse (<k> < 1).
    pub fn small_world_sigma(&self) -> f32 {
        let n = self.keys.len() as f32;
        if n < 10.0 { return 0.0; }
        // adj stores each edge twice, so this sum / n is mean degree <k>.
        let avg_degree = self.adj.values()
            .map(|e| e.len() as f32)
            .sum::<f32>() / n;
        if avg_degree < 1.0 { return 0.0; }
        let c = self.avg_clustering_coefficient();
        let l = self.avg_path_length();
        let c_rand = avg_degree / n;
        let l_rand = n.ln() / avg_degree.ln();
        if c_rand < 1e-10 || l_rand < 1e-10 || l < 1e-10 {
            return 0.0;
        }
        (c / c_rand) / (l / l_rand)
    }
}
/// Impact of adding a hypothetical edge.
///
/// Produced by `Graph::link_impact`; all deltas are estimates computed
/// without mutating the graph.
#[derive(Debug)]
pub struct LinkImpact {
    pub source: String,
    pub target: String,
    /// Current (pre-link) degree of the source node
    pub source_deg: usize,
    /// Current (pre-link) degree of the target node
    pub target_deg: usize,
    /// Is this a hub link? (either endpoint in top 5% by degree)
    pub is_hub_link: bool,
    /// Are both endpoints in the same community?
    pub same_community: bool,
    /// Change in clustering coefficient for source
    pub delta_cc_source: f32,
    /// Change in clustering coefficient for target
    pub delta_cc_target: f32,
    /// Change in degree Gini (positive = more hub-dominated)
    pub delta_gini: f32,
    /// Qualitative assessment
    pub assessment: &'static str,
}
impl Graph {
    /// Simulate adding an edge and report impact on topology metrics.
    ///
    /// Doesn't modify the graph — computes what would change if the
    /// edge were added. ΔCC is derived by recovering the current triangle
    /// count from the stored f32 coefficient (subject to rounding) and
    /// adding one triangle per shared neighbor; ΔGini is a first-order
    /// influence-function estimate, not a full recomputation.
    pub fn link_impact(&self, source: &str, target: &str) -> LinkImpact {
        let source_deg = self.degree(source);
        let target_deg = self.degree(target);
        // Hub threshold: top 5% by degree
        let mut all_degrees: Vec<usize> = self.keys.iter()
            .map(|k| self.degree(k))
            .collect();
        all_degrees.sort_unstable();
        let hub_threshold = if all_degrees.len() >= 20 {
            // 95th-percentile degree (index into the sorted list).
            all_degrees[all_degrees.len() * 95 / 100]
        } else {
            usize::MAX // can't define hubs with <20 nodes
        };
        let is_hub_link = source_deg >= hub_threshold || target_deg >= hub_threshold;
        // Community check
        let sc = self.communities.get(source);
        let tc = self.communities.get(target);
        let same_community = match (sc, tc) {
            (Some(a), Some(b)) => a == b,
            // Either endpoint unlabeled → treated as different communities.
            _ => false,
        };
        // CC change for source: adding target as neighbor changes the
        // triangle count. New triangles form for each node that's a
        // neighbor of BOTH source and target.
        let source_neighbors = self.neighbor_keys(source);
        let target_neighbors = self.neighbor_keys(target);
        let shared_neighbors = source_neighbors.intersection(&target_neighbors).count();
        let cc_before_source = self.clustering_coefficient(source);
        let cc_before_target = self.clustering_coefficient(target);
        // Estimate new CC for source after adding edge
        let new_source_deg = source_deg + 1;
        let new_source_triangles = if source_deg >= 2 {
            // Current triangles + new ones from shared neighbors.
            // (Inverts cc = 2T / (d(d-1)); truncation can lose a triangle
            // to f32 rounding.)
            let current_triangles = (cc_before_source
                * source_deg as f32 * (source_deg as f32 - 1.0) / 2.0) as u32;
            current_triangles + shared_neighbors as u32
        } else {
            // deg < 2 means cc was 0 by definition; only new triangles count.
            shared_neighbors as u32
        };
        let cc_after_source = if new_source_deg >= 2 {
            (2.0 * new_source_triangles as f32)
                / (new_source_deg as f32 * (new_source_deg as f32 - 1.0))
        } else {
            0.0
        };
        // Same estimate, mirrored for the target endpoint.
        let new_target_deg = target_deg + 1;
        let new_target_triangles = if target_deg >= 2 {
            let current_triangles = (cc_before_target
                * target_deg as f32 * (target_deg as f32 - 1.0) / 2.0) as u32;
            current_triangles + shared_neighbors as u32
        } else {
            shared_neighbors as u32
        };
        let cc_after_target = if new_target_deg >= 2 {
            (2.0 * new_target_triangles as f32)
                / (new_target_deg as f32 * (new_target_deg as f32 - 1.0))
        } else {
            0.0
        };
        // Gini change via influence function:
        // IF(x; Gini, F) = (2F(x) - 1) * x/μ - Gini - 1
        // Adding an edge increments two degrees. The net ΔGini is the sum
        // of influence contributions from both endpoints shifting up by 1.
        let gini_before = self.degree_gini();
        let n = self.keys.len();
        let total_degree: f64 = self.keys.iter()
            .map(|k| self.degree(k) as f64)
            .sum();
        let mean_deg = if n > 0 { total_degree / n as f64 } else { 1.0 };
        // CDF at each endpoint's degree: fraction of nodes with degree ≤ d
        let delta_gini = if mean_deg > 1e-10 && n >= 2 {
            // Count nodes with degree ≤ source_deg and ≤ target_deg
            let f_source = self.keys.iter()
                .filter(|k| self.degree(k) <= source_deg)
                .count() as f64 / n as f64;
            let f_target = self.keys.iter()
                .filter(|k| self.degree(k) <= target_deg)
                .count() as f64 / n as f64;
            // Influence of incrementing source's degree by 1
            let new_source = (source_deg + 1) as f64;
            let if_source = (2.0 * f_source - 1.0) * new_source / mean_deg
                - gini_before as f64 - 1.0;
            // Influence of incrementing target's degree by 1
            let new_target = (target_deg + 1) as f64;
            let if_target = (2.0 * f_target - 1.0) * new_target / mean_deg
                - gini_before as f64 - 1.0;
            // Scale: each point contributes 1/n to the distribution
            ((if_source + if_target) / n as f64) as f32
        } else {
            0.0f32
        };
        // Qualitative assessment — ordered from least to most desirable
        // link topology, with hub status checked first.
        let assessment = if is_hub_link && same_community {
            "hub-reinforcing: strengthens existing star topology"
        } else if is_hub_link && !same_community {
            "hub-bridging: cross-community but through a hub"
        } else if !is_hub_link && same_community && shared_neighbors > 0 {
            "lateral-clustering: strengthens local mesh topology"
        } else if !is_hub_link && !same_community {
            "lateral-bridging: best kind — cross-community lateral link"
        } else if !is_hub_link && same_community {
            "lateral-local: connects peripheral nodes in same community"
        } else {
            "neutral"
        };
        LinkImpact {
            source: source.to_string(),
            target: target.to_string(),
            source_deg,
            target_deg,
            is_hub_link,
            same_community,
            delta_cc_source: cc_after_source - cc_before_source,
            delta_cc_target: cc_after_target - cc_before_target,
            delta_gini: delta_gini,
            assessment,
        }
    }
}
/// Build the in-memory graph from the store's nodes and relations.
///
/// Relations referencing a missing endpoint are skipped. Every surviving
/// relation is materialized as two adjacency entries — one per endpoint —
/// making the graph effectively undirected while keeping the relation
/// type and strength on each copy. Community labels are computed once
/// here via label propagation (20 iterations max).
pub fn build_graph(store: &Store) -> Graph {
    let keys: HashSet<String> = store.nodes.keys().cloned().collect();
    let mut adj: HashMap<String, Vec<Edge>> = HashMap::new();
    for rel in &store.relations {
        // Both endpoints must exist as nodes; drop dangling relations.
        if !(keys.contains(&rel.source_key) && keys.contains(&rel.target_key)) {
            continue;
        }
        // Insert the edge in both directions (even for causal relations —
        // direction is metadata carried by rel_type).
        for (from, to) in [
            (&rel.source_key, &rel.target_key),
            (&rel.target_key, &rel.source_key),
        ] {
            adj.entry(from.clone()).or_default().push(Edge {
                target: to.clone(),
                strength: rel.strength,
                rel_type: rel.rel_type,
            });
        }
    }
    let communities = label_propagation(&keys, &adj, 20);
    Graph { adj, keys, communities }
}
/// Label propagation community detection.
///
/// Every node starts in its own singleton community. On each pass, a node
/// adopts whichever label has the greatest strength-weighted support among
/// its neighbors. Iteration stops at the first pass with no changes, or
/// after `max_iterations` passes. Surviving labels are then renumbered to
/// a dense 0..n range.
fn label_propagation(
    keys: &HashSet<String>,
    adj: &HashMap<String, Vec<Edge>>,
    max_iterations: u32,
) -> HashMap<String, u32> {
    // Seed: one unique label per node.
    let key_vec: Vec<String> = keys.iter().cloned().collect();
    let mut labels: HashMap<String, u32> = HashMap::new();
    for (i, k) in key_vec.iter().enumerate() {
        labels.insert(k.clone(), i as u32);
    }
    for _ in 0..max_iterations {
        let mut any_change = false;
        for key in &key_vec {
            // Isolated nodes keep their own label forever.
            let edges = match adj.get(key) {
                Some(e) if !e.is_empty() => e,
                _ => continue,
            };
            // Tally strength-weighted votes per neighboring label.
            let mut votes: HashMap<u32, f32> = HashMap::new();
            for edge in edges {
                if let Some(&label) = labels.get(&edge.target) {
                    *votes.entry(label).or_default() += edge.strength;
                }
            }
            // Adopt the winning label (ties broken by map iteration order,
            // as before).
            let winner = votes.iter()
                .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
                .map(|(&label, _)| label);
            if let Some(best) = winner {
                if labels[key] != best {
                    labels.insert(key.clone(), best);
                    any_change = true;
                }
            }
        }
        if !any_change { break; }
    }
    // Renumber surviving labels densely from zero.
    let mut remap: HashMap<u32, u32> = HashMap::new();
    for label in labels.values_mut() {
        let next = remap.len() as u32;
        *label = *remap.entry(*label).or_insert(next);
    }
    labels
}
/// Schema fit: for a node, measure how well-connected its neighbors are
/// to each other. The score averages neighborhood edge density with the
/// node's own clustering coefficient; nodes with fewer than two
/// neighbors (isolates and leaves) score 0.
pub fn schema_fit(graph: &Graph, key: &str) -> f32 {
    let neighbor_vec: Vec<&str> = graph.neighbor_keys(key).into_iter().collect();
    let n = neighbor_vec.len();
    if n < 2 {
        return 0.0; // isolated or leaf — no schema context
    }
    // Count edges among the neighbors themselves.
    let mut inter_edges = 0u32;
    for (i, a) in neighbor_vec.iter().enumerate() {
        for b in &neighbor_vec[i + 1..] {
            if graph.neighbor_keys(a).contains(b) {
                inter_edges += 1;
            }
        }
    }
    // n >= 2 guarantees at least one possible pair here.
    let max_edges = (n * (n - 1)) / 2;
    let density = inter_edges as f32 / max_edges as f32;
    // Blend neighborhood density with the node's own CC.
    (density + graph.clustering_coefficient(key)) / 2.0
}
/// Compute schema fit for every node in the graph.
pub fn schema_fit_all(graph: &Graph) -> HashMap<String, f32> {
    let mut fits = HashMap::new();
    for key in graph.nodes() {
        fits.insert(key.clone(), schema_fit(graph, key));
    }
    fits
}
/// A snapshot of graph topology metrics, for tracking evolution over time.
/// Serialized as one JSON object per line in the metrics log.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MetricsSnapshot {
    // Seconds since the Unix epoch at snapshot time.
    pub timestamp: f64,
    // Human-readable form, "%Y-%m-%d %H:%M" (set by health_report).
    pub date: String,
    // Node count.
    pub nodes: usize,
    // Undirected edge count.
    pub edges: usize,
    // Number of detected communities.
    pub communities: usize,
    // Small-world coefficient σ.
    pub sigma: f32,
    // Degree-distribution power-law exponent α.
    pub alpha: f32,
    // Degree Gini coefficient.
    pub gini: f32,
    // Average local clustering coefficient.
    pub avg_cc: f32,
    // Sampled average shortest path length.
    pub avg_path_length: f32,
    // Mean schema-fit score across all nodes.
    pub avg_schema_fit: f32,
}
/// Path of the metrics log: $HOME/.claude/memory/metrics.jsonl.
/// An unset HOME degrades to a relative path rather than panicking.
fn metrics_log_path() -> std::path::PathBuf {
    let home = std::env::var("HOME").unwrap_or_default();
    let mut path = std::path::PathBuf::from(home);
    path.push(".claude/memory/metrics.jsonl");
    path
}
/// Load previous metrics snapshots from the JSONL log.
///
/// A missing/unreadable log yields an empty history; individual lines
/// that fail to parse are silently skipped.
pub fn load_metrics_history() -> Vec<MetricsSnapshot> {
    std::fs::read_to_string(metrics_log_path())
        .map(|content| {
            content.lines()
                .filter_map(|line| serde_json::from_str(line).ok())
                .collect()
        })
        .unwrap_or_default()
}
/// Append one snapshot as a JSON line to the metrics log.
/// Best-effort: serialization and I/O failures are silently ignored.
pub fn save_metrics_snapshot(snap: &MetricsSnapshot) {
    use std::io::Write;
    let json = match serde_json::to_string(snap) {
        Ok(j) => j,
        Err(_) => return,
    };
    let file = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(metrics_log_path());
    if let Ok(mut f) = file {
        let _ = writeln!(f, "{}", json);
    }
}
/// Health report: human-readable summary of graph topology metrics,
/// schema fit, and category counts.
///
/// Side effect: appends a `MetricsSnapshot` to the metrics log so that
/// successive reports can show per-metric deltas and a short history.
///
/// Fixes: `delta()` used to render `"+0.012)"` — a dangling close paren
/// with no opening `" ("`; it now renders a balanced `" (+0.012)"`. The
/// timestamp/date code no longer panics when the clock is before the
/// epoch or the `date` binary is missing.
pub fn health_report(graph: &Graph, store: &Store) -> String {
    let n = graph.nodes().len();
    let e = graph.edge_count();
    let avg_cc = graph.avg_clustering_coefficient();
    let avg_pl = graph.avg_path_length();
    let sigma = graph.small_world_sigma();
    let communities = graph.community_count();
    // Community sizes
    let mut comm_sizes: HashMap<u32, usize> = HashMap::new();
    for label in graph.communities().values() {
        *comm_sizes.entry(*label).or_default() += 1;
    }
    let mut sizes: Vec<usize> = comm_sizes.values().copied().collect();
    sizes.sort_unstable_by(|a, b| b.cmp(a)); // largest first
    // Degree distribution
    let mut degrees: Vec<usize> = graph.nodes().iter()
        .map(|k| graph.degree(k))
        .collect();
    degrees.sort_unstable();
    let max_deg = degrees.last().copied().unwrap_or(0);
    let median_deg = if degrees.is_empty() { 0 } else { degrees[degrees.len() / 2] };
    let avg_deg = if n == 0 { 0.0 } else {
        degrees.iter().sum::<usize>() as f64 / n as f64
    };
    // Topology metrics
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();
    // Schema fit distribution
    let fits = schema_fit_all(graph);
    let avg_fit = if fits.is_empty() { 0.0 } else {
        fits.values().sum::<f32>() / fits.len() as f32
    };
    let low_fit = fits.values().filter(|&&f| f < 0.1).count();
    // Category breakdown
    let cats = store.category_counts();
    // Snapshot current metrics and log them. A pre-epoch clock yields
    // 0.0 instead of panicking.
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_secs_f64())
        .unwrap_or(0.0);
    // Format via the system `date` binary; fall back to an empty string
    // (rather than panicking) when it is unavailable.
    let date = match std::process::Command::new("date").arg("+%Y-%m-%d %H:%M").output() {
        Ok(out) => String::from_utf8_lossy(&out.stdout).trim().to_string(),
        Err(_) => String::new(),
    };
    let snap = MetricsSnapshot {
        timestamp: now,
        date: date.clone(),
        nodes: n, edges: e, communities,
        sigma, alpha, gini, avg_cc,
        avg_path_length: avg_pl,
        avg_schema_fit: avg_fit,
    };
    save_metrics_snapshot(&snap);
    // Load history for deltas
    let history = load_metrics_history();
    let prev = if history.len() >= 2 {
        Some(&history[history.len() - 2]) // second-to-last (last is the one we just wrote)
    } else {
        None
    };
    // Render a delta like " (+0.012)" against the previous snapshot;
    // empty when there is no prior value or the change is negligible.
    fn delta(current: f32, prev: Option<f32>) -> String {
        match prev {
            Some(p) => {
                let d = current - p;
                if d.abs() < 0.001 { String::new() }
                else { format!(" ({:+.3})", d) }
            }
            None => String::new(),
        }
    }
    let sigma_d = delta(sigma, prev.map(|p| p.sigma));
    let alpha_d = delta(alpha, prev.map(|p| p.alpha));
    let gini_d = delta(gini, prev.map(|p| p.gini));
    let cc_d = delta(avg_cc, prev.map(|p| p.avg_cc));
    let fit_d = delta(avg_fit, prev.map(|p| p.avg_schema_fit));
    let mut report = format!(
"Memory Health Report
====================
Nodes: {n} Relations: {e} Communities: {communities}
Degree: max={max_deg} median={median_deg} avg={avg_deg:.1}
Clustering coefficient (avg): {avg_cc:.4}{cc_d}
Average path length: {avg_pl:.2}
Small-world σ: {sigma:.3}{sigma_d} (>1 = small-world)
Power-law α: {alpha:.2}{alpha_d} (2=hub-dominated, 3=healthy, >3=egalitarian)
Degree Gini: {gini:.3}{gini_d} (0=equal, 1=one-hub)
Community sizes (top 5): {top5}
Schema fit: avg={avg_fit:.3}{fit_d} low-fit (<0.1): {low_fit} nodes
Categories: core={core} tech={tech} gen={gen} obs={obs} task={task}",
        top5 = sizes.iter().take(5)
            .map(|s| s.to_string())
            .collect::<Vec<_>>()
            .join(", "),
        core = cats.get("core").unwrap_or(&0),
        tech = cats.get("tech").unwrap_or(&0),
        gen = cats.get("gen").unwrap_or(&0),
        obs = cats.get("obs").unwrap_or(&0),
        task = cats.get("task").unwrap_or(&0),
    );
    // Show history trend if we have enough data points (oldest first).
    if history.len() >= 3 {
        report.push_str("\n\nMetrics history (last 5):\n");
        for snap in history.iter().rev().take(5).collect::<Vec<_>>().into_iter().rev() {
            report.push_str(&format!(" {} σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n",
                snap.date, snap.sigma, snap.alpha, snap.gini, snap.avg_cc, snap.avg_schema_fit));
        }
    }
    report
}

766
src/main.rs Normal file
View file

@ -0,0 +1,766 @@
#![allow(dead_code)]
// poc-memory: graph-structured memory with append-only Cap'n Proto storage
//
// Architecture:
// nodes.capnp - append-only content node log
// relations.capnp - append-only relation log
// state.bin - derived KV cache (rebuilt from logs when stale)
//
// Graph algorithms: clustering coefficient, community detection (label
// propagation), schema fit scoring, small-world metrics, consolidation
// priority. Text similarity via BM25 with Porter stemming.
//
// Neuroscience-inspired: spaced repetition replay, emotional gating,
// interference detection, schema assimilation, reconsolidation.
mod capnp_store;
mod graph;
mod search;
mod similarity;
mod migrate;
mod neuro;
pub mod memory_capnp {
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
}
use std::env;
use std::process;
fn main() {
    // args[0] is the binary name; args[1] selects the subcommand.
    let args: Vec<String> = env::args().collect();
    if args.len() < 2 {
        usage();
        process::exit(1);
    }
    let rest = &args[2..];
    // Dispatch; unknown commands print usage and exit non-zero.
    let result = match args[1].as_str() {
        "search" => cmd_search(rest),
        "init" => cmd_init(),
        "migrate" => cmd_migrate(),
        "health" => cmd_health(),
        "status" => cmd_status(),
        "graph" => cmd_graph(),
        "used" => cmd_used(rest),
        "wrong" => cmd_wrong(rest),
        "gap" => cmd_gap(rest),
        "categorize" => cmd_categorize(rest),
        "decay" => cmd_decay(),
        "consolidate-batch" => cmd_consolidate_batch(rest),
        "log" => cmd_log(),
        "params" => cmd_params(),
        "link" => cmd_link(rest),
        "replay-queue" => cmd_replay_queue(rest),
        "interference" => cmd_interference(rest),
        "link-add" => cmd_link_add(rest),
        "link-impact" => cmd_link_impact(rest),
        "consolidate-session" => cmd_consolidate_session(),
        "daily-check" => cmd_daily_check(),
        "apply-agent" => cmd_apply_agent(rest),
        "digest" => cmd_digest(rest),
        "trace" => cmd_trace(rest),
        unknown => {
            eprintln!("Unknown command: {}", unknown);
            usage();
            process::exit(1);
        }
    };
    // Any command error is reported on stderr with exit code 1.
    if let Err(message) = result {
        eprintln!("Error: {}", message);
        process::exit(1);
    }
}
// Print the full command reference to stderr (keeping stdout clean for
// pipelines). Should be kept in sync with the dispatch table in `main`.
fn usage() {
    eprintln!("poc-memory v0.4.0 — graph-structured memory store
Commands:
search QUERY [QUERY...] Search memory (AND logic across terms)
init Scan markdown files, index all memory units
migrate Migrate from old weights.json system
health Report graph metrics (CC, communities, small-world)
status Summary of memory state
graph Show graph structure overview
used KEY Mark a memory as useful (boosts weight)
wrong KEY [CONTEXT] Mark a memory as wrong/irrelevant
gap DESCRIPTION Record a gap in memory coverage
categorize KEY CATEGORY Reassign category (core/tech/gen/obs/task)
decay Apply daily weight decay
consolidate-batch [--count N] [--auto]
Run agent consolidation on priority nodes
log Show recent retrieval log
params Show current parameters
link N Interactive graph walk from search result N
replay-queue [--count N] Show spaced repetition replay queue
interference [--threshold F]
Detect potentially confusable memory pairs
link-add SOURCE TARGET [REASON]
Add a link between two nodes
link-impact SOURCE TARGET Simulate adding an edge, report topology impact
consolidate-session Analyze metrics, plan agent allocation
daily-check Brief metrics check (for cron/notifications)
apply-agent [--all] Import pending agent results into the graph
digest daily [DATE] Generate daily episodic digest (default: today)
digest weekly [DATE] Generate weekly digest (any date in target week)
trace KEY Walk temporal links: semantic episodic conversation");
}
/// `search QUERY...` — run BM25/graph search, log the retrieval, and
/// print up to the top 15 hits with activation, weight, and community.
fn cmd_search(args: &[String]) -> Result<(), String> {
    if args.is_empty() {
        return Err("Usage: poc-memory search QUERY [QUERY...]".into());
    }
    let query = args.join(" ");
    let mut store = capnp_store::Store::load()?;
    let results = search::search(&query, &store);
    if results.is_empty() {
        eprintln!("No results for '{}'", query);
        return Ok(());
    }
    // Record the retrieval before rendering so the log always reflects it.
    let result_keys: Vec<String> = results.iter().map(|r| r.key.clone()).collect();
    store.log_retrieval(&query, &result_keys);
    store.save()?;
    for (i, r) in results.iter().enumerate().take(15) {
        let marker = if r.is_direct { "" } else { " " };
        let weight = store.node_weight(&r.key).unwrap_or(0.0);
        print!("{}{:2}. [{:.2}/{:.2}] {}", marker, i + 1, r.activation, weight, r.key);
        if let Some(community) = store.node_community(&r.key) {
            print!(" (c{})", community);
        }
        println!();
        if let Some(ref snippet) = r.snippet {
            println!(" {}", snippet);
        }
    }
    Ok(())
}
/// `init` — scan markdown files, index every memory unit, and persist.
fn cmd_init() -> Result<(), String> {
    let mut store = capnp_store::Store::load()?;
    let indexed = store.init_from_markdown()?;
    store.save()?;
    println!("Indexed {} memory units", indexed);
    Ok(())
}
// `migrate` — thin wrapper delegating to the migrate module
// (converts data from the old weights.json system).
fn cmd_migrate() -> Result<(), String> {
    migrate::migrate()
}
/// `health` — build the graph and print the full health report.
fn cmd_health() -> Result<(), String> {
    let store = capnp_store::Store::load()?;
    let graph = store.build_graph();
    println!("{}", graph::health_report(&graph, &store));
    Ok(())
}
/// `status` — quick counts: nodes, relations, categories, edges, communities.
fn cmd_status() -> Result<(), String> {
    let store = capnp_store::Store::load()?;
    println!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len());
    let cats = store.category_counts();
    println!("Categories: core={} tech={} gen={} obs={} task={}",
        cats.get("core").unwrap_or(&0),
        cats.get("tech").unwrap_or(&0),
        cats.get("gen").unwrap_or(&0),
        cats.get("obs").unwrap_or(&0),
        cats.get("task").unwrap_or(&0),
    );
    // Graph-level stats come last since building the graph is the
    // expensive part.
    let graph = store.build_graph();
    println!("Graph edges: {} Communities: {}",
        graph.edge_count(), graph.community_count());
    Ok(())
}
/// `graph` — list the ten highest-degree nodes with their clustering
/// coefficients.
fn cmd_graph() -> Result<(), String> {
    let store = capnp_store::Store::load()?;
    let g = store.build_graph();
    // Rank every node by degree, highest first.
    let mut by_degree: Vec<(String, usize)> = g.nodes().iter()
        .map(|k| (k.clone(), g.degree(k)))
        .collect();
    by_degree.sort_by(|a, b| b.1.cmp(&a.1));
    println!("Top nodes by degree:");
    for (key, deg) in by_degree.iter().take(10) {
        println!(" {:40} deg={:3} cc={:.3}", key, deg, g.clustering_coefficient(key));
    }
    Ok(())
}
/// `used KEY` — mark a memory as useful (boosts its weight).
/// Multi-word keys are rejoined with spaces before resolution.
fn cmd_used(args: &[String]) -> Result<(), String> {
    if args.is_empty() {
        return Err("Usage: poc-memory used KEY".into());
    }
    let mut store = capnp_store::Store::load()?;
    let resolved = store.resolve_key(&args.join(" "))?;
    store.mark_used(&resolved);
    store.save()?;
    println!("Marked '{}' as used", resolved);
    Ok(())
}
/// `wrong KEY [CONTEXT]` — mark a memory as wrong/irrelevant.
/// The first argument is the key; any remaining arguments become
/// free-form context.
fn cmd_wrong(args: &[String]) -> Result<(), String> {
    if args.is_empty() {
        return Err("Usage: poc-memory wrong KEY [CONTEXT]".into());
    }
    let context: Option<String> = match args.len() {
        1 => None,
        _ => Some(args[1..].join(" ")),
    };
    let mut store = capnp_store::Store::load()?;
    let resolved = store.resolve_key(&args[0])?;
    store.mark_wrong(&resolved, context.as_deref());
    store.save()?;
    println!("Marked '{}' as wrong", resolved);
    Ok(())
}
/// `gap DESCRIPTION` — record a gap in memory coverage.
fn cmd_gap(args: &[String]) -> Result<(), String> {
    if args.is_empty() {
        return Err("Usage: poc-memory gap DESCRIPTION".into());
    }
    let description = args.join(" ");
    let mut store = capnp_store::Store::load()?;
    store.record_gap(&description);
    store.save()?;
    println!("Recorded gap: {}", description);
    Ok(())
}
/// `categorize KEY CATEGORY` — reassign a node's category.
fn cmd_categorize(args: &[String]) -> Result<(), String> {
    // Need at least a key and a category; extra args are ignored.
    let (key, category) = match args {
        [k, c, ..] => (k, c),
        _ => return Err("Usage: poc-memory categorize KEY CATEGORY".into()),
    };
    let mut store = capnp_store::Store::load()?;
    let resolved = store.resolve_key(key)?;
    store.categorize(&resolved, category)?;
    store.save()?;
    println!("Set '{}' category to {}", resolved, category);
    Ok(())
}
/// `decay` — apply the daily weight-decay pass and persist the result.
fn cmd_decay() -> Result<(), String> {
    let mut store = capnp_store::Store::load()?;
    let (decayed_count, pruned_count) = store.decay();
    store.save()?;
    println!("Decayed {} nodes, pruned {} below threshold", decayed_count, pruned_count);
    Ok(())
}
/// `consolidate-batch [--count N] [--auto] [--agent NAME]` — run agent
/// consolidation on priority nodes, or emit a single agent's prompt.
fn cmd_consolidate_batch(args: &[String]) -> Result<(), String> {
    // Defaults: 5 nodes, interactive mode, no specific agent.
    let mut count = 5usize;
    let mut auto = false;
    let mut agent: Option<String> = None;
    let mut idx = 0;
    while idx < args.len() {
        let flag = args[idx].as_str();
        if flag == "--auto" {
            auto = true;
            idx += 1;
        } else if flag == "--count" && idx + 1 < args.len() {
            count = args[idx + 1].parse().map_err(|_| "invalid count")?;
            idx += 2;
        } else if flag == "--agent" && idx + 1 < args.len() {
            agent = Some(args[idx + 1].clone());
            idx += 2;
        } else {
            // Unknown tokens are skipped silently (matches prior behavior).
            idx += 1;
        }
    }
    let store = capnp_store::Store::load()?;
    match agent {
        // A named agent means: print that agent's prompt instead of
        // running the batch.
        Some(name) => {
            println!("{}", neuro::agent_prompt(&store, &name, count)?);
            Ok(())
        }
        None => neuro::consolidation_batch(&store, count, auto),
    }
}
/// `log` — show the 20 most recent retrieval-log events, newest first,
/// with the result keys indented under each event.
///
/// Fix: the header format string was `q=\"{}\"{} results`, which ran the
/// result count directly into the closing quote (e.g. `q="foo"3 results`);
/// a separating space has been added.
fn cmd_log() -> Result<(), String> {
    let store = capnp_store::Store::load()?;
    for event in store.retrieval_log.iter().rev().take(20) {
        println!("[{}] q=\"{}\" {} results",
            event.timestamp, event.query, event.results.len());
        for r in &event.results {
            println!(" {}", r);
        }
    }
    Ok(())
}
/// `params` — dump the tunable parameters, one per line.
fn cmd_params() -> Result<(), String> {
    let store = capnp_store::Store::load()?;
    let p = &store.params;
    println!("decay_factor: {}", p.decay_factor);
    println!("use_boost: {}", p.use_boost);
    println!("prune_threshold: {}", p.prune_threshold);
    println!("edge_decay: {}", p.edge_decay);
    println!("max_hops: {}", p.max_hops);
    println!("min_activation: {}", p.min_activation);
    Ok(())
}
/// `link KEY` — list a node's neighbors with edge strength and each
/// neighbor's clustering coefficient.
fn cmd_link(args: &[String]) -> Result<(), String> {
    if args.is_empty() {
        return Err("Usage: poc-memory link KEY".into());
    }
    let store = capnp_store::Store::load()?;
    let resolved = store.resolve_key(&args.join(" "))?;
    let g = store.build_graph();
    println!("Neighbors of '{}':", resolved);
    for (i, (neighbor, strength)) in g.neighbors(&resolved).iter().enumerate() {
        println!(" {:2}. [{:.2}] {} (cc={:.3})",
            i + 1, strength, neighbor, g.clustering_coefficient(neighbor));
    }
    Ok(())
}
/// `replay-queue [--count N]` — show the spaced-repetition replay queue
/// (default 10 items).
fn cmd_replay_queue(args: &[String]) -> Result<(), String> {
    let mut count = 10usize;
    let mut idx = 0;
    while idx < args.len() {
        // Only recognized flag: --count N; anything else is skipped.
        if args[idx] == "--count" && idx + 1 < args.len() {
            count = args[idx + 1].parse().map_err(|_| "invalid count")?;
            idx += 2;
        } else {
            idx += 1;
        }
    }
    let store = capnp_store::Store::load()?;
    let queue = neuro::replay_queue(&store, count);
    println!("Replay queue ({} items):", queue.len());
    for (i, item) in queue.iter().enumerate() {
        println!(" {:2}. [{:.3}] {} (interval={}d, emotion={:.1})",
            i + 1, item.priority, item.key, item.interval_days, item.emotion);
    }
    Ok(())
}
/// Build and print the session consolidation plan.
fn cmd_consolidate_session() -> Result<(), String> {
    let store = capnp_store::Store::load()?;
    let formatted = neuro::format_plan(&neuro::consolidation_plan(&store));
    println!("{}", formatted);
    Ok(())
}
/// Run the daily health check; the report is emitted with `print!`
/// so no extra trailing newline is appended.
fn cmd_daily_check() -> Result<(), String> {
    let store = capnp_store::Store::load()?;
    print!("{}", neuro::daily_check(&store));
    Ok(())
}
/// Add a manual association link between two existing nodes.
///
/// Usage: poc-memory link-add SOURCE TARGET [REASON]
/// SOURCE and TARGET are resolved via the store's key lookup; REASON
/// (any remaining args, joined) is used only for console output.
/// Adding a link that already exists (same direction, not deleted)
/// is a no-op.
fn cmd_link_add(args: &[String]) -> Result<(), String> {
    if args.len() < 2 {
        return Err("Usage: poc-memory link-add SOURCE TARGET [REASON]".into());
    }
    let mut store = capnp_store::Store::load()?;
    let source = store.resolve_key(&args[0])?;
    let target = store.resolve_key(&args[1])?;
    let reason = if args.len() > 2 { args[2..].join(" ") } else { String::new() };
    // Find UUIDs
    let source_uuid = store.nodes.get(&source)
        .map(|n| n.uuid)
        .ok_or_else(|| format!("source not found: {}", source))?;
    let target_uuid = store.nodes.get(&target)
        .map(|n| n.uuid)
        .ok_or_else(|| format!("target not found: {}", target))?;
    // Check if link already exists.
    // NOTE(review): only the source→target direction is checked here,
    // while migrate.rs deduplicates both directions — confirm whether
    // these relations are meant to be directed.
    let exists = store.relations.iter().any(|r|
        r.source_key == source && r.target_key == target && !r.deleted
    );
    if exists {
        println!("Link already exists: {}{}", source, target);
        return Ok(());
    }
    // 0.5 is the strength assigned to manually added Auto links.
    let rel = capnp_store::Store::new_relation(
        source_uuid, target_uuid,
        capnp_store::RelationType::Auto,
        0.5,
        &source, &target,
    );
    store.add_relation(rel)?;
    if !reason.is_empty() {
        println!("+ {}{} ({})", source, target, reason);
    } else {
        println!("+ {}{}", source, target);
    }
    Ok(())
}
/// Evaluate the topological impact of a (proposed) link between two
/// nodes without modifying the store.
fn cmd_link_impact(args: &[String]) -> Result<(), String> {
    if args.len() < 2 {
        return Err("Usage: poc-memory link-impact SOURCE TARGET".into());
    }
    let store = capnp_store::Store::load()?;
    let source = store.resolve_key(&args[0])?;
    let target = store.resolve_key(&args[1])?;
    let impact = store.build_graph().link_impact(&source, &target);
    println!("Link impact: {}{}", source, target);
    println!(" Source degree: {} Target degree: {}", impact.source_deg, impact.target_deg);
    println!(" Hub link: {} Same community: {}", impact.is_hub_link, impact.same_community);
    println!(" ΔCC source: {:+.4} ΔCC target: {:+.4}", impact.delta_cc_source, impact.delta_cc_target);
    println!(" ΔGini: {:+.6}", impact.delta_gini);
    println!(" Assessment: {}", impact.assessment);
    Ok(())
}
/// Apply link proposals emitted by consolidation agents.
///
/// Scans `~/.claude/memory/agent-results/*.json` in sorted order. Each
/// file may carry a `links` array either at the top level or nested
/// under `agent_result`. Every proposed target is resolved against the
/// graph and linked from the journal node best matching the file's
/// `entry_text`. Files whose links were processed are moved to a
/// `done/` subdirectory unless `--all` is passed; unreadable,
/// unparsable, or link-less files are left in place. The store is
/// saved once at the end, and only if at least one link was applied.
fn cmd_apply_agent(args: &[String]) -> Result<(), String> {
    let home = env::var("HOME").unwrap_or_default();
    let results_dir = std::path::PathBuf::from(&home)
        .join(".claude/memory/agent-results");
    if !results_dir.exists() {
        println!("No agent results directory");
        return Ok(());
    }
    let mut store = capnp_store::Store::load()?;
    let mut applied = 0;
    let mut errors = 0;
    let process_all = args.iter().any(|a| a == "--all");
    // Find .json result files
    let mut files: Vec<_> = std::fs::read_dir(&results_dir)
        .map_err(|e| format!("read results dir: {}", e))?
        .filter_map(|e| e.ok())
        .filter(|e| e.path().extension().map(|x| x == "json").unwrap_or(false))
        .collect();
    // Deterministic processing order.
    files.sort_by_key(|e| e.path());
    for entry in &files {
        let path = entry.path();
        // Unreadable or unparsable files count as errors but do not
        // abort the whole run.
        let content = match std::fs::read_to_string(&path) {
            Ok(c) => c,
            Err(e) => {
                eprintln!("  Skip {}: {}", path.display(), e);
                errors += 1;
                continue;
            }
        };
        let data: serde_json::Value = match serde_json::from_str(&content) {
            Ok(d) => d,
            Err(e) => {
                eprintln!("  Skip {}: parse error: {}", path.display(), e);
                errors += 1;
                continue;
            }
        };
        // Check for agent_result with links; fall back to the document
        // root when there is no `agent_result` wrapper.
        let agent_result = data.get("agent_result").or(Some(&data));
        let links = match agent_result.and_then(|r| r.get("links")).and_then(|l| l.as_array()) {
            Some(l) => l,
            None => continue,
        };
        // Text of the journal entry the agent was working from; used
        // below to find the matching journal node.
        let entry_text = data.get("entry_text")
            .and_then(|v| v.as_str())
            .unwrap_or("");
        // Optional source line range, displayed for traceability only.
        let source_start = agent_result
            .and_then(|r| r.get("source_start"))
            .and_then(|v| v.as_u64());
        let source_end = agent_result
            .and_then(|r| r.get("source_end"))
            .and_then(|v| v.as_u64());
        println!("Processing {}:", path.file_name().unwrap().to_string_lossy());
        if let (Some(start), Some(end)) = (source_start, source_end) {
            println!("  Source: L{}-L{}", start, end);
        }
        for link in links {
            let target = match link.get("target").and_then(|v| v.as_str()) {
                Some(t) => t,
                None => continue,
            };
            let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
            // Skip NOTE: targets (new topics, not existing nodes)
            if target.starts_with("NOTE:") {
                println!("  NOTE: {}{}", &target[5..], reason);
                continue;
            }
            // Try to resolve the target key and link from journal entry
            let resolved = match store.resolve_key(target) {
                Ok(r) => r,
                Err(_) => {
                    println!("  SKIP {} (not found in graph)", target);
                    continue;
                }
            };
            let source_key = match find_journal_node(&store, entry_text) {
                Some(k) => k,
                None => {
                    println!("  SKIP {} (no matching journal node)", target);
                    continue;
                }
            };
            // Get UUIDs for both nodes; skip silently if either key
            // resolved but has no node record.
            let source_uuid = match store.nodes.get(&source_key) {
                Some(n) => n.uuid,
                None => continue,
            };
            let target_uuid = match store.nodes.get(&resolved) {
                Some(n) => n.uuid,
                None => continue,
            };
            let rel = capnp_store::Store::new_relation(
                source_uuid, target_uuid,
                capnp_store::RelationType::Link,
                0.5,
                &source_key, &resolved,
            );
            if let Err(e) = store.add_relation(rel) {
                eprintln!("  Error adding relation: {}", e);
                errors += 1;
            } else {
                println!("  LINK {}{} ({})", source_key, resolved, reason);
                applied += 1;
            }
        }
        // Move processed file to avoid re-processing; move failures
        // are deliberately ignored (best-effort).
        if !process_all {
            let done_dir = results_dir.join("done");
            std::fs::create_dir_all(&done_dir).ok();
            let dest = done_dir.join(path.file_name().unwrap());
            std::fs::rename(&path, &dest).ok();
        }
    }
    if applied > 0 {
        store.save()?;
    }
    println!("\nApplied {} links ({} errors, {} files processed)",
        applied, errors, files.len());
    Ok(())
}
/// Find the journal node whose content matches the most keywords from
/// `entry_text`.
///
/// Keywords are the first five whitespace-separated words longer than
/// five characters; matching is case-insensitive. The `journal.md#`
/// node with the highest hit count wins (ties keep the first seen).
/// Returns `None` for empty text or when no node matches at least one
/// keyword.
fn find_journal_node(store: &capnp_store::Store, entry_text: &str) -> Option<String> {
    if entry_text.is_empty() {
        return None;
    }
    // Lowercase the keywords once up front instead of re-allocating a
    // lowercased copy of every keyword for every journal node.
    let keywords: Vec<String> = entry_text.split_whitespace()
        .filter(|w| w.len() > 5)
        .take(5)
        .map(|w| w.to_lowercase())
        .collect();
    let mut best_key = None;
    let mut best_score = 0;
    for (key, node) in &store.nodes {
        if !key.starts_with("journal.md#") {
            continue;
        }
        let content_lower = node.content.to_lowercase();
        let score: usize = keywords.iter()
            .filter(|w| content_lower.contains(w.as_str()))
            .count();
        if score > best_score {
            best_score = score;
            best_key = Some(key.clone());
        }
    }
    best_key
}
/// Run an episodic digest pipeline script (`daily` or `weekly`),
/// forwarding an optional DATE argument to the Python script.
fn cmd_digest(args: &[String]) -> Result<(), String> {
    if args.is_empty() {
        return Err("Usage: poc-memory digest daily [DATE] | weekly [DATE]".into());
    }
    // The two digest kinds differ only in which script they run.
    let script = match args[0].as_str() {
        "daily" => "daily-digest.py",
        "weekly" => "weekly-digest.py",
        other => return Err(format!("Unknown digest type: {}. Use: daily, weekly", other)),
    };
    run_digest_script(script, args.get(1))
}

/// Spawn `python3 SCRIPT [DATE]` from ~/poc/memory/scripts, waiting
/// for completion. CLAUDECODE is unset so nested claude calls work.
fn run_digest_script(script: &str, date: Option<&String>) -> Result<(), String> {
    let home = env::var("HOME").unwrap_or_default();
    let scripts_dir = std::path::PathBuf::from(&home).join("poc/memory/scripts");
    let mut cmd = std::process::Command::new("python3");
    cmd.arg(scripts_dir.join(script));
    if let Some(d) = date {
        cmd.arg(d);
    }
    // Unset CLAUDECODE for nested claude calls
    cmd.env_remove("CLAUDECODE");
    let status = cmd.status()
        .map_err(|e| format!("run {}: {}", script, e))?;
    if !status.success() {
        return Err(format!("{} failed", script));
    }
    Ok(())
}
/// Trace a node: print its metadata and a content preview, then all of
/// its neighbors grouped by node type (weekly/daily/session episodic,
/// then semantic), ending with a per-group link-count summary.
fn cmd_trace(args: &[String]) -> Result<(), String> {
    if args.is_empty() {
        return Err("Usage: poc-memory trace KEY".into());
    }
    // All args joined form the (possibly multi-word) key.
    let key = args.join(" ");
    let store = capnp_store::Store::load()?;
    let resolved = store.resolve_key(&key)?;
    let g = store.build_graph();
    let node = store.nodes.get(&resolved)
        .ok_or_else(|| format!("Node not found: {}", resolved))?;
    // Display the node itself
    println!("=== {} ===", resolved);
    println!("Type: {:?} Category: {} Weight: {:.2}",
        node.node_type, node.category.label(), node.weight);
    if !node.source_ref.is_empty() {
        println!("Source: {}", node.source_ref);
    }
    // Show content preview, truncating on a char boundary so the slice
    // stays valid UTF-8.
    let preview = if node.content.len() > 200 {
        let end = node.content.floor_char_boundary(200);
        format!("{}...", &node.content[..end])
    } else {
        node.content.clone()
    };
    println!("\n{}\n", preview);
    // Walk neighbors, grouped by node type
    let neighbors = g.neighbors(&resolved);
    let mut episodic_session = Vec::new();
    let mut episodic_daily = Vec::new();
    let mut episodic_weekly = Vec::new();
    let mut semantic = Vec::new();
    for (n, strength) in &neighbors {
        if let Some(nnode) = store.nodes.get(n.as_str()) {
            match nnode.node_type {
                capnp_store::NodeType::EpisodicSession =>
                    episodic_session.push((n.clone(), *strength, nnode)),
                capnp_store::NodeType::EpisodicDaily =>
                    episodic_daily.push((n.clone(), *strength, nnode)),
                capnp_store::NodeType::EpisodicWeekly =>
                    episodic_weekly.push((n.clone(), *strength, nnode)),
                capnp_store::NodeType::Semantic =>
                    semantic.push((n.clone(), *strength, nnode)),
            }
        }
    }
    if !episodic_weekly.is_empty() {
        println!("Weekly digests:");
        for (k, s, n) in &episodic_weekly {
            // First content line, clipped to 80 chars, as the preview.
            let preview = n.content.lines().next().unwrap_or("").chars().take(80).collect::<String>();
            println!("  [{:.2}] {}{}", s, k, preview);
        }
    }
    if !episodic_daily.is_empty() {
        println!("Daily digests:");
        for (k, s, n) in &episodic_daily {
            let preview = n.content.lines().next().unwrap_or("").chars().take(80).collect::<String>();
            println!("  [{:.2}] {}{}", s, k, preview);
        }
    }
    if !episodic_session.is_empty() {
        println!("Session entries:");
        for (k, s, n) in &episodic_session {
            // Skip blank lines and HTML comments when choosing the
            // preview line.
            let preview = n.content.lines()
                .find(|l| !l.is_empty() && !l.starts_with("<!--"))
                .unwrap_or("").chars().take(80).collect::<String>();
            println!("  [{:.2}] {}", s, k);
            if !n.source_ref.is_empty() {
                println!("    ↳ source: {}", n.source_ref);
            }
            println!("    {}", preview);
        }
    }
    if !semantic.is_empty() {
        println!("Semantic links:");
        for (k, s, _) in &semantic {
            println!("  [{:.2}] {}", s, k);
        }
    }
    // Summary
    println!("\nLinks: {} session, {} daily, {} weekly, {} semantic",
        episodic_session.len(), episodic_daily.len(),
        episodic_weekly.len(), semantic.len());
    Ok(())
}
/// Report interfering memory pairs: textually similar nodes that live
/// in different graph communities. `--threshold X` overrides 0.4.
fn cmd_interference(args: &[String]) -> Result<(), String> {
    let mut threshold = 0.4f32;
    let mut idx = 0;
    while idx < args.len() {
        if args[idx] == "--threshold" && idx + 1 < args.len() {
            threshold = args[idx + 1].parse().map_err(|_| "invalid threshold")?;
            idx += 2;
        } else {
            idx += 1;
        }
    }
    let store = capnp_store::Store::load()?;
    let g = store.build_graph();
    let pairs = neuro::detect_interference(&store, &g, threshold);
    if pairs.is_empty() {
        println!("No interfering pairs above threshold {:.2}", threshold);
        return Ok(());
    }
    println!("Interfering pairs (similarity > {:.2}, different communities):", threshold);
    for (a, b, sim) in &pairs {
        println!("  [{:.3}] {}{}", sim, a, b);
    }
    Ok(())
}

386
src/migrate.rs Normal file
View file

@ -0,0 +1,386 @@
// Migration from old weights.json + markdown marker system
//
// Reads:
// ~/.claude/memory/weights.json (1,874 entries with metrics)
// ~/.claude/memory/*.md (content + mem markers + edges)
//
// Emits:
// ~/.claude/memory/nodes.capnp (all nodes with preserved metadata)
// ~/.claude/memory/relations.capnp (all edges from markers + md links)
// ~/.claude/memory/state.json (derived cache)
//
// Old files are preserved as backup. Run once.
use crate::capnp_store::{
self, Store, Node, Category, NodeType, Provenance, RelationType,
parse_units,
};
use serde::Deserialize;
use uuid::Uuid;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
/// The user's home directory; panics if $HOME is unset.
fn home() -> PathBuf {
    let h = env::var("HOME").expect("HOME not set");
    PathBuf::from(h)
}
/// Seconds since the Unix epoch as an f64; panics if the system clock
/// is set before 1970.
fn now_epoch() -> f64 {
    UNIX_EPOCH
        .elapsed()
        .expect("system clock before Unix epoch")
        .as_secs_f64()
}
// Old system data structures (just enough for deserialization)

/// Top-level shape of the legacy weights.json store.
#[derive(Deserialize)]
struct OldStore {
    /// Per-key weight/usage metrics.
    #[serde(default)]
    entries: HashMap<String, OldEntry>,
    /// Historical retrieval events.
    #[serde(default)]
    retrieval_log: Vec<OldRetrievalEvent>,
    /// Tunable parameters; `OldParams::default()` when absent.
    #[serde(default)]
    params: OldParams,
}
/// One legacy weights.json entry: a weight plus usage counters.
#[derive(Deserialize)]
struct OldEntry {
    weight: f64,
    // Creation timestamp, kept as the stored string.
    created: String,
    #[serde(default)]
    last_retrieved: Option<String>,
    #[serde(default)]
    last_used: Option<String>,
    // Counters carried over verbatim into the new Node.
    #[serde(default)]
    retrievals: u32,
    #[serde(default)]
    uses: u32,
    #[serde(default)]
    wrongs: u32,
    // Free-form category label; defaults to "General".
    #[serde(default = "default_category")]
    category: String,
}
/// serde default for `OldEntry::category`.
fn default_category() -> String {
    String::from("General")
}
/// One legacy retrieval-log event: a query, when it ran, the result
/// keys it returned, and (optionally) which of them were used.
#[derive(Deserialize)]
struct OldRetrievalEvent {
    query: String,
    timestamp: String,
    results: Vec<String>,
    #[serde(default)]
    used: Option<Vec<String>>,
}
/// Legacy tunable parameters. Each field falls back to the matching
/// `default_*` helper when absent, which mirrors `Default for OldParams`.
#[derive(Deserialize)]
struct OldParams {
    #[serde(default = "default_0_7")]
    default_weight: f64,
    #[serde(default = "default_0_95")]
    decay_factor: f64,
    #[serde(default = "default_0_15")]
    use_boost: f64,
    #[serde(default = "default_0_1")]
    prune_threshold: f64,
    #[serde(default = "default_0_3")]
    edge_decay: f64,
    #[serde(default = "default_3")]
    max_hops: u32,
    #[serde(default = "default_0_05")]
    min_activation: f64,
}
impl Default for OldParams {
fn default() -> Self {
OldParams {
default_weight: 0.7,
decay_factor: 0.95,
use_boost: 0.15,
prune_threshold: 0.1,
edge_decay: 0.3,
max_hops: 3,
min_activation: 0.05,
}
}
}
// serde default helpers — one per distinct default value used by
// OldParams; keep in sync with `Default for OldParams`.
fn default_0_7() -> f64 {
    0.7
}
fn default_0_95() -> f64 {
    0.95
}
fn default_0_15() -> f64 {
    0.15
}
fn default_0_1() -> f64 {
    0.1
}
fn default_0_3() -> f64 {
    0.3
}
fn default_3() -> u32 {
    3
}
fn default_0_05() -> f64 {
    0.05
}
/// Map a legacy free-form category string onto the new enum.
/// Only the exact spellings below are recognized; anything else
/// (including other capitalizations) falls back to General.
fn parse_old_category(s: &str) -> Category {
    if matches!(s, "Core" | "core") {
        Category::Core
    } else if matches!(s, "Technical" | "technical" | "tech") {
        Category::Technical
    } else if matches!(s, "Observation" | "observation" | "obs") {
        Category::Observation
    } else if matches!(s, "Task" | "task") {
        Category::Task
    } else {
        Category::General
    }
}
/// One-shot migration from the legacy weights.json + markdown-marker
/// system into the Cap'n Proto append-only store.
///
/// Refuses to run when nodes.capnp or relations.capnp already exist;
/// the legacy files are left untouched as a backup. Runs in three
/// phases:
///   1. merge weights.json entries with parsed markdown units into
///      nodes (markdown-only units get fresh defaults),
///   2. derive Link/Causal relations from markdown markers,
///   3. compute graph metrics, save the derived cache, and print a
///      verification summary to stderr.
pub fn migrate() -> Result<(), String> {
    let weights_path = home().join(".claude/memory/weights.json");
    let memory_dir = home().join(".claude/memory");
    let nodes_path = memory_dir.join("nodes.capnp");
    let rels_path = memory_dir.join("relations.capnp");
    // Safety check
    if nodes_path.exists() || rels_path.exists() {
        return Err("nodes.capnp or relations.capnp already exist. \
            Remove them first if you want to re-migrate.".into());
    }
    // Load old store; a missing weights.json degrades to a
    // markdown-only migration with default params.
    let old_store: OldStore = if weights_path.exists() {
        let data = fs::read_to_string(&weights_path)
            .map_err(|e| format!("read weights.json: {}", e))?;
        serde_json::from_str(&data)
            .map_err(|e| format!("parse weights.json: {}", e))?
    } else {
        eprintln!("Warning: no weights.json found, migrating markdown only");
        OldStore {
            entries: HashMap::new(),
            retrieval_log: Vec::new(),
            params: OldParams::default(),
        }
    };
    eprintln!("Old store: {} entries, {} retrieval events",
        old_store.entries.len(), old_store.retrieval_log.len());
    // Scan markdown files to get content + edges
    let mut units_by_key: HashMap<String, capnp_store::MemoryUnit> = HashMap::new();
    scan_markdown_dir(&memory_dir, &mut units_by_key)?;
    eprintln!("Scanned {} markdown units", units_by_key.len());
    // Create new store
    let mut store = Store::default();
    // Migrate params
    store.params.default_weight = old_store.params.default_weight;
    store.params.decay_factor = old_store.params.decay_factor;
    store.params.use_boost = old_store.params.use_boost;
    store.params.prune_threshold = old_store.params.prune_threshold;
    store.params.edge_decay = old_store.params.edge_decay;
    store.params.max_hops = old_store.params.max_hops;
    store.params.min_activation = old_store.params.min_activation;
    // Migrate retrieval log
    store.retrieval_log = old_store.retrieval_log.iter().map(|e| {
        capnp_store::RetrievalEvent {
            query: e.query.clone(),
            timestamp: e.timestamp.clone(),
            results: e.results.clone(),
            used: e.used.clone(),
        }
    }).collect();
    // Phase 1: Create nodes
    // Merge old entries (weight metadata) with markdown units (content)
    let mut all_nodes: Vec<Node> = Vec::new();
    let mut key_to_uuid: HashMap<String, [u8; 16]> = HashMap::new();
    // First, all entries from the old store
    for (key, old_entry) in &old_store.entries {
        let uuid = *Uuid::new_v4().as_bytes();
        key_to_uuid.insert(key.clone(), uuid);
        // Content/state come from the matching markdown unit when one
        // exists; weight-only entries get empty content.
        let content = units_by_key.get(key)
            .map(|u| u.content.clone())
            .unwrap_or_default();
        let state_tag = units_by_key.get(key)
            .and_then(|u| u.state.clone())
            .unwrap_or_default();
        let node = Node {
            uuid,
            version: 1,
            timestamp: now_epoch(),
            // Keys mentioning "journal" are treated as episodic
            // session entries; everything else is semantic.
            node_type: if key.contains("journal") {
                NodeType::EpisodicSession
            } else {
                NodeType::Semantic
            },
            provenance: Provenance::Manual,
            key: key.clone(),
            content,
            weight: old_entry.weight as f32,
            category: parse_old_category(&old_entry.category),
            emotion: 0.0,
            deleted: false,
            source_ref: String::new(),
            created: old_entry.created.clone(),
            retrievals: old_entry.retrievals,
            uses: old_entry.uses,
            wrongs: old_entry.wrongs,
            state_tag,
            last_replayed: 0.0,
            spaced_repetition_interval: 1,
            // Graph metrics are filled in by update_graph_metrics below.
            community_id: None,
            clustering_coefficient: None,
            schema_fit: None,
            degree: None,
        };
        all_nodes.push(node);
    }
    // Then, any markdown units not in the old store
    for (key, unit) in &units_by_key {
        if key_to_uuid.contains_key(key) { continue; }
        let uuid = *Uuid::new_v4().as_bytes();
        key_to_uuid.insert(key.clone(), uuid);
        let node = Node {
            uuid,
            version: 1,
            timestamp: now_epoch(),
            node_type: if key.contains("journal") {
                NodeType::EpisodicSession
            } else {
                NodeType::Semantic
            },
            provenance: Provenance::Manual,
            key: key.clone(),
            content: unit.content.clone(),
            // No legacy metrics for markdown-only units: default
            // weight 0.7, General category, zeroed counters.
            weight: 0.7,
            category: Category::General,
            emotion: 0.0,
            deleted: false,
            source_ref: String::new(),
            created: String::new(),
            retrievals: 0,
            uses: 0,
            wrongs: 0,
            state_tag: unit.state.clone().unwrap_or_default(),
            last_replayed: 0.0,
            spaced_repetition_interval: 1,
            community_id: None,
            clustering_coefficient: None,
            schema_fit: None,
            degree: None,
        };
        all_nodes.push(node);
    }
    // Write nodes to capnp log
    store.append_nodes(&all_nodes)?;
    for node in &all_nodes {
        store.uuid_to_key.insert(node.uuid, node.key.clone());
        store.nodes.insert(node.key.clone(), node.clone());
    }
    eprintln!("Migrated {} nodes", all_nodes.len());
    // Phase 2: Create relations from markdown links + causal edges
    let mut all_relations = Vec::new();
    for (key, unit) in &units_by_key {
        let source_uuid = match key_to_uuid.get(key) {
            Some(u) => *u,
            None => continue,
        };
        // Association links (bidirectional)
        for link in unit.marker_links.iter().chain(unit.md_links.iter()) {
            let target_uuid = match key_to_uuid.get(link) {
                Some(u) => *u,
                None => continue,
            };
            // Avoid duplicate relations (checked in both directions,
            // since association links are bidirectional)
            let exists = all_relations.iter().any(|r: &capnp_store::Relation|
                (r.source == source_uuid && r.target == target_uuid) ||
                (r.source == target_uuid && r.target == source_uuid));
            if exists { continue; }
            all_relations.push(Store::new_relation(
                source_uuid, target_uuid,
                RelationType::Link, 1.0,
                key, link,
            ));
        }
        // Causal edges (directed): cause -> this unit
        for cause in &unit.causes {
            let cause_uuid = match key_to_uuid.get(cause) {
                Some(u) => *u,
                None => continue,
            };
            all_relations.push(Store::new_relation(
                cause_uuid, source_uuid,
                RelationType::Causal, 1.0,
                cause, key,
            ));
        }
    }
    // Write relations to capnp log
    store.append_relations(&all_relations)?;
    store.relations = all_relations;
    eprintln!("Migrated {} relations", store.relations.len());
    // Phase 3: Compute graph metrics
    store.update_graph_metrics();
    // Save derived cache
    store.save()?;
    eprintln!("Migration complete. Files:");
    eprintln!("  {}", nodes_path.display());
    eprintln!("  {}", rels_path.display());
    eprintln!("  {}", memory_dir.join("state.json").display());
    // Verify
    let g = store.build_graph();
    eprintln!("\nVerification:");
    eprintln!("  Nodes: {}", store.nodes.len());
    eprintln!("  Relations: {}", store.relations.len());
    eprintln!("  Graph edges: {}", g.edge_count());
    eprintln!("  Communities: {}", g.community_count());
    eprintln!("  Avg CC: {:.4}", g.avg_clustering_coefficient());
    Ok(())
}
/// Recursively walk `dir`, parsing memory units out of every `.md`
/// file and inserting them into `units` keyed by unit key.
/// Unreadable files and directory-entry errors are skipped silently;
/// only a failure to open a directory is reported.
fn scan_markdown_dir(
    dir: &Path,
    units: &mut HashMap<String, capnp_store::MemoryUnit>,
) -> Result<(), String> {
    let dir_iter = fs::read_dir(dir)
        .map_err(|e| format!("read dir {}: {}", dir.display(), e))?;
    for entry in dir_iter.flatten() {
        let path = entry.path();
        if path.is_dir() {
            // Descend into subdirectories.
            scan_markdown_dir(&path, units)?;
        } else if path.extension().map_or(false, |ext| ext == "md") {
            let filename = path.file_name().unwrap().to_string_lossy().to_string();
            if let Ok(content) = fs::read_to_string(&path) {
                for unit in parse_units(&filename, &content) {
                    units.insert(unit.key.clone(), unit);
                }
            }
        }
    }
    Ok(())
}

707
src/neuro.rs Normal file
View file

@ -0,0 +1,707 @@
// Neuroscience-inspired memory algorithms
//
// Systematic replay (hippocampal replay), schema assimilation,
// interference detection, emotional gating, consolidation priority
// scoring, and the agent consolidation harness.
use crate::capnp_store::Store;
use crate::graph::{self, Graph};
use crate::similarity;
use std::time::{SystemTime, UNIX_EPOCH};
/// Current wall-clock time as seconds since the Unix epoch; panics if
/// the system clock is set before 1970.
fn now_epoch() -> f64 {
    UNIX_EPOCH
        .elapsed()
        .expect("system clock before Unix epoch")
        .as_secs_f64()
}

/// Seconds in one day; converts replay intervals (days) to seconds.
const SECS_PER_DAY: f64 = 86400.0;
/// Consolidation priority: how urgently a node needs attention.
///
/// priority = (1 - schema_fit) × overdue_ratio × (1 + emotion / 10)
///
/// where `overdue_ratio` is time-since-last-replay divided by the
/// node's spaced-repetition interval, capped at 5; never-replayed
/// nodes are treated as 3× overdue. Unknown keys score 0.
pub fn consolidation_priority(store: &Store, key: &str, graph: &Graph) -> f64 {
    let node = match store.nodes.get(key) {
        Some(n) => n,
        None => return 0.0,
    };
    // Schema fit: 0 = poorly integrated, 1 = well integrated
    let fit = graph::schema_fit(graph, key) as f64;
    let fit_factor = 1.0 - fit;
    // Spaced repetition: how overdue is this node for replay?
    let interval_secs = node.spaced_repetition_interval as f64 * SECS_PER_DAY;
    let time_since_replay = if node.last_replayed > 0.0 {
        (now_epoch() - node.last_replayed).max(0.0)
    } else {
        // Never replayed — treat as very overdue
        interval_secs * 3.0
    };
    let overdue_ratio = (time_since_replay / interval_secs).min(5.0);
    // Emotional intensity: higher emotion = higher priority
    let emotion_factor = 1.0 + (node.emotion as f64 / 10.0);
    fit_factor * overdue_ratio * emotion_factor
}
/// Item in the replay queue
pub struct ReplayItem {
    /// Node key.
    pub key: String,
    /// Consolidation priority; higher means more urgent (see
    /// `consolidation_priority`).
    pub priority: f64,
    /// Current spaced-repetition interval, in days.
    pub interval_days: u32,
    /// Emotional intensity recorded on the node.
    pub emotion: f32,
    /// Schema-fit score: 0 = poorly integrated, 1 = well integrated.
    pub schema_fit: f32,
}
/// Generate the replay queue: all nodes scored by consolidation
/// priority, sorted descending, truncated to `count` items.
pub fn replay_queue(store: &Store, count: usize) -> Vec<ReplayItem> {
    let graph = store.build_graph();
    let fits = graph::schema_fit_all(&graph);
    let mut items: Vec<ReplayItem> = store.nodes.iter()
        .map(|(key, node)| {
            let priority = consolidation_priority(store, key, &graph);
            // Nodes missing from the fit map count as unintegrated.
            let fit = fits.get(key).copied().unwrap_or(0.0);
            ReplayItem {
                key: key.clone(),
                priority,
                interval_days: node.spaced_repetition_interval,
                emotion: node.emotion,
                schema_fit: fit,
            }
        })
        .collect();
    // total_cmp gives a total order even if a priority is NaN (e.g. a
    // zero replay interval yields 0/0), where the previous
    // partial_cmp().unwrap() would panic.
    items.sort_by(|a, b| b.priority.total_cmp(&a.priority));
    items.truncate(count);
    items
}
/// Detect interfering memory pairs: nodes whose text similarity
/// exceeds `threshold` but which sit in different graph communities
/// (pairs with an unknown community are flagged too).
pub fn detect_interference(
    store: &Store,
    graph: &Graph,
    threshold: f32,
) -> Vec<(String, String, f32)> {
    let communities = graph.communities();
    // Skip tiny nodes (<= 50 bytes of content): too little signal.
    let mut docs: Vec<(String, String)> = store.nodes.iter()
        .filter(|(_, n)| n.content.len() > 50)
        .map(|(k, n)| (k.clone(), n.content.clone()))
        .collect();
    // Cap the candidate set so pairwise comparison stays feasible:
    // keep only the 200 longest documents.
    if docs.len() > 200 {
        docs.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
        docs.truncate(200);
    }
    let mut out = Vec::new();
    for (a, b, sim) in similarity::pairwise_similar(&docs, threshold) {
        let same_community = match (communities.get(&a), communities.get(&b)) {
            (Some(ca), Some(cb)) => ca == cb,
            // Unknown community: treat as potentially interfering.
            _ => false,
        };
        if !same_community {
            out.push((a, b, sim));
        }
    }
    out
}
/// Schema assimilation scoring for a node: how easily it integrates
/// into the existing structure, plus a handling recommendation.
///
/// fit > 0.5  → "auto-integrate"
/// fit > 0.2  → "agent-review"
/// otherwise  → "deep-examine-bridge" if the node has edges,
///              "deep-examine-orphan" if it has none.
pub fn schema_assimilation(store: &Store, key: &str) -> (f32, &'static str) {
    let graph = store.build_graph();
    let fit = graph::schema_fit(&graph, key);
    let recommendation = match fit {
        f if f > 0.5 => "auto-integrate",
        f if f > 0.2 => "agent-review",
        _ if graph.degree(key) > 0 => "deep-examine-bridge",
        _ => "deep-examine-orphan",
    };
    (fit, recommendation)
}
/// Directory holding the agent prompt templates:
/// `$HOME/poc/memory/prompts` (relative `poc/memory/prompts` when
/// HOME is unset).
fn prompts_dir() -> std::path::PathBuf {
    let home = std::env::var("HOME").unwrap_or_default();
    let mut dir = std::path::PathBuf::from(home);
    dir.push("poc/memory/prompts");
    dir
}
/// Read the prompt template `NAME.md` from the prompts directory and
/// substitute every occurrence of each placeholder with its data.
fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
    let path = prompts_dir().join(format!("{}.md", name));
    let template = std::fs::read_to_string(&path)
        .map_err(|e| format!("load prompt {}: {}", path.display(), e))?;
    let filled = replacements
        .iter()
        .fold(template, |acc, (placeholder, data)| acc.replace(placeholder, data));
    Ok(filled)
}
/// Format topology header for agent prompts — current graph health metrics.
///
/// Emits node/edge/community counts, small-world sigma, power-law
/// exponent, degree Gini, and average clustering coefficient, followed
/// by instructions telling the agent how to read per-node hub-link
/// ratios and how to evaluate proposed links.
fn format_topology_header(graph: &Graph) -> String {
    let sigma = graph.small_world_sigma();
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();
    let avg_cc = graph.avg_clustering_coefficient();
    let n = graph.nodes().len();
    let e = graph.edge_count();
    format!(
        "## Current graph topology\n\
        Nodes: {} Edges: {} Communities: {}\n\
        Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\
        Avg clustering coefficient: {:.4}\n\n\
        Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\
        Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n",
        n, e, graph.community_count(), sigma, alpha, gini, avg_cc)
}
/// Degree at which a node counts as a hub: the 95th-percentile degree.
/// Graphs with fewer than 20 nodes get `usize::MAX`, i.e. no hubs.
fn hub_threshold(graph: &Graph) -> usize {
    let mut degrees: Vec<usize> = graph
        .nodes()
        .iter()
        .map(|k| graph.degree(k))
        .collect();
    degrees.sort_unstable();
    match degrees.len() {
        n if n >= 20 => degrees[n * 95 / 100],
        _ => usize::MAX,
    }
}
/// Format node data section for prompt templates.
///
/// For each replay item that still exists in the store: a header with
/// priority/fit/emotion/category/interval, graph stats (degree,
/// clustering coefficient, hub-link ratio with hub annotations),
/// content truncated at 1500 chars, and up to 15 neighbors with edge
/// strength, clustering coefficient, and community id. Sections are
/// separated by `---`.
fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String {
    let hub_thresh = hub_threshold(graph);
    let mut out = String::new();
    for item in items {
        // Items whose node vanished from the store are skipped.
        let node = match store.nodes.get(&item.key) {
            Some(n) => n,
            None => continue,
        };
        out.push_str(&format!("## {} \n", item.key));
        out.push_str(&format!("Priority: {:.3} Schema fit: {:.3} Emotion: {:.1} ",
            item.priority, item.schema_fit, item.emotion));
        out.push_str(&format!("Category: {} Interval: {}d\n",
            node.category.label(), node.spaced_repetition_interval));
        if let Some(community) = node.community_id {
            out.push_str(&format!("Community: {} ", community));
        }
        let deg = graph.degree(&item.key);
        let cc = graph.clustering_coefficient(&item.key);
        // Hub-link ratio: what fraction of this node's edges go to hubs?
        let neighbors = graph.neighbors(&item.key);
        let hub_links = neighbors.iter()
            .filter(|(n, _)| graph.degree(n) >= hub_thresh)
            .count();
        let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
        let is_hub = deg >= hub_thresh;
        out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
            deg, cc, hub_ratio * 100.0, hub_links, deg));
        // Annotate hubs and hub-dependent nodes so the agent knows
        // where lateral links are needed.
        if is_hub {
            out.push_str(" ← THIS IS A HUB");
        } else if hub_ratio > 0.6 {
            out.push_str(" ← mostly hub-connected, needs lateral links");
        }
        out.push('\n');
        // Content (truncated for large nodes, on a char boundary so
        // the slice stays valid UTF-8)
        let content = &node.content;
        if content.len() > 1500 {
            let end = content.floor_char_boundary(1500);
            out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n[...]\n\n",
                content.len(), &content[..end]));
        } else {
            out.push_str(&format!("\nContent:\n{}\n\n", content));
        }
        // Neighbors (at most 15)
        let neighbors = graph.neighbors(&item.key);
        if !neighbors.is_empty() {
            out.push_str("Neighbors:\n");
            for (n, strength) in neighbors.iter().take(15) {
                let n_cc = graph.clustering_coefficient(n);
                let n_community = store.nodes.get(n.as_str())
                    .and_then(|n| n.community_id);
                out.push_str(&format!("  - {} (str={:.2}, cc={:.3}",
                    n, strength, n_cc));
                if let Some(c) = n_community {
                    out.push_str(&format!(", c{}", c));
                }
                out.push_str(")\n");
            }
        }
        out.push_str("\n---\n\n");
    }
    out
}
/// Format health data for the health agent prompt.
///
/// Starts from the graph health report, then appends a weight
/// histogram (10 buckets), up to 20 near-prune nodes (weight < 0.15),
/// and the 10 largest communities with a 5-member sample each.
fn format_health_section(store: &Store, graph: &Graph) -> String {
    let health = graph::health_report(graph, store);
    let mut out = health;
    out.push_str("\n\n## Weight distribution\n");
    // Weight histogram
    let mut buckets = [0u32; 10]; // 0.0-0.1, 0.1-0.2, ..., 0.9-1.0
    for node in store.nodes.values() {
        // Clamp so weight == 1.0 lands in the top bucket.
        let bucket = ((node.weight * 10.0) as usize).min(9);
        buckets[bucket] += 1;
    }
    for (i, &count) in buckets.iter().enumerate() {
        let lo = i as f32 / 10.0;
        let hi = (i + 1) as f32 / 10.0;
        // One bar character per 10 nodes.
        let bar: String = std::iter::repeat('█').take((count as usize) / 10).collect();
        out.push_str(&format!("  {:.1}-{:.1}: {:4} {}\n", lo, hi, count, bar));
    }
    // Near-prune nodes
    let near_prune: Vec<_> = store.nodes.iter()
        .filter(|(_, n)| n.weight < 0.15)
        .map(|(k, n)| (k.clone(), n.weight))
        .collect();
    if !near_prune.is_empty() {
        out.push_str(&format!("\n## Near-prune nodes ({} total)\n", near_prune.len()));
        for (k, w) in near_prune.iter().take(20) {
            out.push_str(&format!("  [{:.3}] {}\n", w, k));
        }
    }
    // Community sizes
    let communities = graph.communities();
    let mut comm_sizes: std::collections::HashMap<u32, Vec<String>> = std::collections::HashMap::new();
    for (key, &label) in communities {
        comm_sizes.entry(label).or_default().push(key.clone());
    }
    let mut sizes: Vec<_> = comm_sizes.iter()
        .map(|(id, members)| (*id, members.len(), members.clone()))
        .collect();
    // Largest communities first.
    sizes.sort_by(|a, b| b.1.cmp(&a.1));
    out.push_str("\n## Largest communities\n");
    for (id, size, members) in sizes.iter().take(10) {
        out.push_str(&format!("  Community {} ({} nodes): ", id, size));
        let sample: Vec<_> = members.iter().take(5).map(|s| s.as_str()).collect();
        out.push_str(&sample.join(", "));
        if *size > 5 { out.push_str(", ..."); }
        out.push('\n');
    }
    out
}
/// Format interference pairs for the separator agent prompt.
///
/// Each pair gets a similarity header followed by both nodes'
/// community tag, category, weight, and content (truncated to 500
/// chars), separated by `---`.
fn format_pairs_section(
    pairs: &[(String, String, f32)],
    store: &Store,
    graph: &Graph,
) -> String {
    let mut out = String::new();
    let communities = graph.communities();
    for (a, b, sim) in pairs {
        out.push_str(&format!("## Pair: similarity={:.3}\n", sim));
        // Both halves of the pair are rendered identically.
        for key in [a, b] {
            let community = communities.get(key)
                .map(|c| format!("c{}", c))
                .unwrap_or_else(|| "?".into());
            append_pair_node(&mut out, store, key, &community);
        }
        out.push_str("\n---\n\n");
    }
    out
}

/// Append one node's section (header, category/weight line, content
/// truncated at a 500-byte char boundary) to `out`. Keys with no node
/// in the store get the header only.
fn append_pair_node(out: &mut String, store: &Store, key: &str, community: &str) {
    out.push_str(&format!("\n### {} ({})\n", key, community));
    if let Some(node) = store.nodes.get(key) {
        let content = if node.content.len() > 500 {
            let end = node.content.floor_char_boundary(500);
            format!("{}...", &node.content[..end])
        } else {
            node.content.clone()
        };
        out.push_str(&format!("Category: {} Weight: {:.2}\n{}\n",
            node.category.label(), node.weight, content));
    }
}
/// Run agent consolidation on top-priority nodes.
///
/// With `auto`, prints the replay agent prompt filled with the
/// top-`count` nodes' data. Otherwise prints an interactive summary:
/// the batch itself, up to 5 interfering pairs (similarity > 0.6),
/// and the list of available agent prompt flags.
pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> {
    let graph = store.build_graph();
    let items = replay_queue(store, count);
    if items.is_empty() {
        println!("No nodes to consolidate.");
        return Ok(());
    }
    let nodes_section = format_nodes_section(store, &items, &graph);
    if auto {
        // Generate the replay agent prompt with data filled in
        let prompt = load_prompt("replay", &[("{{NODES}}", &nodes_section)])?;
        println!("{}", prompt);
    } else {
        // Interactive: show what needs attention and available agent types
        println!("Consolidation batch ({} nodes):\n", items.len());
        for item in &items {
            // Keys mentioning "journal" are labeled episodic.
            let node_type = store.nodes.get(&item.key)
                .map(|n| if n.key.contains("journal") { "episodic" } else { "semantic" })
                .unwrap_or("?");
            println!("  [{:.3}] {} (fit={:.3}, interval={}d, type={})",
                item.priority, item.key, item.schema_fit, item.interval_days, node_type);
        }
        // Also show interference pairs
        let pairs = detect_interference(store, &graph, 0.6);
        if !pairs.is_empty() {
            println!("\nInterfering pairs ({}):", pairs.len());
            for (a, b, sim) in pairs.iter().take(5) {
                println!("  [{:.3}] {}{}", sim, a, b);
            }
        }
        println!("\nAgent prompts:");
        println!("  --auto             Generate replay agent prompt");
        println!("  --agent replay     Replay agent (schema assimilation)");
        println!("  --agent linker     Linker agent (relational binding)");
        println!("  --agent separator  Separator agent (pattern separation)");
        println!("  --agent transfer   Transfer agent (CLS episodic→semantic)");
        println!("  --agent health     Health agent (synaptic homeostasis)");
    }
    Ok(())
}
/// Generate a specific agent prompt with filled-in data.
///
/// `agent` selects the prompt template: replay, linker, separator,
/// transfer, or health. `count` bounds how many nodes/episodes get
/// embedded in the prompt. Returns the rendered prompt text, or an
/// error string for an unknown agent or a template-loading failure.
pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String, String> {
    let graph = store.build_graph();
    let topology = format_topology_header(&graph);
    match agent {
        "replay" => {
            let items = replay_queue(store, count);
            let nodes_section = format_nodes_section(store, &items, &graph);
            load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
        }
        "linker" => {
            // Filter to episodic entries: typed episodic sessions plus
            // anything keyed like a journal/session note. Over-fetch 2x,
            // then truncate back to `count` after filtering.
            let mut items = replay_queue(store, count * 2);
            items.retain(|item| {
                store.nodes.get(&item.key)
                    .map(|n| matches!(n.node_type, crate::capnp_store::NodeType::EpisodicSession))
                    .unwrap_or(false)
                    || item.key.contains("journal")
                    || item.key.contains("session")
            });
            items.truncate(count);
            let nodes_section = format_nodes_section(store, &items, &graph);
            load_prompt("linker", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
        }
        "separator" => {
            let pairs = detect_interference(store, &graph, 0.5);
            let pairs_section = format_pairs_section(&pairs, store, &graph);
            load_prompt("separator", &[("{{TOPOLOGY}}", &topology), ("{{PAIRS}}", &pairs_section)])
        }
        "transfer" => {
            // Recent episodic entries, newest first.
            let mut episodes: Vec<_> = store.nodes.iter()
                .filter(|(k, _)| k.contains("journal") || k.contains("session"))
                .map(|(k, n)| (k.clone(), n.timestamp))
                .collect();
            // Bug fix: partial_cmp().unwrap() panicked if any timestamp
            // compared as NaN; treat incomparable values as equal instead.
            episodes.sort_by(|a, b| {
                b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
            });
            episodes.truncate(count);
            let episode_keys: Vec<_> = episodes.iter().map(|(k, _)| k.clone()).collect();
            let items: Vec<ReplayItem> = episode_keys.iter()
                .filter_map(|k| {
                    let node = store.nodes.get(k)?;
                    let fit = graph::schema_fit(&graph, k);
                    Some(ReplayItem {
                        key: k.clone(),
                        priority: consolidation_priority(store, k, &graph),
                        interval_days: node.spaced_repetition_interval,
                        emotion: node.emotion,
                        schema_fit: fit,
                    })
                })
                .collect();
            let episodes_section = format_nodes_section(store, &items, &graph);
            load_prompt("transfer", &[("{{TOPOLOGY}}", &topology), ("{{EPISODES}}", &episodes_section)])
        }
        "health" => {
            let health_section = format_health_section(store, &graph);
            load_prompt("health", &[("{{TOPOLOGY}}", &topology), ("{{HEALTH}}", &health_section)])
        }
        _ => Err(format!("Unknown agent: {}. Use: replay, linker, separator, transfer, health", agent)),
    }
}
/// Agent allocation from the control loop
pub struct ConsolidationPlan {
    /// Number of replay-agent runs (schema assimilation).
    pub replay_count: usize,
    /// Number of linker-agent runs (relational binding).
    pub linker_count: usize,
    /// Number of separator-agent runs (pattern separation).
    pub separator_count: usize,
    /// Number of transfer-agent runs (episodic→semantic extraction).
    pub transfer_count: usize,
    /// Whether the health agent runs first; `consolidation_plan` always
    /// sets this to true.
    pub run_health: bool,
    /// Human-readable explanations for each allocation decision.
    pub rationale: Vec<String>,
}
/// Analyze metrics and decide how much each agent needs to run.
///
/// This is the control loop: metrics → error signal → agent allocation.
/// Target values are based on healthy small-world networks.
pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
    let graph = store.build_graph();

    // Observed metrics (the control loop's inputs).
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();
    let avg_fit = {
        let fits = graph::schema_fit_all(&graph);
        match fits.len() {
            0 => 0.0,
            n => fits.values().sum::<f32>() / n as f32,
        }
    };
    let interference_count = detect_interference(store, &graph, 0.5).len();

    // Episodic vs semantic split, keyed off naming convention.
    let total_nodes = store.nodes.len();
    let episodic_count = store.nodes.iter()
        .filter(|(k, _)| k.contains("journal") || k.contains("session"))
        .count();
    let episodic_ratio = match total_nodes {
        0 => 0.0,
        n => episodic_count as f32 / n as f32,
    };

    let mut plan = ConsolidationPlan {
        replay_count: 0,
        linker_count: 0,
        separator_count: 0,
        transfer_count: 0,
        run_health: true, // always run health first
        rationale: Vec::new(),
    };

    // Target: α ≥ 2.5 (healthy scale-free). The shortfall decides how
    // much replay + linker work to schedule.
    let (replay, linker, why) = if alpha < 2.0 {
        (10, 5, format!(
            "α={:.2} (target ≥2.5): extreme hub dominance → 10 replay + 5 linker for lateral links",
            alpha))
    } else if alpha < 2.5 {
        (5, 3, format!(
            "α={:.2} (target ≥2.5): moderate hub dominance → 5 replay + 3 linker",
            alpha))
    } else {
        (3, 0, format!("α={:.2}: healthy — 3 replay for maintenance", alpha))
    };
    plan.replay_count += replay;
    plan.linker_count += linker;
    plan.rationale.push(why);

    // Target: Gini ≤ 0.4
    if gini > 0.5 {
        plan.replay_count += 3;
        plan.rationale.push(format!(
            "Gini={:.3} (target ≤0.4): high inequality → +3 replay (lateral focus)",
            gini));
    }

    // Target: avg schema fit ≥ 0.2
    if avg_fit < 0.1 {
        plan.replay_count += 5;
        plan.rationale.push(format!(
            "Schema fit={:.3} (target ≥0.2): very poor integration → +5 replay",
            avg_fit));
    } else if avg_fit < 0.2 {
        plan.replay_count += 2;
        plan.rationale.push(format!(
            "Schema fit={:.3} (target ≥0.2): low integration → +2 replay",
            avg_fit));
    }

    // Interference: >100 pairs is a lot, <10 is clean
    match interference_count {
        c if c > 100 => {
            plan.separator_count += 10;
            plan.rationale.push(format!(
                "Interference: {} pairs (target <50) → 10 separator", c));
        }
        c if c > 20 => {
            plan.separator_count += 5;
            plan.rationale.push(format!(
                "Interference: {} pairs (target <50) → 5 separator", c));
        }
        c if c > 0 => {
            plan.separator_count += c.min(3);
            plan.rationale.push(format!(
                "Interference: {} pairs → {} separator", c, plan.separator_count));
        }
        _ => {}
    }

    // Episodic → semantic transfer: too many raw episodes means
    // knowledge isn't being extracted into semantic form.
    if episodic_ratio > 0.6 {
        plan.transfer_count += 10;
        plan.rationale.push(format!(
            "Episodic ratio: {:.0}% ({}/{}) → 10 transfer (knowledge extraction needed)",
            episodic_ratio * 100.0, episodic_count, total_nodes));
    } else if episodic_ratio > 0.4 {
        plan.transfer_count += 5;
        plan.rationale.push(format!(
            "Episodic ratio: {:.0}% → 5 transfer",
            episodic_ratio * 100.0));
    }

    plan
}
/// Format the consolidation plan for display.
///
/// Lists the rationale lines, then a numbered agent schedule and the
/// total number of agent runs.
pub fn format_plan(plan: &ConsolidationPlan) -> String {
    let mut out = String::from("Consolidation Plan\n==================\n\n");
    out.push_str("Analysis:\n");
    for r in &plan.rationale {
        out.push_str(&format!("{}\n", r));
    }
    out.push_str("\nAgent allocation:\n");
    if plan.run_health {
        out.push_str(" 1. health — system audit\n");
    }
    // Bug fix: numbering previously started at 2 unconditionally, so a plan
    // without a health step would have begun at " 2." instead of " 1.".
    let mut step = if plan.run_health { 2 } else { 1 };
    if plan.replay_count > 0 {
        out.push_str(&format!(" {}. replay ×{:2} — schema assimilation + lateral linking\n",
            step, plan.replay_count));
        step += 1;
    }
    if plan.linker_count > 0 {
        out.push_str(&format!(" {}. linker ×{:2} — relational binding from episodes\n",
            step, plan.linker_count));
        step += 1;
    }
    if plan.separator_count > 0 {
        // Consistency fix: pad the count to width 2 like the other agents.
        out.push_str(&format!(" {}. separator ×{:2} — pattern separation\n",
            step, plan.separator_count));
        step += 1;
    }
    if plan.transfer_count > 0 {
        out.push_str(&format!(" {}. transfer ×{:2} — episodic→semantic extraction\n",
            step, plan.transfer_count));
    }
    let total = plan.replay_count + plan.linker_count
        + plan.separator_count + plan.transfer_count
        + if plan.run_health { 1 } else { 0 };
    out.push_str(&format!("\nTotal agent runs: {}\n", total));
    out
}
/// Brief daily check: compare current metrics to last snapshot.
///
/// Prints the current small-world metrics, the delta against the most
/// recent snapshot in the metrics history, a short status assessment,
/// then appends today's snapshot to the history. Returns the report text.
pub fn daily_check(store: &Store) -> String {
    let graph = store.build_graph();
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();
    let sigma = graph.small_world_sigma();
    let avg_cc = graph.avg_clustering_coefficient();
    let avg_fit = {
        let fits = graph::schema_fit_all(&graph);
        if fits.is_empty() { 0.0 } else {
            fits.values().sum::<f32>() / fits.len() as f32
        }
    };
    let history = graph::load_metrics_history();
    let prev = history.last();
    let mut out = String::from("Memory daily check\n");
    // Current state
    out.push_str(&format!(" σ={:.1} α={:.2} gini={:.3} cc={:.4} fit={:.3}\n",
        sigma, alpha, gini, avg_cc, avg_fit));
    // Trend against the previous snapshot, if one exists.
    if let Some(p) = prev {
        let d_sigma = sigma - p.sigma;
        let d_alpha = alpha - p.alpha;
        let d_gini = gini - p.gini;
        out.push_str(&format!(" Δσ={:+.1} Δα={:+.2} Δgini={:+.3}\n",
            d_sigma, d_alpha, d_gini));
        // Assessment: absolute thresholds plus trend thresholds.
        let mut issues = Vec::new();
        if alpha < 2.0 { issues.push("hub dominance critical"); }
        if gini > 0.5 { issues.push("high inequality"); }
        if avg_fit < 0.1 { issues.push("poor integration"); }
        if d_sigma < -5.0 { issues.push("σ declining"); }
        if d_alpha < -0.1 { issues.push("α declining"); }
        if d_gini > 0.02 { issues.push("inequality increasing"); }
        if issues.is_empty() {
            out.push_str(" Status: healthy\n");
        } else {
            out.push_str(&format!(" Status: needs attention — {}\n", issues.join(", ")));
            out.push_str(" Run: poc-memory consolidate-session\n");
        }
    } else {
        out.push_str(" (first snapshot, no trend data yet)\n");
    }
    // Log this snapshot too.
    // Bug fix: the epoch unwrap could panic on a pre-epoch system clock,
    // and the old fallback spawned `echo` and unwrapped its output (another
    // panic path). Degrade to 0.0 / "" instead of crashing a daily check.
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_secs_f64())
        .unwrap_or(0.0);
    let date = std::process::Command::new("date").arg("+%Y-%m-%d %H:%M")
        .output()
        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
        .unwrap_or_default();
    graph::save_metrics_snapshot(&graph::MetricsSnapshot {
        timestamp: now, date,
        nodes: graph.nodes().len(),
        edges: graph.edge_count(),
        communities: graph.community_count(),
        sigma, alpha, gini, avg_cc,
        avg_path_length: graph.avg_path_length(),
        avg_schema_fit: avg_fit,
    });
    out
}

146
src/search.rs Normal file
View file

@ -0,0 +1,146 @@
// Spreading activation search across the memory graph
//
// Same model as the old system but richer: uses graph edge strengths,
// supports circumscription parameter for blending associative vs
// causal walks, and benefits from community-aware result grouping.
use crate::capnp_store::Store;
use crate::graph::Graph;
use std::cmp::Ordering;
use std::collections::{HashMap, HashSet, VecDeque};
/// One ranked hit returned by [`search`].
pub struct SearchResult {
    /// Key of the matched node in the store.
    pub key: String,
    /// Final spreading-activation score (higher ranks first).
    pub activation: f64,
    /// True when the node's content matched the query text directly
    /// (a seed); false when it was reached only via graph spreading.
    pub is_direct: bool,
    /// Up to three matching content lines; populated for direct hits only.
    pub snippet: Option<String>,
}
/// Spreading activation with circumscription parameter.
///
/// circ = 0.0: field mode — all edges (default, broad resonance)
/// circ = 1.0: causal mode — prefer causal edges
fn spreading_activation(
    seeds: &[(String, f64)],
    graph: &Graph,
    store: &Store,
    _circumscription: f64,
) -> Vec<(String, f64)> {
    let params = &store.params;
    // Best activation seen so far per node key.
    let mut levels: HashMap<String, f64> = HashMap::new();
    // BFS frontier: (key, activation carried, hops from a seed).
    let mut frontier: VecDeque<(String, f64, u32)> = VecDeque::new();

    // Seed the frontier, keeping only the strongest activation per key.
    for (seed_key, seed_act) in seeds {
        let slot = levels.entry(seed_key.clone()).or_insert(0.0);
        if *seed_act > *slot {
            *slot = *seed_act;
            frontier.push_back((seed_key.clone(), *seed_act, 0));
        }
    }

    while let Some((key, act, depth)) = frontier.pop_front() {
        if depth >= params.max_hops {
            continue;
        }
        for (neighbor, strength) in graph.neighbors(&key) {
            // Attenuate by edge decay, the neighbor's own weight (unknown
            // nodes fall back to the default weight), and edge strength.
            let neighbor_weight = store.nodes.get(neighbor.as_str())
                .map(|n| n.weight as f64)
                .unwrap_or(params.default_weight);
            let propagated = act * params.edge_decay * neighbor_weight * strength as f64;
            if propagated < params.min_activation {
                continue;
            }
            // Re-enqueue only when this path improves the best known level.
            let slot = levels.entry(neighbor.clone()).or_insert(0.0);
            if propagated > *slot {
                *slot = propagated;
                frontier.push_back((neighbor.clone(), propagated, depth + 1));
            }
        }
    }

    let mut ranked: Vec<_> = levels.into_iter().collect();
    ranked.sort_by(|x, y| y.1.partial_cmp(&x.1).unwrap_or(Ordering::Equal));
    ranked
}
/// Full search: find direct hits, spread activation, return ranked results
pub fn search(query: &str, store: &Store) -> Vec<SearchResult> {
    let graph = store.build_graph();
    let needle = query.to_lowercase();
    let tokens: Vec<&str> = needle.split_whitespace().collect();

    let mut seeds: Vec<(String, f64)> = Vec::new();
    let mut snippets: HashMap<String, String> = HashMap::new();

    for (key, node) in &store.nodes {
        let haystack = node.content.to_lowercase();
        let exact = haystack.contains(&needle);
        // Multi-word queries also match when every token appears somewhere.
        let all_tokens = tokens.len() > 1 && tokens.iter().all(|t| haystack.contains(t));
        if !exact && !all_tokens {
            continue;
        }

        // Exact phrase matches seed at full node weight; token-only at 85%.
        let base = node.weight as f64;
        seeds.push((key.clone(), if exact { base } else { base * 0.85 }));

        // First three matching lines, each clipped to ~100 chars.
        let mut picked: Vec<String> = Vec::new();
        for line in node.content.lines() {
            if picked.len() == 3 {
                break;
            }
            let lowered = line.to_lowercase();
            let hit = (exact && lowered.contains(&needle))
                || tokens.iter().any(|t| lowered.contains(t));
            if !hit {
                continue;
            }
            let trimmed = line.trim();
            if trimmed.len() > 100 {
                let cut = trimmed.floor_char_boundary(97);
                picked.push(format!("{}...", &trimmed[..cut]));
            } else {
                picked.push(trimmed.to_string());
            }
        }
        snippets.insert(key.clone(), picked.join("\n "));
    }

    if seeds.is_empty() {
        return Vec::new();
    }

    let direct_hits: HashSet<String> = seeds.iter().map(|(k, _)| k.clone()).collect();
    let mut out = Vec::new();
    for (key, activation) in spreading_activation(&seeds, &graph, store, 0.0) {
        out.push(SearchResult {
            is_direct: direct_hits.contains(&key),
            snippet: snippets.get(&key).cloned(),
            key,
            activation,
        });
    }
    out
}
/// Extract meaningful search terms from natural language.
/// Strips common English stop words, returns up to max_terms words.
pub fn extract_query_terms(text: &str, max_terms: usize) -> String {
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "do", "does", "did",
        "have", "has", "had", "will", "would", "could", "should", "can",
        "may", "might", "shall", "been", "being", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "but", "or", "and", "not",
        "no", "if", "then", "than", "that", "this", "it", "its", "my",
        "your", "our", "we", "you", "i", "me", "he", "she", "they", "them",
        "what", "how", "why", "when", "where", "about", "just", "let",
        "want", "tell", "show", "think", "know", "see", "look", "make",
        "get", "go", "some", "any", "all", "very", "really", "also", "too",
        "so", "up", "out", "here", "there",
    ];
    let lowered = text.to_lowercase();
    let mut kept: Vec<&str> = Vec::new();
    for word in lowered.split(|c: char| !c.is_alphanumeric()) {
        if kept.len() == max_terms {
            break;
        }
        // Keep only words long enough to carry meaning (>2 chars) that
        // aren't in the stop list.
        if word.len() > 2 && !STOP_WORDS.contains(&word) {
            kept.push(word);
        }
    }
    kept.join(" ")
}

135
src/similarity.rs Normal file
View file

@ -0,0 +1,135 @@
// Text similarity: Porter stemming + BM25
//
// Used for interference detection (similar content, different communities)
// and schema fit scoring. Intentionally simple — ~100 lines, no
// external dependencies.
use std::collections::HashMap;
/// Minimal Porter stemmer — handles the most common English suffixes.
/// Not linguistically complete but good enough for similarity matching.
pub fn stem(word: &str) -> String {
    let mut w = word.to_lowercase();
    // Very short words carry no strippable suffix.
    if w.len() <= 3 {
        return w;
    }
    // (suffix, replacement) rules, applied in order to the running result —
    // equivalent to the original chain of strip_suffix calls.
    const RULES: &[(&str, &str)] = &[
        ("ation", "ate"),
        ("ness", ""),
        ("ment", ""),
        ("ting", "t"),
        ("ling", "l"),
        ("ring", "r"),
        ("ning", "n"),
        ("ding", "d"),
        ("ping", "p"),
        ("ging", "g"),
        ("ying", "y"),
        ("ied", "y"),
        ("ies", "y"),
        ("ing", ""),
        ("ed", ""),
        ("ly", ""),
        ("er", ""),
        ("al", ""),
        ("s", ""),
    ];
    for (suffix, replacement) in RULES {
        w = strip_suffix(&w, suffix, replacement);
    }
    w
}
/// Replace `suffix` with `replacement` at the end of `word`, but only when
/// the remaining stem would be more than 2 characters — otherwise return
/// the word unchanged.
fn strip_suffix(word: &str, suffix: &str, replacement: &str) -> String {
    if word.len() <= suffix.len() + 2 {
        return word.to_string();
    }
    match word.strip_suffix(suffix) {
        Some(base) => format!("{}{}", base, replacement),
        None => word.to_string(),
    }
}
/// Tokenize and stem a text into a term frequency map
pub fn term_frequencies(text: &str) -> HashMap<String, u32> {
let mut tf = HashMap::new();
for word in text.split(|c: char| !c.is_alphanumeric()) {
if word.len() > 2 {
let stemmed = stem(word);
*tf.entry(stemmed).or_default() += 1;
}
}
tf
}
/// Cosine similarity between two documents using stemmed term frequencies.
/// Returns 0.0 for disjoint vocabularies, 1.0 for identical content.
pub fn cosine_similarity(doc_a: &str, doc_b: &str) -> f32 {
let tf_a = term_frequencies(doc_a);
let tf_b = term_frequencies(doc_b);
if tf_a.is_empty() || tf_b.is_empty() {
return 0.0;
}
// Dot product
let mut dot = 0.0f64;
for (term, &freq_a) in &tf_a {
if let Some(&freq_b) = tf_b.get(term) {
dot += freq_a as f64 * freq_b as f64;
}
}
// Magnitudes
let mag_a: f64 = tf_a.values().map(|&f| (f as f64).powi(2)).sum::<f64>().sqrt();
let mag_b: f64 = tf_b.values().map(|&f| (f as f64).powi(2)).sum::<f64>().sqrt();
if mag_a < 1e-10 || mag_b < 1e-10 {
return 0.0;
}
(dot / (mag_a * mag_b)) as f32
}
/// Compute pairwise similarity for a set of documents.
/// Returns pairs with similarity above `threshold`, sorted descending.
///
/// O(n²) in the number of documents by design — intended for modest sets.
pub fn pairwise_similar(
    docs: &[(String, String)], // (key, content)
    threshold: f32,
) -> Vec<(String, String, f32)> {
    let mut results = Vec::new();
    for i in 0..docs.len() {
        for j in (i + 1)..docs.len() {
            let sim = cosine_similarity(&docs[i].1, &docs[j].1);
            if sim >= threshold {
                results.push((docs[i].0.clone(), docs[j].0.clone(), sim));
            }
        }
    }
    // total_cmp gives a total order on f32, avoiding the panic that
    // partial_cmp().unwrap() would hit if a NaN ever slipped in.
    results.sort_by(|a, b| b.2.total_cmp(&a.2));
    results
}
#[cfg(test)]
mod tests {
    use super::*;

    // Spot-checks for the minimal stemmer: exact outputs are pinned so a
    // change in rule order or thresholds is caught immediately.
    #[test]
    fn test_stem() {
        assert_eq!(stem("running"), "runn"); // -ning → n
        assert_eq!(stem("talking"), "talk"); // not matched by specific consonant rules
        assert_eq!(stem("slowly"), "slow"); // -ly
        // The stemmer is minimal — it doesn't need to be perfect,
        // just consistent enough that related words collide.
        assert_eq!(stem("observations"), "observation"); // -s stripped, -ation stays (word too short after)
    }

    // A document compared with itself must score ~1.0 (allowing float slop).
    #[test]
    fn test_cosine_identical() {
        let text = "the quick brown fox jumps over the lazy dog";
        let sim = cosine_similarity(text, text);
        assert!((sim - 1.0).abs() < 0.01, "identical docs should have sim ~1.0, got {}", sim);
    }

    // Documents with disjoint vocabularies must score near zero.
    #[test]
    fn test_cosine_different() {
        let a = "kernel filesystem transaction restart handling";
        let b = "cooking recipe chocolate cake baking temperature";
        let sim = cosine_similarity(a, b);
        assert!(sim < 0.1, "unrelated docs should have low sim, got {}", sim);
    }
}