49 changed files with 1180 additions and 4680 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -165,39 +165,6 @@ dependencies = [
 "tree-sitter-yaml",
 ]
 [[package]]
 name = "async-stream"
 version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476"
 dependencies = [
 "async-stream-impl",
 "futures-core",
 "pin-project-lite",
 ]
 [[package]]
 name = "async-stream-impl"
 version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
 dependencies = [
 "proc-macro2",
 "quote",
 "syn 2.0.117",
 ]
 [[package]]
 name = "async-trait"
 version = "0.1.89"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
 dependencies = [
 "proc-macro2",
 "quote",
 "syn 2.0.117",
 ]
 [[package]]
 name = "atomic"
 version = "0.6.1"
@ -241,53 +208,6 @@ dependencies = [
 "fs_extra",
 ]
 [[package]]
 name = "axum"
 version = "0.7.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
 dependencies = [
 "async-trait",
 "axum-core",
 "bytes",
 "futures-util",
 "http",
 "http-body",
 "http-body-util",
 "itoa",
 "matchit",
 "memchr",
 "mime",
 "percent-encoding",
 "pin-project-lite",
 "rustversion",
 "serde",
 "sync_wrapper",
 "tower 0.5.3",
 "tower-layer",
 "tower-service",
 ]
 [[package]]
 name = "axum-core"
 version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
 dependencies = [
 "async-trait",
 "bytes",
 "futures-util",
 "http",
 "http-body",
 "http-body-util",
 "mime",
 "pin-project-lite",
 "rustversion",
 "sync_wrapper",
 "tower-layer",
 "tower-service",
 ]
 [[package]]
 name = "base64"
 version = "0.13.1"
@ -571,7 +491,6 @@ dependencies = [
 "anyhow",
 "ast-grep-core",
 "ast-grep-language",
 "async-stream",
 "base64 0.22.1",
 "bytes",
 "capnp",
@ -599,14 +518,11 @@ dependencies = [
 "notify-debouncer-mini",
 "paste",
 "peg",
 "prost",
 "protoc-bin-vendored",
 "ratatui",
 "redb",
 "regex",
 "rustls",
 "rustls-native-certs",
 "rustls-pemfile",
 "serde",
 "serde_json",
 "serde_urlencoded",
@ -615,10 +531,7 @@ dependencies = [
 "tokenizers",
 "tokio",
 "tokio-rustls",
 "tokio-stream",
 "tokio-util",
 "tonic",
 "tonic-build",
 "tui-markdown",
 "tui-textarea-2",
 "uuid",
@ -1151,12 +1064,6 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
 [[package]]
 name = "fixedbitset"
 version = "0.5.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
 [[package]]
 name = "flate2"
 version = "1.1.9"
@ -1381,31 +1288,6 @@ dependencies = [
 "regex-syntax",
 ]
 [[package]]
 name = "h2"
 version = "0.4.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
 dependencies = [
 "atomic-waker",
 "bytes",
 "fnv",
 "futures-core",
 "futures-sink",
 "http",
 "indexmap 2.14.0",
 "slab",
 "tokio",
 "tokio-util",
 "tracing",
 ]
 [[package]]
 name = "hashbrown"
 version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 [[package]]
 name = "hashbrown"
 version = "0.15.5"
@ -1511,12 +1393,6 @@ version = "1.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
 [[package]]
 name = "httpdate"
 version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
 [[package]]
 name = "hyper"
 version = "1.9.0"
@ -1527,11 +1403,9 @@ dependencies = [
 "bytes",
 "futures-channel",
 "futures-core",
 "h2",
 "http",
 "http-body",
 "httparse",
 "httpdate",
 "itoa",
 "pin-project-lite",
 "smallvec",
@ -1539,19 +1413,6 @@ dependencies = [
 "want",
 ]
 [[package]]
 name = "hyper-timeout"
 version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0"
 dependencies = [
 "hyper",
 "hyper-util",
 "pin-project-lite",
 "tokio",
 "tower-service",
 ]
 [[package]]
 name = "hyper-util"
 version = "0.1.20"
@ -1559,17 +1420,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
 dependencies = [
 "bytes",
 "futures-channel",
 "futures-util",
 "http",
 "http-body",
 "hyper",
 "libc",
 "pin-project-lite",
 "socket2 0.6.3",
 "tokio",
 "tower-service",
 "tracing",
 ]
 [[package]]
@ -1630,16 +1485,6 @@ version = "0.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09e54e57b4c48b40f7aec75635392b12b3421fa26fe8b4332e63138ed278459c"
 [[package]]
 name = "indexmap"
 version = "1.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
 dependencies = [
 "autocfg",
 "hashbrown 0.12.3",
 ]
 [[package]]
 name = "indexmap"
 version = "2.14.0"
@ -2013,12 +1858,6 @@ dependencies = [
 "xml5ever",
 ]
 [[package]]
 name = "matchit"
 version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
 [[package]]
 name = "memchr"
 version = "2.8.0"
@ -2049,12 +1888,6 @@ dependencies = [
 "autocfg",
 ]
 [[package]]
 name = "mime"
 version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"
@ -2105,12 +1938,6 @@ dependencies = [
 "syn 2.0.117",
 ]
 [[package]]
 name = "multimap"
 version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
 [[package]]
 name = "new_debug_unreachable"
 version = "1.0.6"
@ -2406,16 +2233,6 @@ dependencies = [
 "sha2",
 ]
 [[package]]
 name = "petgraph"
 version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
 dependencies = [
 "fixedbitset 0.5.7",
 "indexmap 2.14.0",
 ]
 [[package]]
 name = "phf"
 version = "0.11.3"
@ -2468,26 +2285,6 @@ dependencies = [
 "siphasher",
 ]
 [[package]]
 name = "pin-project"
 version = "1.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517"
 dependencies = [
 "pin-project-internal",
 ]
 [[package]]
 name = "pin-project-internal"
 version = "1.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6"
 dependencies = [
 "proc-macro2",
 "quote",
 "syn 2.0.117",
 ]
 [[package]]
 name = "pin-project-lite"
 version = "0.2.17"
@ -2507,7 +2304,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07"
 dependencies = [
 "base64 0.22.1",
- "indexmap 2.14.0",
+ "indexmap",
 "quick-xml",
 "serde",
 "time",
@ -2581,122 +2378,6 @@ dependencies = [
 "yansi",
 ]
 [[package]]
 name = "prost"
 version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
 dependencies = [
 "bytes",
 "prost-derive",
 ]
 [[package]]
 name = "prost-build"
 version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
 dependencies = [
 "heck",
 "itertools",
 "log",
 "multimap",
 "once_cell",
 "petgraph",
 "prettyplease",
 "prost",
 "prost-types",
 "regex",
 "syn 2.0.117",
 "tempfile",
 ]
 [[package]]
 name = "prost-derive"
 version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
 dependencies = [
 "anyhow",
 "itertools",
 "proc-macro2",
 "quote",
 "syn 2.0.117",
 ]
 [[package]]
 name = "prost-types"
 version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16"
 dependencies = [
 "prost",
 ]
 [[package]]
 name = "protoc-bin-vendored"
 version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d1c381df33c98266b5f08186583660090a4ffa0889e76c7e9a5e175f645a67fa"
 dependencies = [
 "protoc-bin-vendored-linux-aarch_64",
 "protoc-bin-vendored-linux-ppcle_64",
 "protoc-bin-vendored-linux-s390_64",
 "protoc-bin-vendored-linux-x86_32",
 "protoc-bin-vendored-linux-x86_64",
 "protoc-bin-vendored-macos-aarch_64",
 "protoc-bin-vendored-macos-x86_64",
 "protoc-bin-vendored-win32",
 ]
 [[package]]
 name = "protoc-bin-vendored-linux-aarch_64"
 version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c350df4d49b5b9e3ca79f7e646fde2377b199e13cfa87320308397e1f37e1a4c"
 [[package]]
 name = "protoc-bin-vendored-linux-ppcle_64"
 version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a55a63e6c7244f19b5c6393f025017eb5d793fd5467823a099740a7a4222440c"
 [[package]]
 name = "protoc-bin-vendored-linux-s390_64"
 version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1dba5565db4288e935d5330a07c264a4ee8e4a5b4a4e6f4e83fad824cc32f3b0"
 [[package]]
 name = "protoc-bin-vendored-linux-x86_32"
 version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8854774b24ee28b7868cd71dccaae8e02a2365e67a4a87a6cd11ee6cdbdf9cf5"
 [[package]]
 name = "protoc-bin-vendored-linux-x86_64"
 version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b38b07546580df720fa464ce124c4b03630a6fb83e05c336fea2a241df7e5d78"
 [[package]]
 name = "protoc-bin-vendored-macos-aarch_64"
 version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "89278a9926ce312e51f1d999fee8825d324d603213344a9a706daa009f1d8092"
 [[package]]
 name = "protoc-bin-vendored-macos-x86_64"
 version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "81745feda7ccfb9471d7a4de888f0652e806d5795b61480605d4943176299756"
 [[package]]
 name = "protoc-bin-vendored-win32"
 version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "95067976aca6421a523e491fce939a3e65249bac4b977adee0ee9771568e8aa3"
 [[package]]
 name = "pulldown-cmark"
 version = "0.13.3"
@ -2752,8 +2433,6 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
 dependencies = [
 "libc",
 "rand_chacha 0.3.1",
 "rand_core 0.6.4",
 ]
@ -2763,20 +2442,10 @@ version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
 dependencies = [
- "rand_chacha 0.9.0",
+ "rand_chacha",
 "rand_core 0.9.5",
 ]
 [[package]]
 name = "rand_chacha"
 version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
 dependencies = [
 "ppv-lite86",
 "rand_core 0.6.4",
 ]
 [[package]]
 name = "rand_chacha"
 version = "0.9.0"
@ -2792,9 +2461,6 @@ name = "rand_core"
 version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 dependencies = [
 "getrandom 0.2.17",
 ]
 [[package]]
 name = "rand_core"
@ -3043,15 +2709,6 @@ dependencies = [
 "security-framework",
 ]
 [[package]]
 name = "rustls-pemfile"
 version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50"
 dependencies = [
 "rustls-pki-types",
 ]
 [[package]]
 name = "rustls-pki-types"
 version = "1.14.0"
@ -3174,7 +2831,7 @@ version = "1.0.149"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
 dependencies = [
- "indexmap 2.14.0",
+ "indexmap",
 "itoa",
 "memchr",
 "serde",
@ -3278,16 +2935,6 @@ version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"
 [[package]]
 name = "socket2"
 version = "0.5.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
 dependencies = [
 "libc",
 "windows-sys 0.52.0",
 ]
 [[package]]
 name = "socket2"
 version = "0.6.3"
@ -3402,12 +3049,6 @@ dependencies = [
 "unicode-ident",
 ]
 [[package]]
 name = "sync_wrapper"
 version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
 [[package]]
 name = "syntect"
 version = "5.3.0"
@ -3486,7 +3127,7 @@ dependencies = [
 "fancy-regex",
 "filedescriptor",
 "finl_unicode",
- "fixedbitset 0.4.2",
+ "fixedbitset",
 "hex",
 "lazy_static",
 "libc",
@ -3646,7 +3287,7 @@ dependencies = [
 "parking_lot",
 "pin-project-lite",
 "signal-hook-registry",
- "socket2 0.6.3",
+ "socket2",
 "tokio-macros",
 "windows-sys 0.61.2",
 ]
@ -3672,17 +3313,6 @@ dependencies = [
 "tokio",
 ]
 [[package]]
 name = "tokio-stream"
 version = "0.1.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70"
 dependencies = [
 "futures-core",
 "pin-project-lite",
 "tokio",
 ]
 [[package]]
 name = "tokio-util"
 version = "0.7.18"
@ -3697,130 +3327,6 @@ dependencies = [
 "tokio",
 ]
 [[package]]
 name = "tonic"
 version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
 dependencies = [
 "async-stream",
 "async-trait",
 "axum",
 "base64 0.22.1",
 "bytes",
 "h2",
 "http",
 "http-body",
 "http-body-util",
 "hyper",
 "hyper-timeout",
 "hyper-util",
 "percent-encoding",
 "pin-project",
 "prost",
 "rustls-native-certs",
 "rustls-pemfile",
 "socket2 0.5.10",
 "tokio",
 "tokio-rustls",
 "tokio-stream",
 "tower 0.4.13",
 "tower-layer",
 "tower-service",
 "tracing",
 ]
 [[package]]
 name = "tonic-build"
 version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11"
 dependencies = [
 "prettyplease",
 "proc-macro2",
 "prost-build",
 "prost-types",
 "quote",
 "syn 2.0.117",
 ]
 [[package]]
 name = "tower"
 version = "0.4.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
 dependencies = [
 "futures-core",
 "futures-util",
 "indexmap 1.9.3",
 "pin-project",
 "pin-project-lite",
 "rand 0.8.5",
 "slab",
 "tokio",
 "tokio-util",
 "tower-layer",
 "tower-service",
 "tracing",
 ]
 [[package]]
 name = "tower"
 version = "0.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
 dependencies = [
 "futures-core",
 "futures-util",
 "pin-project-lite",
 "sync_wrapper",
 "tower-layer",
 "tower-service",
 ]
 [[package]]
 name = "tower-layer"
 version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
 [[package]]
 name = "tower-service"
 version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
 [[package]]
 name = "tracing"
 version = "0.1.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
 dependencies = [
 "pin-project-lite",
 "tracing-attributes",
 "tracing-core",
 ]
 [[package]]
 name = "tracing-attributes"
 version = "0.1.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
 dependencies = [
 "proc-macro2",
 "quote",
 "syn 2.0.117",
 ]
 [[package]]
 name = "tracing-core"
 version = "0.1.36"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
 dependencies = [
 "once_cell",
 ]
 [[package]]
 name = "tree-sitter"
 version = "0.26.8"
@ -4379,7 +3885,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
 dependencies = [
 "anyhow",
- "indexmap 2.14.0",
+ "indexmap",
 "wasm-encoder",
 "wasmparser",
 ]
@ -4392,7 +3898,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
 dependencies = [
 "bitflags 2.11.0",
 "hashbrown 0.15.5",
- "indexmap 2.14.0",
+ "indexmap",
 "semver",
 ]
@ -4761,7 +4267,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
 dependencies = [
 "anyhow",
 "heck",
- "indexmap 2.14.0",
+ "indexmap",
 "prettyplease",
 "syn 2.0.117",
 "wasm-metadata",
@ -4792,7 +4298,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
 dependencies = [
 "anyhow",
 "bitflags 2.11.0",
- "indexmap 2.14.0",
+ "indexmap",
 "log",
 "serde",
 "serde_derive",
@ -4811,7 +4317,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
 dependencies = [
 "anyhow",
 "id-arena",
- "indexmap 2.14.0",
+ "indexmap",
 "log",
 "semver",
 "serde",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -18,9 +18,6 @@ name = "consciousness"
 version.workspace = true
 edition.workspace = true
 [features]
 nightly-diagnostics = []
 [dependencies]
 anyhow = "1"
 html2md = "0.2"
@ -64,11 +61,6 @@ futures = "0.3"
 capnp = "0.25"
 capnp-rpc = "0.25"
 tonic = { version = "0.12", features = ["tls", "tls-roots"] }
 prost = "0.13"
 async-stream = "0.3"
 tokio-stream = "0.1"
 tokenizers = "0.22"
 http = "1"
@ -82,13 +74,10 @@ imagesize = "0.14"
 rustls = "0.23"
 tokio-rustls = "0.26"
 rustls-native-certs = "0.8"
 rustls-pemfile = "2"
 serde_urlencoded = "0.7"
 [build-dependencies]
 capnpc = "0.25"
 tonic-build = { version = "0.12", default-features = false, features = ["prost", "transport"] }
 protoc-bin-vendored = "3"
 [lib]
 name = "consciousness"
--- a/build.rs
+++ b/build.rs
@ -13,21 +13,4 @@ fn main() {
        .file("schema/channel.capnp")
        .run()
        .expect("capnp compile failed (channel.capnp)");
    // Generate salience.v1 gRPC client + message types from proto.
    // Server side (python) is generated separately via grpcio-tools.
    // Use vendored protoc so we don't require a system install.
    let protoc = protoc_bin_vendored::protoc_bin_path()
        .expect("vendored protoc not available for this platform");
    // SAFETY: build script is single-threaded at this point; setting env
    // before invoking tonic_build is the documented way to point it at a
    // non-PATH protoc.
    unsafe { std::env::set_var("PROTOC", protoc); }
    tonic_build::configure()
        .build_server(false)
        .build_client(true)
        .compile_protos(&["proto/salience.proto"], &["proto"])
        .expect("tonic_build compile failed (salience.proto)");
    println!("cargo:rerun-if-changed=proto/salience.proto");
 }
--- a/channels/telegram/src/main.rs
+++ b/channels/telegram/src/main.rs
@ -181,8 +181,6 @@ struct TelegramMessage {
    chat_id: i64,
    sender: String,
    text: String,
    /// Absolute path to a downloaded media file (photo, etc.), if any.
    media_path: Option<String>,
 }
 /// Fetch and parse pending updates from Telegram via long polling.
@ -208,115 +206,19 @@ async fn get_updates(
            let sender = msg["from"]["first_name"].as_str().unwrap_or("unknown").to_string();
            let chat_id = msg["chat"]["id"].as_i64().unwrap_or(0);
-            // Photo: array of PhotoSize, largest is last. Download largest,
+            if let Some(text) = msg["text"].as_str() {
-            // surface message with [image: <path>] marker so the multimodal
+                messages.push(TelegramMessage {
-            // model can Read the image.
+                    update_id,
-            let (text, media_path) = if let Some(sizes) = msg["photo"].as_array() {
+                    chat_id,
-                let caption = msg["caption"].as_str().unwrap_or("").to_string();
+                    sender,
-                let largest = sizes.last();
+                    text: text.to_string(),
-                let file_id = largest
+                });
-                    .and_then(|s| s["file_id"].as_str())
+            }
                    .unwrap_or("");
                if file_id.is_empty() {
                    error!("telegram photo: missing file_id in update {update_id}");
                    (caption, None)
                } else {
                    // Bound the download — HttpClient::request_timeout only covers
                    // send_request, not body collect, so an indefinitely-slow body
                    // would otherwise stall every subsequent poll.
                    let dl = tokio::time::timeout(
                        std::time::Duration::from_secs(60),
                        download_telegram_file(client, token, file_id),
                    ).await
                        .unwrap_or_else(|_| Err("download timed out after 60s".into()));
                    match dl {
                        Ok(path) => (caption, Some(path)),
                        Err(e) => {
                            error!("telegram photo download failed (file_id={file_id}): {e}");
                            // Surface what we have: caption plus a marker that
                            // a photo was sent but couldn't be fetched.
                            let marker = format!("[image: download failed: {e}]");
                            let combined = if caption.is_empty() {
                                marker
                            } else {
                                format!("{marker}\n{caption}")
                            };
                            (combined, None)
                        }
                    }
                }
            } else if let Some(text) = msg["text"].as_str() {
                (text.to_string(), None)
            } else {
                // Other media types (voice, video, sticker, etc.) — skip for now,
                // but log so we can extend later.
                let kind = ["voice", "video", "sticker", "document", "audio", "animation"]
                    .iter()
                    .find(|k| !msg[**k].is_null())
                    .copied()
                    .unwrap_or("unknown");
                info!("telegram: skipping non-text/photo message (kind={kind}, update_id={update_id})");
                continue;
            };
            messages.push(TelegramMessage {
                update_id,
                chat_id,
                sender,
                text,
                media_path,
            });
        }
    }
    Ok(messages)
 }
 /// Resolve a Telegram file_id to a downloadable URL path via getFile.
 async fn get_file_path(
    client: &HttpClient,
    token: &str,
    file_id: &str,
 ) -> Result<String, Box<dyn std::error::Error>> {
    let url = format!(
        "https://api.telegram.org/bot{}/getFile?file_id={}",
        token, file_id,
    );
    let response = client.get(&url).await?;
    let body = response.text().await?;
    let resp: serde_json::Value = serde_json::from_str(&body)
        .map_err(|e| format!("getFile JSON parse error: {e}"))?;
    if !resp["ok"].as_bool().unwrap_or(false) {
        return Err(format!("getFile failed: {}", resp["description"].as_str().unwrap_or("?")).into());
    }
    let file_path = resp["result"]["file_path"].as_str()
        .ok_or("getFile: missing result.file_path")?;
    Ok(file_path.to_string())
 }
 /// Download a Telegram file by file_id into the channel media dir.
 /// Returns the absolute local path on success.
 async fn download_telegram_file(
    client: &HttpClient,
    token: &str,
    file_id: &str,
 ) -> Result<String, Box<dyn std::error::Error>> {
    let file_path = get_file_path(client, token, file_id).await?;
    let url = format!("https://api.telegram.org/file/bot{}/{}", token, file_path);
    let response = client.get(&url).await?;
    let status = response.status();
    if !status.is_success() {
        return Err(format!("file download failed: {status}").into());
    }
    let bytes = response.bytes().await?;
    let ext = file_path.rsplit('.').next().filter(|e| !e.contains('/')).unwrap_or("dat");
    let media_dir = log_dir().join("media");
    std::fs::create_dir_all(&media_dir)?;
    let dest = media_dir.join(format!("{file_id}.{ext}"));
    std::fs::write(&dest, &bytes)?;
    Ok(dest.to_string_lossy().to_string())
 }
 /// Send a text message to a Telegram chat.
 async fn send_message(
    client: &HttpClient,
@ -467,19 +369,11 @@ async fn poll_once(
        let sender_lower = msg.sender.to_lowercase();
        let channel = format!("telegram.{}", sender_lower);
-        // If the message has media, prepend an [image: <abs_path>] marker
+        channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &msg.text);
        // so the multimodal model can Read the file directly.
        let body = match &msg.media_path {
            Some(path) if msg.text.is_empty() => format!("[image: {path}]"),
            Some(path) => format!("[image: {path}]\n{}", msg.text),
            None => msg.text.clone(),
        };
        channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &body);
        let mut s = state.borrow_mut();
        s.config.chat_ids.insert(sender_lower, msg.chat_id);
-        let line = format!("[{}] {}", msg.sender, body);
+        let line = format!("[{}] {}", msg.sender, msg.text);
        s.push_message(line, 2, &channel);
    }
--- a/channels/tmux/src/main.rs
+++ b/channels/tmux/src/main.rs
@ -26,12 +26,10 @@ use consciousness::thalamus::channel_log::ChannelLog;
 #[derive(Clone, serde::Serialize, serde::Deserialize)]
 struct PaneConfig {
-    /// Human-readable label: becomes the channel name "tmux.<label>",
+    /// Human-readable label, becomes the channel name "tmux.<label>"
    /// and the tmux pane title / window name the live pane id is
    /// resolved from. The pane id is deliberately not stored — it is
    /// ephemeral (recycled across pane and tmux-server restarts), so it
    /// is looked up fresh on every connect attempt.
    label: String,
    /// Tmux pane ID, e.g. "%5"
    pane_id: String,
 }
 #[derive(Clone, serde::Serialize, serde::Deserialize)]
@ -88,9 +86,11 @@ impl State {
        }
    }
-    /// Whether a pane with this label is registered.
+    /// Get pane_id for a label
-    fn has_pane(&self, label: &str) -> bool {
+    fn get_pane(&self, label: &str) -> Option<&str> {
-        self.config.panes.iter().any(|p| p.label == label)
+        self.config.panes.iter()
            .find(|p| p.label == label)
            .map(|p| p.pane_id.as_str())
    }
    /// Check if a pane is connected
@ -103,124 +103,98 @@ impl State {
        self.connected.insert(label.to_string(), connected);
    }
-    /// Register a pane and persist.
+    /// Add a pane and persist
-    fn add_pane(&mut self, label: String) {
+    fn add_pane(&mut self, label: String, pane_id: String) {
        if !self.config.panes.iter().any(|p| p.label == label) {
-            self.config.panes.push(PaneConfig { label });
+            self.config.panes.push(PaneConfig { label, pane_id });
            save_config(&self.config);
        }
    }
-    /// Unregister a pane and persist. Returns whether it was registered.
+    /// Remove a pane and persist
-    fn remove_pane(&mut self, label: &str) -> bool {
+    fn remove_pane(&mut self, label: &str) -> Option<String> {
        if let Some(idx) = self.config.panes.iter().position(|p| p.label == label) {
-            self.config.panes.remove(idx);
+            let pane = self.config.panes.remove(idx);
            self.connected.remove(label);
            save_config(&self.config);
-            true
+            Some(pane.pane_id)
        } else {
-            false
+            None
        }
    }
 }
 // ── Pipe-Pane Reader ──────────────────────────────────────────
-/// Wait between connect attempts for a pane that is not yet reachable.
+/// Set up pipe-pane for a single pane, reading output into the channel log.
-const RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_secs(2);
+async fn pipe_pane_reader(state: SharedState, pane: PaneConfig) {
 /// Keep a pane streamed into its channel log for as long as it stays
 /// registered. The pane id is resolved fresh by label on every connect
 /// attempt — tmux pane ids are ephemeral, so the label (pane title /
 /// window name) is the durable identity. Retries until the pane exists
 /// and pipe-pane succeeds, and reconnects the same way if the pipe
 /// later drops. Returns once close() unregisters the pane.
 async fn pipe_pane_reader(state: SharedState, label: String) {
    let pipe_dir = dirs::home_dir()
        .unwrap_or_default()
        .join(".consciousness/channels/tmux-pipes");
    std::fs::create_dir_all(&pipe_dir).ok();
    let pipe_path = pipe_dir.join(format!("{}.pipe", label));
    let channel_key = format!("tmux.{}", label);
-    loop {
+    let pipe_path = pipe_dir.join(format!("{}.pipe", pane.label));
-        if !state.borrow().has_pane(&label) {
+    let _ = std::fs::remove_file(&pipe_path);
            return;
        }
-        connect_and_stream(&state, &label, &pipe_path, &channel_key).await;
+    // Create a named pipe (FIFO)
        state.borrow_mut().set_connected(&label, false);
        if !state.borrow().has_pane(&label) {
            return;
        }
        tokio::time::sleep(RETRY_INTERVAL).await;
    }
 }
 /// One connect attempt: resolve the pane's live id by label, point its
 /// output at the FIFO with pipe-pane, and stream lines into the channel
 /// log. Returns on the first failure, or when the stream ends.
 async fn connect_and_stream(
    state: &SharedState,
    label: &str,
    pipe_path: &std::path::Path,
    channel_key: &str,
 ) {
    let pane_id = match find_pane_by_name(label) {
        Some(id) => id,
        None => return,
    };
    // Fresh FIFO for this attempt.
    let _ = std::fs::remove_file(pipe_path);
    unsafe {
        let c_path = std::ffi::CString::new(pipe_path.to_str().unwrap()).unwrap();
        libc::mkfifo(c_path.as_ptr(), 0o644);
    }
-    // Point the pane's output at our FIFO.
+    // Tell tmux to pipe this pane's output to our FIFO
-    let pipe_cmd = format!("cat >> {}", pipe_path.to_string_lossy());
+    let pipe_path_str = pipe_path.to_string_lossy().to_string();
-    match std::process::Command::new("tmux")
+    let result = std::process::Command::new("tmux")
-        .args(["pipe-pane", "-t", &pane_id, &pipe_cmd])
+        .args(["pipe-pane", "-t", &pane.pane_id, &format!("cat >> {}", pipe_path_str)])
-        .output()
+        .output();
-    {
+
-        Ok(o) if o.status.success() => {}
+    match result {
-        Ok(o) => {
+        Ok(output) if output.status.success() => {
-            warn!("pipe-pane failed for {} ({}): {}", label, pane_id,
+            info!("pipe-pane set up for {} ({})", pane.label, pane.pane_id);
-                  String::from_utf8_lossy(&o.stderr));
+        }
        Ok(output) => {
            error!("pipe-pane failed for {}: {}", pane.label,
                   String::from_utf8_lossy(&output.stderr));
            state.borrow_mut().set_connected(&pane.label, false);
            return;
        }
        Err(e) => {
-            error!("running tmux pipe-pane for {}: {}", label, e);
+            error!("failed to run tmux pipe-pane for {}: {}", pane.label, e);
            state.borrow_mut().set_connected(&pane.label, false);
            return;
        }
    }
-    let file = match tokio::fs::File::open(pipe_path).await {
+    // Open the FIFO and read lines
    let file = match tokio::fs::File::open(&pipe_path).await {
        Ok(f) => f,
        Err(e) => {
-            warn!("opening pipe for {}: {}", label, e);
+            error!("failed to open pipe for {}: {}", pane.label, e);
            state.borrow_mut().set_connected(&pane.label, false);
            return;
        }
    };
-    info!("connected channel tmux.{} (pane {})", label, pane_id);
+    // Mark as connected once pipe is open
-    state.borrow_mut().set_connected(label, true);
+    state.borrow_mut().set_connected(&pane.label, true);
    let reader = tokio::io::BufReader::new(file);
    let mut lines = reader.lines();
    let channel_key = format!("tmux.{}", pane.label);
    let mut lines = tokio::io::BufReader::new(file).lines();
    while let Ok(Some(line)) = lines.next_line().await {
        if line.trim().is_empty() {
            continue;
        }
        let mut s = state.borrow_mut();
-        s.channel_logs
+        let log = s.channel_logs
-            .entry(channel_key.to_string())
+            .entry(channel_key.clone())
-            .or_insert_with(ChannelLog::new)
+            .or_insert_with(ChannelLog::new);
-            .push(line);
+        log.push(line);
    }
-    warn!("pipe-pane stream ended for {}", label);
+    warn!("pipe-pane reader ended for {}", pane.label);
    state.borrow_mut().set_connected(&pane.label, false);
 }
 // ── ChannelServer Implementation ───────────────────────────────
@ -270,10 +244,10 @@ impl channel_server::Server for ChannelServerImpl {
        let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
        let message = pry!(pry!(params.get_message()).to_str()).to_string();
-        // Send to tmux pane via send-keys — resolve the live pane id by
+        // Send to tmux pane via send-keys
        // label (it is not stored).
        let label = channel.strip_prefix("tmux.").unwrap_or(&channel);
-        if let Some(pane_id) = find_pane_by_name(label) {
+        let pane_id = self.state.borrow().get_pane(label).map(String::from);
        if let Some(pane_id) = pane_id {
            let _ = std::process::Command::new("tmux")
                .args(["send-keys", "-t", &pane_id, &message, "Enter"])
                .output();
@ -328,22 +302,28 @@ impl channel_server::Server for ChannelServerImpl {
        let params = pry!(params.get());
        let label = pry!(pry!(params.get_label()).to_str()).to_string();
-        // Already registered — nothing to do.
+        // Check if already open
-        if self.state.borrow().has_pane(&label) {
+        if self.state.borrow().get_pane(&label).is_some() {
            return std::future::ready(Ok(()));
        }
-        info!("opening channel tmux.{}", label);
+        // Find the tmux pane by name (window or pane title)
        let pane_id = match find_pane_by_name(&label) {
            Some(id) => id,
            None => return std::future::ready(Err(capnp::Error::failed(
                format!("no tmux pane named '{}'", label)))),
        };
-        // Register the label and persist. The pane id is not stored —
+        info!("opening channel tmux.{} (pane {})", label, pane_id);
        // the reader resolves it by label on every connect attempt, so
        // this succeeds even if the pane does not exist yet; the reader
        // connects once it appears.
        self.state.borrow_mut().add_pane(label.clone());
        // Register in state and persist
        self.state.borrow_mut().add_pane(label.clone(), pane_id.clone());
        // Start pipe-pane reader
        let pane = PaneConfig { label, pane_id };
        let reader_state = self.state.clone();
        tokio::task::spawn_local(async move {
-            pipe_pane_reader(reader_state, label).await;
+            pipe_pane_reader(reader_state, pane).await;
        });
        std::future::ready(Ok(()))
@ -359,18 +339,14 @@ impl channel_server::Server for ChannelServerImpl {
        let label = channel.strip_prefix("tmux.").unwrap_or(&channel).to_string();
        let mut s = self.state.borrow_mut();
-        if s.remove_pane(&label) {
+        if let Some(pane_id) = s.remove_pane(&label) {
            info!("closing channel tmux.{}", label);
            s.channel_logs.remove(&format!("tmux.{}", label));
-            // Stop piping if the pane is still around (if it is gone the
+            // Disconnect pipe-pane
-            // pipe is already dead). The reader then sees the pane
+            let _ = std::process::Command::new("tmux")
-            // unregistered and exits.
+                .args(["pipe-pane", "-t", &pane_id])
-            if let Some(pane_id) = find_pane_by_name(&label) {
+                .output();
                let _ = std::process::Command::new("tmux")
                    .args(["pipe-pane", "-t", &pane_id])
                    .output();
            }
        }
        std::future::ready(Ok(()))
@ -421,13 +397,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
    tokio::task::LocalSet::new()
        .run_until(async move {
-            // Start a pipe-pane reader for each configured pane; each
+            // Start a pipe-pane reader for each configured pane
            // resolves its live pane id by label and retries until
            // connected.
            for pane in state.borrow().config.panes.clone() {
                let reader_state = state.clone();
                tokio::task::spawn_local(async move {
-                    pipe_pane_reader(reader_state, pane.label).await;
+                    pipe_pane_reader(reader_state, pane).await;
                });
            }
--- a/flake.lock
+++ b/flake.lock
@ -1,27 +0,0 @@
 {
  "nodes": {
    "nixpkgs": {
      "locked": {
        "lastModified": 1781074563,
        "narHash": "sha256-md8WlXOlfnIeHeOScMTTHFyf2d6iaTwPl2apR5EQ3P4=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "9ae611a455b90cf061d8f332b977e387bda8e1ca",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-unstable",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "nixpkgs": "nixpkgs"
      }
    }
  },
  "root": "root",
  "version": 7
 }
--- a/flake.nix
+++ b/flake.nix
@ -1,42 +0,0 @@
 # Flake exposing a `default` development shell for each supported system.
 {
  description = "Development shell for consciousness";
  inputs = {
    # Package set comes from the nixos-unstable branch of nixpkgs.
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
  };
  outputs = { nixpkgs, ... }:
    let
      # Systems a dev shell is generated for.
      systems = [
        "x86_64-linux"
        "aarch64-linux"
      ];
      # Maps a function over `systems`, yielding an attribute set keyed
      # by system name.
      forAllSystems = nixpkgs.lib.genAttrs systems;
    in
    {
      devShells = forAllSystems (system:
        let
          # nixpkgs instantiated for the system being built.
          pkgs = import nixpkgs { inherit system; };
        in
        {
          default = pkgs.mkShell {
            # Tools made available inside the shell.
            packages = with pkgs; [
              cargo
              rustc
              rustfmt
              clippy
              rust-analyzer
              capnproto
              pkg-config
              jq
              sqlite
              python3
            ];
            # Environment variable exported into the shell.
            RUST_BACKTRACE = "1";
          };
        });
    };
 }
--- a/proto/salience.proto
+++ b/proto/salience.proto
@ -1,276 +0,0 @@
 // salience.proto — stateful generation + per-token concept readout over gRPC.
 //
 // Shape:
 //   - One server-streaming RPC (Generate) for inference. Every other
 //     operation is unary. This is the minimum streaming we need —
 //     tokens arrive one at a time with optional readouts / logprobs —
 //     and keeping everything else unary makes the client dramatically
 //     simpler than a single bidi state machine did.
 //
 //   - Server-side sessions hold the token list and image binaries.
 //     Sessions exist for bandwidth: at 200K tokens we'd otherwise
 //     re-ship ~800KB every turn, which hurts badly over a WAN link.
 //     vLLM's prefix cache holds the KV; the session just gives the
 //     client a handle so it can send deltas.
 //
 //   - The client is the source of truth for prompt content. The server
 //     is the source of truth for image token expansion (how many
 //     IMAGE_PAD tokens an image becomes under this model). The client
 //     writes the pre-expanded <|vision_start|> + IMAGE_PAD×N +
 //     <|vision_end|> block into Generate's append_tokens and declares
 //     its range in `images`; the server validates N against the vision
 //     encoder and returns INVALID_ARGUMENT on mismatch.
 //
 //   - Every mutation carries (offset, truncating): the client's view of
 //     the server's current length, plus whether the client is deliberately
 //     rewriting history. Server validates on each call and rejects drift.
 //     No silent divergence, no migration bugs.
 //
 //   - Errors use gRPC status codes. NOT_FOUND for missing sessions,
 //     FAILED_PRECONDITION for offset drift or image-block splits,
 //     INVALID_ARGUMENT for image pad-count mismatches,
 //     RESOURCE_EXHAUSTED for context overflow, ABORTED for "session busy".
 //
 // Not in v1:
 //   - Authentication beyond a shared bearer token in gRPC metadata.
 //   - Multi-tenant session namespacing.
 //   - Sampling traces beyond top-k logprobs.
 syntax = "proto3";
 package salience.v1;
 // ============================================================
 //  Service
 // ============================================================
 service Salience {
  // Create a fresh session. Client uses session_id on every subsequent
  // RPC until CloseSession or TTL eviction (default 30 min idle). To
  // refresh TTL across a long pause, issue a no-op Generate (empty
  // append_tokens, max_tokens=0, no ranges).
  rpc OpenSession(OpenSessionRequest) returns (OpenSessionResponse);
  // Release the session's tokens + images. Idempotent.
  rpc CloseSession(CloseSessionRequest) returns (CloseSessionResponse);
  // Branch a session at a given token position. The new session
  // inherits tokens [0, at_position) and any images whose vision
  // block lies fully in that range. Rejected with FAILED_PRECONDITION
  // if at_position falls inside an image block (client picks a clean
  // boundary).
  rpc ForkSession(ForkSessionRequest) returns (ForkSessionResponse);
  // Prefill + optionally decode. Images are attached inline via
  // `GenerateRequest.images`; the client writes its own pre-expanded
  // <|vision_start|> + N*<|image_pad|> + <|vision_end|> runs into
  // `append_tokens` and declares each run's range in `images[i]`.
  // Server validates run length against the actual vision-encoder
  // feature count and returns INVALID_ARGUMENT on mismatch. Stream
  // yields Token events (with optional readouts / logprobs per
  // position) followed by a terminating Done.
  rpc Generate(GenerateRequest) returns (stream GenerateEvent);
  // Readout manifest for the currently-loaded model — concept names,
  // layer indices, tensor dtype. Stateless; fetch once at client
  // startup and cache.
  rpc GetReadoutManifest(GetReadoutManifestRequest) returns (ReadoutManifest);
  // Dump the full token stream of a session. Debug-only: used by the
  // client to verify its local accounting against the server's
  // session.tokens byte-for-byte when divergence is suspected. Not
  // cheap — copies the whole sequence across the wire.
  rpc DumpSession(DumpSessionRequest) returns (DumpSessionResponse);
 }
 // ============================================================
 //  Lifecycle
 // ============================================================
 message OpenSessionRequest {
  // Model identifier, must match vLLM's served model. The server
  // only has one model loaded; this is a safety check on what the
  // client thinks it's talking to.
  string model = 1;
 }
 // Reply to OpenSession.
 message OpenSessionResponse {
  // Handle the client passes as session_id on every subsequent RPC
  // until CloseSession or TTL eviction.
  string session_id = 1;
  // NOTE(review): presumably the server's maximum context length in
  // tokens — confirm against the server implementation.
  uint32 max_model_len = 2;
 }
 message CloseSessionRequest {
  string session_id = 1;
 }
 message CloseSessionResponse {}
 message ForkSessionRequest {
  string session_id = 1;    // source session
  uint32 at_position = 2;   // new session inherits tokens [0, at_position)
 }
 message ForkSessionResponse {
  string session_id = 1;    // new session
 }
 // ============================================================
 //  Inference
 // ============================================================
 // One image attached to a Generate call. The client is responsible
 // for writing the expanded placeholder run (VISION_START +
 // N*IMAGE_PAD + VISION_END) into `GenerateRequest.append_tokens` at
 // positions [pad_range_start, pad_range_end) and pairing it with
 // the corresponding `ImageAttachment` entry. Server validates that
 // the declared range's pad count matches what the vision encoder
 // produces, and returns INVALID_ARGUMENT if they disagree.
 message ImageAttachment {
  // Image bytes (PNG / JPEG / WebP / …).
  bytes  bytes = 1;
  // MIME type, e.g. "image/png".
  string mime = 2;
  // Absolute token positions (in `session.tokens` AFTER `append_tokens`
  // is applied) spanning the full vision block —  `[vision_start,
  // pad*N, vision_end]`. end is exclusive, so end - start == N + 2.
  uint32 pad_range_start = 3;
  uint32 pad_range_end = 4;
 }
 message GenerateRequest {
  string session_id = 1;
  // Tokens to append before prefill. May be empty. Client writes the
  // full vision block (VISION_START + N*IMAGE_PAD + VISION_END) for
  // any newly-attached image directly into this stream; each such
  // block must be paired with a matching entry in `images`. The
  // server validates that the declared ranges all point at IMAGE_PAD
  // runs and that each run's length matches what the vision encoder
  // produces for the corresponding image.
  repeated uint32 append_tokens = 2;
  // Client's view of session.tokens length at the time of the call.
  // Must equal server's actual length, OR be strictly less when
  // truncating=true (server rewinds before appending). Any other
  // mismatch is FAILED_PRECONDITION.
  uint32 offset = 3;
  bool   truncating = 4;
  // Decode budget. 0 = prefill only (no decode, emit Token events
  // for positions covered by logprobs_ranges / readout_ranges, then
  // Done; replaces the old /score endpoint). >0 = decode up to this
  // many tokens, stopping early on EOS / stop_token_ids.
  uint32 max_tokens = 5;
  // Position ranges (absolute, within the session's post-append
  // token list) at which to emit logprobs on Token events. Empty =
  // no logprobs. `logprob_top_k > 0` returns the top-k alternative
  // tokens at each covered position; `logprob_top_k == 0` returns
  // only the sampled-token's logprob.
  repeated PositionRange logprobs_ranges = 6;
  uint32                 logprob_top_k = 7;
  // Position ranges at which to emit concept-readout vectors. Empty
  // = no readouts. Logical shape per position is
  // [n_layers][n_concepts] — see GetReadoutManifest.
  repeated PositionRange readout_ranges = 8;
  // Sampling parameters. Meaningful only when max_tokens > 0.
  float           temperature = 9;      // default 1.0 when zero
  float           top_p = 10;           // default 1.0 when zero
  uint32          top_k = 11;           // default 0 (disabled)
  repeated uint32 stop_token_ids = 12;
  // vLLM scheduler priority (0 = interactive, 10 = batch).
  int32 priority = 13;
  // Images newly attached on this call. Each entry describes one
  // image's binary bytes, its mime type, and the exact token-position
  // range of its pre-expanded placeholder run inside `session.tokens`
  // after `append_tokens` is applied. See `ImageAttachment`.
  repeated ImageAttachment images = 14;
 }
 message PositionRange {
  uint32 start = 1;   // inclusive
  uint32 end = 2;     // exclusive
 }
 // One element of the Generate response stream: either a Token for a
 // single position, or the GenerateDone that terminates the stream.
 message GenerateEvent {
  oneof event {
    Token        token = 1;
    GenerateDone done = 2;
  }
 }
 message Token {
  // Token id at this position. For prefill this is the prompt token;
  // for decode it's the sampled token.
  uint32 id = 1;
  // Absolute position in the session's token list.
  uint32 position = 2;
  // True for prefill positions, false for decode.
  bool   is_prefill = 3;
  // Concept readout at this position. Empty if the position wasn't
  // covered by readout_ranges.
  repeated float readout = 4 [packed = true];
  // Top-k alternative tokens' logprobs at this position — populated
  // when the position is covered by logprobs_ranges and
  // logprob_top_k > 0.
  repeated TokenLogprob logprobs = 5;
  // Logprob of the token at `position` (the prompt token for
  // prefill, the sampled token for decode). Populated when the
  // position is covered by logprobs_ranges.
  float sampled_logprob = 6;
  bool  has_sampled_logprob = 7;
 }
 // One entry of Token.logprobs: a token id paired with its logprob.
 message TokenLogprob {
  uint32 id = 1;
  float  logprob = 2;
 }
 // Terminating event of a Generate stream: token counts for the call
 // plus the reason the stream ended.
 message GenerateDone {
  // NOTE(review): presumably total_tokens == prompt_tokens +
  // completion_tokens — confirm against the server implementation.
  uint32 prompt_tokens = 1;
  uint32 completion_tokens = 2;
  uint32 total_tokens = 3;
  enum FinishReason {
    FINISH_REASON_UNSPECIFIED = 0;
    FINISH_REASON_EOS = 1;              // emitted EOS / stop token
    FINISH_REASON_LENGTH = 2;           // hit max_tokens
    FINISH_REASON_CANCELLED = 3;        // client cancelled
    FINISH_REASON_STOP_STRING = 4;      // matched a stop string
  }
  FinishReason finish_reason = 4;
 }
 // ============================================================
 //  Readout manifest
 // ============================================================
 message GetReadoutManifestRequest {}
 // Readout description for the currently-loaded model, returned by
 // GetReadoutManifest. Per-position readouts have logical shape
 // [n_layers][n_concepts] (see GenerateRequest.readout_ranges).
 message ReadoutManifest {
  // Concept names.
  repeated string concepts = 1;
  // Layer indices.
  repeated uint32 layers = 2;
  // NOTE(review): presumably the model's hidden dimension — confirm.
  uint32          hidden_size = 3;
  // Tensor dtype.
  string          dtype = 4;
 }
 // ============================================================
 //  Debug
 // ============================================================
 message DumpSessionRequest {
  string session_id = 1;
 }
 message DumpSessionResponse {
  // The full session.tokens sequence, verbatim.
  repeated uint32 tokens = 1 [packed = true];
 }
--- a/scripts/quantize_qwen3_6_mm.py
+++ b/scripts/quantize_qwen3_6_mm.py
@ -1,327 +0,0 @@
 """Quantize Qwen3.6-27B (multimodal) to FP8 for vLLM serving.
 Why this exists
 ---------------
 The earlier `quantize_qwen3_6.py` (in shell history, never committed)
 loaded the model with `AutoModelForCausalLM`, which silently strips
 the multimodal arch. Result: an FP8 checkpoint with no vision tower
 weights at all. vLLM happily instantiated the vision tower from the
 config and ran it with default/uninitialized weights, producing
 gibberish image features and `!!!!!!`-style output. We chased that
 through the protocol layer for a long time before tracing it back
 to the quant. This script avoids that trap by loading via the
 config-declared class explicitly.
 Recipe
 ------
 FP8_DYNAMIC (per-channel weight scales, per-token dynamic activation
 scales, both E4M3) for Linear weights, with an `ignore` list derived
 from Unsloth's UD-Q8_K_XL (`unsloth/Qwen3.6-27B-GGUF`). Their
 sensitivity sweep flagged specific layers as quantization-fragile;
 we honor those layer indices even though their algorithm is
 GGUF-native Q8_K and ours is FP8 — sensitivity is a layer property,
 not an algorithm property.
 vLLM fusion constraint
 ~~~~~~~~~~~~~~~~~~~~~~
 vLLM's Qwen3.5/3.6 model code fuses sub-modules at load time:
  qkv_proj      ← q_proj, k_proj, v_proj
  gate_up_proj  ← gate_proj, up_proj
  in_proj_qkvz  ← in_proj_qkv, in_proj_z
  in_proj_ba    ← in_proj_b, in_proj_a
 compressed_tensors rejects checkpoints where sub-modules of a fused
 layer have different quantization schemes. Our ignore list is shaped
 around this — within any fused layer, all components share a scheme.
 That's the reason `in_proj_qkv` is ignored even though Unsloth's
 sweep doesn't single it out, and the reason late-stack attn override
 covers q/k/v rather than just q/k.
 MTP merge
 ---------
 `Qwen3_5ForConditionalGeneration` doesn't expose the MTP submodule,
 so `oneshot()` produces a checkpoint with the 15 `mtp.*` tensors
 silently dropped. After quantization we read the MTP weights back
 out of the upstream cached snapshot and splice them into the saved
 safetensors at BF16. They're small (~850 MB) so quantizing them
 isn't worth the calibration risk; speculative-decoding code paths
 in vLLM expect the MTP head present.
 Output
 ------
 `OUTPUT_DIR` gets the FP8 model.safetensors + config + processor +
 recipe.yaml. Vision tower stays BF16 (in `ignore`); LM Linears go
 to FP8; norms, SSM internals (not Linear), and MTP tensors stay
 BF16 untouched.
 Verification at end: re-opens the saved safetensors and asserts
 - vision .weight tensors present (>= 150; full count is 167)
 - lm_head + embed_tokens at fp16/bf16 (NOT FP8)
 - a sampled FP8'd Linear actually has float8 dtype
 - 15 mtp.* tensors present
 Run
 ---
    ~/vllm-venv/bin/python quantize_qwen3_6_mm.py
 """
 from __future__ import annotations
 import glob
 import json
 import sys
 from pathlib import Path
 import torch
 from huggingface_hub import snapshot_download
 from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import QuantizationModifier
 from safetensors import safe_open
 from safetensors.torch import save_file
 from transformers import AutoProcessor
 from transformers.models.qwen3_5.modeling_qwen3_5 import (
    Qwen3_5ForConditionalGeneration,
 )
 MODEL = "Qwen/Qwen3.6-27B"
 OUTPUT_DIR = "/home/ubuntu/amygdala-training/Qwen3.6-27B-FP8-mm"
 # Layers Unsloth's UD-Q8_K_XL keeps at F16 (perplexity-sensitive
 # in their sweep). Late-stack clustering is consistent with the
 # general finding that errors near the output propagate directly
 # to logits.
 LATE_FFN_LAYERS = (50, 51, 59, 62, 63)
 LATE_ATTN_LAYERS = (51, 59, 63)
 # Build the ignore regex list. Note: llmcompressor matches these
 # patterns against MODULE names (no `.weight` suffix) when walking
 # `named_modules()` for `targets=["Linear"]`. The first pass of
 # this script used `\.weight$` patterns and silently quantized
 # lm_head + every linear_attn projection — verified post-hoc by
 # inspecting the saved safetensors. Patterns now anchor on `$`
 # at the module name.
 IGNORE_PATTERNS: list[str] = [
    # Original recipe: lm_head and embeddings always full-precision.
    # (embed_tokens is an Embedding, not a Linear, so it's already
    # ignored by `targets=["Linear"]`. Pattern kept as belt-and-
    # suspenders in case future llmcompressor versions widen the
    # target set.)
    "re:lm_head$",
    "re:.*embed_tokens$",
    # Vision tower — entire `model.visual.*` subtree (vision
    # transformer blocks + merger + patch_embed + pos_embed).
    # Unsloth ships the vision tower as a separate `mmproj-BF16.gguf`
    # for GGUF consumers; in our single-file FP8 setup we just leave
    # them at BF16.
    "re:model\\.visual\\..*",
    # MTP (multi-token prediction) module — Unsloth's GGUF doesn't
    # carry MTP weights so we have no precision signal from them;
    # safest to keep BF16.
    "re:mtp\\..*",
    # Linear-attention block — keep ENTIRELY at BF16. vLLM fuses
    # `in_proj_qkv` and `in_proj_z` into a single `in_proj_qkvz`
    # layer, and compressed_tensors rejects mixed schemes within a
    # fused layer. Unsloth's recipe keeps z, a, b, out at F16/F32
    # (gate/SSM internals are quantization-fragile in the GatedDeltaNet
    # update), so the principled choice is to also keep `in_proj_qkv`
    # at BF16 rather than FP8'ing the gate to match. We give up ~1 GB
    # of FP8 coverage; in exchange we follow Unsloth's quality intent
    # and load cleanly under vLLM. (`in_proj_a` + `in_proj_b` are
    # likewise fused as `in_proj_ba` — both ignored, consistent.)
    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_qkv$",
    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_z$",
    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_a$",
    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_b$",
    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.out_proj$",
    # Per-layer high-precision MLP (Unsloth flagged exactly these
    # late-stack indices in their UD-Q8_K_XL sensitivity sweep, all
    # three of {gate, up, down} per layer). vLLM fuses gate+up into
    # `gate_up_proj`; ignoring both keeps the fused layer consistent.
    # `down_proj` is its own (non-fused) layer.
    "re:model\\.language_model\\.layers\\.("
    + "|".join(str(n) for n in LATE_FFN_LAYERS)
    + ")\\.mlp\\.(down|gate|up)_proj$",
    # Per-layer high-precision attention q/k/v (Unsloth's sweep upgrades
    # only q and k; we extend to v because vLLM fuses q/k/v into
    # `qkv_proj` and rejects mixed schemes). `o_proj` is its own
    # non-fused layer and stays at FP8.
    "re:model\\.language_model\\.layers\\.("
    + "|".join(str(n) for n in LATE_ATTN_LAYERS)
    + ")\\.self_attn\\.(q|k|v)_proj$",
 ]
 def main() -> None:
    """Run the full pipeline: load, quantize, save, merge MTP, verify.
    Loads MODEL through `Qwen3_5ForConditionalGeneration`, applies an
    FP8_DYNAMIC `QuantizationModifier` to Linear modules (minus
    IGNORE_PATTERNS) via `oneshot`, saves the processor next to the
    weights in OUTPUT_DIR, then calls merge_mtp and verify_output.
    """
    def say(message: str) -> None:
        # Every progress line is flushed immediately.
        print(message, flush=True)
    say(f"Loading {MODEL} as multimodal "
        f"(Qwen3_5ForConditionalGeneration)...")
    load_options = {
        "dtype": torch.bfloat16,
        "device_map": "auto",
        "trust_remote_code": True,
    }
    mm_model = Qwen3_5ForConditionalGeneration.from_pretrained(
        MODEL, **load_options)
    say(f"  loaded: {mm_model.__class__.__name__}")
    say("Loading processor (text + image preprocessing)...")
    mm_processor = AutoProcessor.from_pretrained(
        MODEL, trust_remote_code=True)
    say("Running FP8_DYNAMIC oneshot quantization...")
    say(f"  ignore list: {len(IGNORE_PATTERNS)} patterns")
    fp8_recipe = QuantizationModifier(
        targets=["Linear"],
        scheme="FP8_DYNAMIC",
        ignore=IGNORE_PATTERNS,
    )
    oneshot(model=mm_model, recipe=fp8_recipe, output_dir=OUTPUT_DIR)
    mm_processor.save_pretrained(OUTPUT_DIR)
    say(f"  wrote model + processor to {OUTPUT_DIR}")
    # Post-processing: restore the MTP tensors, then sanity-check the
    # checkpoint on disk.
    merge_mtp(OUTPUT_DIR)
    verify_output(OUTPUT_DIR)
 def merge_mtp(out_dir: str) -> None:
    """Splice upstream MTP tensors into the saved FP8 safetensors.
    `Qwen3_5ForConditionalGeneration` skips the MTP submodule on load,
    so oneshot's output is missing the `mtp.*` tensors. We resolve the
    upstream snapshot via the HF cache (already populated by
    from_pretrained), pull just the `mtp.*` tensors out as stored
    upstream, and rewrite the safetensors with them merged in. The
    metadata header of the existing file (which carries the FP8 format
    identifier vLLM needs to dequantize) is passed through verbatim.
    The merged file is written to a sibling `<name>.new` path and then
    renamed over the original, so a crash mid-write doesn't corrupt the
    33+ GB checkpoint we just spent minutes producing.
    Args:
        out_dir: Directory holding the single-shard FP8 checkpoint.
    Exits via sys.exit when the upstream checkpoint yields no `mtp.*`
    tensors, when `out_dir` does not hold exactly one `*.safetensors`
    file, or when an MTP key already exists in the FP8 output.
    """
    print("\nMerging upstream MTP tensors...", flush=True)
    upstream_dir = Path(snapshot_download(
        MODEL,
        allow_patterns=["model.safetensors.index.json",
                        "model-*-of-*.safetensors"],
    ))
    with open(upstream_dir / "model.safetensors.index.json") as f:
        idx = json.load(f)
    # Only open the shards that actually hold MTP weights.
    mtp_shards = sorted({v for k, v in idx["weight_map"].items()
                         if k.startswith("mtp.")})
    print(f"  MTP tensors live in shards: {mtp_shards}", flush=True)
    mtp_tensors: dict[str, torch.Tensor] = {}
    for shard in mtp_shards:
        with safe_open(upstream_dir / shard, framework="pt") as f:
            for k in f.keys():
                if k.startswith("mtp."):
                    mtp_tensors[k] = f.get_tensor(k).contiguous()
    # Fail fast: with nothing to merge there is no point loading and
    # rewriting the whole checkpoint only to fail later in verification.
    if not mtp_tensors:
        sys.exit(f"FAIL: no mtp.* tensors found in upstream {MODEL}; "
                 f"nothing to merge.")
    mtp_bytes = sum(t.numel() * t.element_size()
                    for t in mtp_tensors.values())
    print(f"  loaded {len(mtp_tensors)} mtp tensors "
          f"({mtp_bytes/1e6:.1f} MB)", flush=True)
    fp8_files = sorted(Path(out_dir).glob("*.safetensors"))
    if len(fp8_files) != 1:
        sys.exit(f"FAIL: expected single safetensors shard, "
                 f"got {fp8_files}")
    existing_path = fp8_files[0]
    with safe_open(existing_path, framework="pt") as f:
        metadata = f.metadata() or {}
        all_tensors = {k: f.get_tensor(k) for k in f.keys()}
    # Refuse to overwrite anything the quantized output already has.
    overlap = set(all_tensors) & set(mtp_tensors)
    if overlap:
        sys.exit(f"FAIL: MTP key collision with FP8 output: "
                 f"{sorted(overlap)[:5]}")
    all_tensors.update(mtp_tensors)
    # Write next to the original, then rename over it.
    tmp_path = existing_path.with_name(existing_path.name + ".new")
    print(f"  rewriting {existing_path.name} "
          f"({len(all_tensors)} tensors)...", flush=True)
    save_file(all_tensors, str(tmp_path), metadata=metadata)
    tmp_path.replace(existing_path)
    print("  done", flush=True)
 def verify_output(out_dir: str) -> None:
    """Re-open the saved safetensors and assert the recipe landed.
    Every check is a hard failure via sys.exit:
    - at least 150 `model.visual.*` `.weight` tensors, none of them FP8
    - `lm_head.weight` present and not FP8
    - no `*embed_tokens.weight` tensor is FP8
    - at least one FP8 `.weight` under `language_model.layers`
    - exactly 15 `mtp.*` tensors
    Args:
        out_dir: Directory containing the `*.safetensors` file(s) to
            inspect.
    """
    print(f"\nVerifying {out_dir}...", flush=True)
    files = sorted(glob.glob(f"{out_dir}/*.safetensors"))
    if not files:
        sys.exit(f"FAIL: no safetensors in {out_dir}")
    vision_keys: list[tuple[str, str]] = []
    fp8_embeds: list[tuple[str, str]] = []
    fp8_sample: tuple[str, str] | None = None
    lm_head_dtype: str | None = None
    mtp_keys: list[str] = []
    for fp in files:
        with safe_open(fp, framework="pt") as f:
            for k in f.keys():
                if k.startswith("mtp."):
                    mtp_keys.append(k)
                # Some FP8 quants write a sibling `_scale` / `_zero_point`;
                # we just care about the .weight tensors.
                if not k.endswith(".weight"):
                    continue
                t = f.get_tensor(k)
                dtype = str(t.dtype).replace("torch.", "")
                if "model.visual." in k:
                    vision_keys.append((k, dtype))
                if k == "lm_head.weight":
                    lm_head_dtype = dtype
                # The module docstring promises embeddings stay out of
                # FP8; record any that did not.
                if k.endswith("embed_tokens.weight") and "float8" in dtype:
                    fp8_embeds.append((k, dtype))
                if (fp8_sample is None
                        and "float8" in dtype
                        and "language_model.layers" in k):
                    fp8_sample = (k, dtype)
    # Qwen3.6-27B has 167 vision `.weight` tensors (333 vision tensors
    # total, the rest are `.bias` and per-block norms). 150 is a
    # sanity floor that catches "vision tower didn't make it through"
    # without being brittle to minor arch revisions.
    if len(vision_keys) < 150:
        sys.exit(f"FAIL: only {len(vision_keys)} vision tensors found "
                 f"(expected >= 150). Vision tower didn't make it "
                 f"through the quant.")
    bad_vision = [(k, d) for k, d in vision_keys if "float8" in d]
    if bad_vision:
        sys.exit(f"FAIL: vision weights got quantized to FP8: "
                 f"{bad_vision[:3]}...")
    if lm_head_dtype is None:
        sys.exit("FAIL: lm_head.weight not found in output.")
    if "float8" in lm_head_dtype:
        sys.exit(f"FAIL: lm_head.weight is FP8 ({lm_head_dtype}); "
                 f"should be BF16/FP16.")
    if fp8_embeds:
        sys.exit(f"FAIL: embed_tokens got quantized to FP8: "
                 f"{fp8_embeds[:3]}; should be BF16/FP16.")
    if fp8_sample is None:
        sys.exit("FAIL: no FP8 weights found in language_model.layers — "
                 "the recipe didn't quantize anything.")
    # Upstream Qwen3.6-27B has exactly 15 mtp.* tensors (1 fused
    # transformer block + projection + norms). merge_mtp() should
    # have spliced all of them in.
    if len(mtp_keys) != 15:
        sys.exit(f"FAIL: expected 15 mtp.* tensors, found "
                 f"{len(mtp_keys)}. merge_mtp() missed some.")
    print(f"  ✓ {len(vision_keys)} vision tensors at "
          f"{vision_keys[0][1]} (not FP8)")
    print(f"  ✓ lm_head.weight at {lm_head_dtype} (not FP8)")
    print("  ✓ no embed_tokens weights at FP8")
    print(f"  ✓ FP8 sample: {fp8_sample[0]} = {fp8_sample[1]}")
    print(f"  ✓ {len(mtp_keys)} mtp.* tensors present")
    print("DONE")
 if __name__ == "__main__":
    main()
--- a/src/agent/api/http.rs
+++ b/src/agent/api/http.rs
@ -100,7 +100,7 @@ impl HttpClient {
                .map_err(|e| anyhow::anyhow!("invalid server name: {e}"))?;
            let connector = tokio_rustls::TlsConnector::from(self.tls.clone());
            let tls = connector.connect(server_name.to_owned(), tcp).await
-                .map_err(|e| anyhow::anyhow!("TLS handshake to {host}: {e}"))?;
+                .context("TLS handshake")?;
            TokioIo::new(Box::new(tls) as Box<dyn IoStream>)
        } else {
            TokioIo::new(Box::new(tcp) as Box<dyn IoStream>)
@ -154,14 +154,6 @@ impl HttpResponse {
        Ok(String::from_utf8_lossy(&bytes).into_owned())
    }
    /// Consume the response and return its whole body as raw bytes
    /// (for binary downloads). If collecting the body fails, the error
    /// is returned with the context "reading response body".
    pub async fn bytes(self) -> Result<Bytes> {
        let collected = self
            .body
            .collect()
            .await
            .context("reading response body")?;
        Ok(collected.to_bytes())
    }
    /// Read the entire body and deserialize as JSON.
    pub async fn json<T: serde::de::DeserializeOwned>(self) -> Result<T> {
        let bytes = self.body.collect().await
@ -198,7 +190,6 @@ impl HttpClientBuilder {
    }
    pub fn build(self) -> HttpClient {
        install_rustls_crypto_provider();
        let certs = rustls_native_certs::load_native_certs()
            .certs.into_iter()
            .collect::<Vec<_>>();
@ -206,13 +197,6 @@ impl HttpClientBuilder {
        for cert in certs {
            root_store.add(cert).ok();
        }
        // Also trust any `.pem` files under `~/.consciousness/certs/` —
        // self-signed server certs for our own vllm hosts live there.
        // Drop a new `<host>.pem` in the dir to trust a new server; no
        // code change needed.
        for cert in load_user_certs() {
            root_store.add(cert).ok();
        }
        let tls = Arc::new(
            ClientConfig::builder()
                .with_root_certificates(root_store)
@ -226,65 +210,6 @@ impl HttpClientBuilder {
    }
 }
 /// Register the `ring` crypto provider as rustls' process-wide default,
 /// at most once.
 ///
 /// rustls 0.23 doesn't pick a provider automatically when multiple
 /// features could supply one (e.g. when tonic pulls in both ring and
 /// aws-lc-rs via transitive deps). A function-local `OnceLock` guards the
 /// installation, so repeated calls from any number of callers are cheap
 /// no-ops after the first.
 fn install_rustls_crypto_provider() {
    static INSTALLED: std::sync::OnceLock<()> = std::sync::OnceLock::new();
    INSTALLED.get_or_init(|| {
        // The outcome of `install_default` is deliberately discarded.
        rustls::crypto::ring::default_provider().install_default().ok();
    });
 }
 /// Concatenate the raw bytes of every `.pem` file found directly under
 /// `~/.consciousness/certs/` into a single PEM bundle.
 ///
 /// The bundle is suitable for passing to a tonic
 /// `ClientTlsConfig::ca_certificate(Certificate::from_pem(...))` call so
 /// gRPC connections trust the same self-signed servers the HTTP path does.
 ///
 /// Each file's contents are followed by a newline when they do not already
 /// end with one, so adjacent files never run together. A missing home
 /// directory, a missing or unreadable certs directory, and unreadable
 /// files are all skipped silently; in the worst case the result is empty.
 pub(crate) fn load_user_certs_pem_bytes() -> Vec<u8> {
    let mut bundle = Vec::new();
    let cert_dir = match dirs::home_dir() {
        Some(home) => home.join(".consciousness").join("certs"),
        None => return bundle,
    };
    let listing = match std::fs::read_dir(&cert_dir) {
        Ok(listing) => listing,
        Err(_) => return bundle,
    };
    let pem_paths = listing
        .flatten()
        .map(|entry| entry.path())
        .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some("pem"));
    for pem_path in pem_paths {
        let Ok(contents) = std::fs::read(&pem_path) else { continue };
        bundle.extend_from_slice(&contents);
        // Keep a line break between consecutive files.
        if !contents.ends_with(b"\n") {
            bundle.push(b'\n');
        }
    }
    bundle
 }
 /// Parse every `.pem` file found directly under `~/.consciousness/certs/`
 /// and return all certificates they contain in DER form.
 ///
 /// A missing home directory, a missing or unreadable certs directory,
 /// unreadable files and unparsable PEM entries are skipped silently, so
 /// the result is simply empty when nothing usable is found.
 fn load_user_certs() -> Vec<rustls::pki_types::CertificateDer<'static>> {
    let Some(home) = dirs::home_dir() else { return Vec::new() };
    let cert_dir = home.join(".consciousness").join("certs");
    let Ok(listing) = std::fs::read_dir(&cert_dir) else { return Vec::new() };
    listing
        .flatten()
        .map(|entry| entry.path())
        .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some("pem"))
        .filter_map(|path| std::fs::read(&path).ok())
        .flat_map(|pem| {
            // One file may hold several certificates; keep each that parses.
            let mut reader = pem.as_slice();
            let parsed: Vec<_> = rustls_pemfile::certs(&mut reader).flatten().collect();
            parsed
        })
        .collect()
 }
 /// Shorthand bound for any async byte stream usable behind hyper's IO
 /// adapter: readable, writable, `Send`, `Unpin` and `'static`.
 trait IoStream: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static {}
 /// Blanket implementation: every type meeting the bounds is an `IoStream`.
 impl<S> IoStream for S
 where
    S: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static,
 {
 }
--- a/src/agent/api/mod.rs
+++ b/src/agent/api/mod.rs
@ -7,14 +7,13 @@
 // Set POC_DEBUG=1 for verbose per-turn logging.
 pub mod http;
 pub mod salience;
-use std::time::Duration;
+use std::time::{Duration, Instant};
 use anyhow::Result;
 use tokio::sync::mpsc;
 use serde::Deserialize;
-use http::HttpClient;
+use http::{HttpClient, HttpResponse};
 #[derive(Debug, Clone, Deserialize)]
 pub struct Usage {
@ -38,21 +37,6 @@ pub struct ReadoutManifest {
 /// from pairing with the manifest fetched at startup.
 pub type TokenReadout = Vec<Vec<f32>>;
 /// Client-side sampling state. Mirrors the wire-level fields in
 /// `GenerateRequest` (proto flattened its `SamplingParams` submessage
 /// in so the server handler reads them directly), but stays as a
 /// grouped struct on the client because UI / config / tests pass
 /// these around together.
 #[derive(Clone, Copy)]
 pub struct SamplingParams {
    /// Copied into the request's `temperature` field.
    pub temperature: f32,
    /// Copied into the request's `top_p` field.
    pub top_p: f32,
    /// Copied into the request's `top_k` field.
    pub top_k: u32,
    /// Decode budget. 0 = prefill only; >0 = decode up to this many
    /// tokens, stopping early on EOS / stop_token_ids.
    pub max_tokens: u32,
 }
 /// A JoinHandle that aborts its task when dropped.
 pub(crate) struct AbortOnDrop(tokio::task::JoinHandle<()>);
@ -62,6 +46,13 @@ impl Drop for AbortOnDrop {
    }
 }
 /// Sampling parameters for model generation.
 ///
 /// The three fields are forwarded verbatim as the `temperature`, `top_p`
 /// and `top_k` keys of the `/completions` request body.
 #[derive(Clone, Copy)]
 pub(crate) struct SamplingParams {
    /// Value sent as the request's `temperature` key.
    pub temperature: f32,
    /// Value sent as the request's `top_p` key.
    pub top_p: f32,
    /// Value sent as the request's `top_k` key.
    pub top_k: u32,
 }
 // ─────────────────────────────────────────────────────────────
 //  Stream events — yielded by backends, consumed by the runner
@ -83,17 +74,6 @@ pub struct ApiClient {
    api_key: String,
    pub model: String,
    base_url: String,
    /// Cached readout manifest — fetched once per process and shared
    /// across ApiClient clones (every Agent/fork gets the same cell).
    /// `None` after fetch means the server has readout disabled (404).
    manifest: std::sync::Arc<tokio::sync::OnceCell<Option<ReadoutManifest>>>,
    /// Shared tonic Channel to the salience gRPC endpoint. Opened on
    /// first use and reused across every SessionHandle / RPC call
    /// derived from this ApiClient. tonic multiplexes concurrent
    /// requests over the HTTP/2 connection automatically.
    salience_channel: std::sync::Arc<
        tokio::sync::OnceCell<tonic::transport::Channel>
    >,
 }
 impl ApiClient {
@ -108,69 +88,33 @@ impl ApiClient {
            api_key: api_key.to_string(),
            model: model.to_string(),
            base_url: base_url.trim_end_matches('/').to_string(),
            manifest: std::sync::Arc::new(tokio::sync::OnceCell::new()),
            salience_channel: std::sync::Arc::new(tokio::sync::OnceCell::new()),
        }
    }
-    /// Return a `SalienceClient` on the shared gRPC channel — opens
+    pub(crate) fn stream_completion_mm(
    /// the channel on first call and reuses it thereafter across
    /// every ApiClient clone. All scoring / inference / session
    /// RPCs flow through this single multiplexed HTTP/2 connection.
    ///
    /// Bumps tonic's default 4 MiB encode/decode caps to 64 MiB on
    /// every client. Multimodal Generate requests carry pre-encoded
    /// image bytes inline (Qwen3.6's 768×768 patches at high res
    /// land around 5–8 MiB per turn), and Done events with full
    /// per-token readout vectors can also exceed 4 MiB on long runs.
    pub async fn salience_client(&self) -> Result<
        salience::pb::salience_client::SalienceClient<tonic::transport::Channel>
    > {
        let ch = self.salience_channel.get_or_try_init(|| async {
            let grpc_url = salience::derive_grpc_url(&self.base_url);
            log::debug!(target: "grpc",
                "opening shared salience channel: http_base={} -> grpc_url={}",
                self.base_url, grpc_url);
            salience::connect_channel(&grpc_url).await
        }).await?;
        const MAX_GRPC_MESSAGE_BYTES: usize = 64 * 1024 * 1024;
        Ok(salience::pb::salience_client::SalienceClient::new(ch.clone())
            .max_decoding_message_size(MAX_GRPC_MESSAGE_BYTES)
            .max_encoding_message_size(MAX_GRPC_MESSAGE_BYTES))
    }
    /// Stream generation via a gRPC session. Walks the prompt chunks
    /// comparing against the session's `committed_len`, sends the
    /// delta as interleaved `AppendImage` + intermediate
    /// `Generate(max_tokens=0)` (for text runs separating images) +
    /// a final `Generate(max_tokens=sampling.max_tokens, ...)` whose
    /// Token events stream back through the channel.
    ///
    /// On any gRPC error the session is dropped; the next call
    /// reopens fresh. Happy-path ordering: Token* Done. Error paths
    /// emit `StreamToken::Error` and close.
    pub(crate) fn stream_session_mm(
        &self,
-        session_lock: std::sync::Arc<crate::Mutex<Option<salience::SessionHandle>>>,
+        prompt_tokens: &[u32],
-        chunks: Vec<super::context::WireChunk>,
+        images: &[super::context::WireImage],
        images: Vec<super::context::WireImage>,
        match_upto: u32,
        sampling: SamplingParams,
        priority: Option<i32>,
        readout_shape: Option<(u32, u32)>,
    ) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) {
        let (tx, rx) = mpsc::unbounded_channel();
-        let client = self.clone();
+        let client = self.client.clone();
        let api_key = self.api_key.clone();
        let model = self.model.clone();
        let prompt_tokens = prompt_tokens.to_vec();
        let images: Vec<(Vec<u8>, String)> = images.iter()
            .map(|i| (i.bytes.clone(), i.mime.clone()))
            .collect();
        let base_url = self.base_url.clone();
        let handle = tokio::spawn(async move {
-            let result = run_session_generate(
+            let result = stream_completions(
-                session_lock, &client, chunks, images, match_upto, sampling,
+                &client, &base_url, &api_key, &model,
-                priority, readout_shape, &tx,
+                &prompt_tokens, &images, &tx, sampling, priority,
            ).await;
            if let Err(e) = result {
-                log::warn!(target: "grpc",
+                let _ = tx.send(StreamToken::Error(e.to_string()));
                    "stream_session_mm error, forwarding to UI: {:#}", e);
                let _ = tx.send(StreamToken::Error(format!("{:#}", e)));
            }
        });
@ -184,243 +128,386 @@ impl ApiClient {
    /// readout is enabled on the server, `Ok(None)` on 404 (disabled),
    /// or an error on any other failure.
    ///
-    /// First call performs the HTTP fetch; subsequent calls (including
+    /// Call once at startup and cache the result; the manifest doesn't
-    /// across ApiClient clones sharing the same cell) return the
+    /// change during a server run.
    /// cached result. The manifest doesn't change during a server run.
    pub fn model_str(&self) -> &str { &self.model }
    pub async fn fetch_readout_manifest(&self) -> Result<Option<ReadoutManifest>> {
-        let manifest = self.manifest.get_or_try_init(|| async {
+        let url = format!("{}/readout/manifest", self.base_url);
-            let url = format!("{}/readout/manifest", self.base_url);
+        let auth = format!("Bearer {}", self.api_key);
-            let auth = format!("Bearer {}", self.api_key);
+        let response = self
-            let response = self
+            .client
-                .client
+            .get_with_headers(&url, &[("Authorization", &auth)])
-                .get_with_headers(&url, &[("Authorization", &auth)])
+            .await
-                .await
+            .map_err(|e| anyhow::anyhow!("readout manifest fetch ({}): {}", url, e))?;
-                .map_err(|e| anyhow::anyhow!("readout manifest fetch ({}): {}", url, e))?;
+        let status = response.status();
-            let status = response.status();
+        if status.as_u16() == 404 {
-            if status.as_u16() == 404 {
+            return Ok(None);
-                return Ok::<_, anyhow::Error>(None);
+        }
-            }
+        if !status.is_success() {
-            if !status.is_success() {
+            let body = response.text().await.unwrap_or_default();
-                let body = response.text().await.unwrap_or_default();
+            let n = body.floor_char_boundary(body.len().min(500));
-                let n = body.floor_char_boundary(body.len().min(500));
+            anyhow::bail!("readout manifest HTTP {} ({}): {}", status, url, &body[..n]);
-                anyhow::bail!("readout manifest HTTP {} ({}): {}", status, url, &body[..n]);
+        }
-            }
+        Ok(Some(response.json().await?))
            Ok(Some(response.json().await?))
        }).await?;
        Ok(manifest.clone())
    }
 }
-/// Body of the gRPC-path streaming task. Walks the wire chunks
+async fn stream_completions(
-/// against the session's `committed_len`, sends the delta via
+    client: &HttpClient,
-/// AppendImage / intermediate prefill-only Generates / final decode
+    base_url: &str,
-/// Generate, and translates the final Generate's Token events into
+    api_key: &str,
-/// StreamTokens on `tx`. On success the session handle is returned
+    model: &str,
-/// to `session_lock` with an updated `committed_len`; on error the
+    prompt_tokens: &[u32],
-/// handle is dropped so the next call reopens.
+    images: &[(Vec<u8>, String)],
-async fn run_session_generate(
+    tx: &mpsc::UnboundedSender<StreamToken>,
    session_lock: std::sync::Arc<crate::Mutex<Option<salience::SessionHandle>>>,
    client: &ApiClient,
    chunks: Vec<super::context::WireChunk>,
    images: Vec<super::context::WireImage>,
    match_upto: u32,
    sampling: SamplingParams,
    priority: Option<i32>,
-    readout_shape: Option<(u32, u32)>,
+) -> anyhow::Result<()> {
-    tx: &mpsc::UnboundedSender<StreamToken>,
+    let mut request = serde_json::json!({
-) -> Result<()> {
+        "model": model,
-    use std::time::Instant;
+        "prompt": prompt_tokens,
-    use futures::StreamExt;
+        "max_tokens": 16384,
-    use super::context::WireChunk;
+        "temperature": sampling.temperature,
-    use salience::pb;
+        "top_p": sampling.top_p,
-
+        "top_k": sampling.top_k,
-    let mut handle: salience::SessionHandle = {
+        "stream": true,
-        let mut guard = session_lock.lock().await;
+        "return_token_ids": true,
-        match guard.take() {
+        "skip_special_tokens": false,
-            Some(h) => h,
+        "stop_token_ids": [super::tokenizer::IM_END],
-            None => {
+    });
-                drop(guard);
+    if !images.is_empty() {
-                log::debug!(target: "grpc", "run_session_generate: opening new session");
+        use base64::Engine;
-                salience::SessionHandle::open(client).await?
+        let b64 = base64::engine::general_purpose::STANDARD;
-            }
+        let uris: Vec<String> = images.iter()
-        }
+            .map(|(bytes, mime)| format!("data:{};base64,{}", mime, b64.encode(bytes)))
-    };
+            .collect();
-
+        request["multi_modal_data"] = serde_json::json!({ "image": uris });
-    // If the client believes the match extends only up to `match_upto`
+    }
-    // but the server has more, we need to rewind. For v1 the match is
+    if let Some(p) = priority {
-    // either whole or broken — `match_upto` is always 0 on any mutation
+        request["priority"] = serde_json::json!(p);
    // — so the cheapest correct recovery is to drop the session and
    // open a fresh one.
    if match_upto < handle.committed_len {
        log::warn!(target: "grpc",
            "session rewind: match_upto={} < committed_len={} — reopening session (resending {} bytes)",
            match_upto, handle.committed_len, handle.committed_len - match_upto);
        drop(handle);
        handle = salience::SessionHandle::open(client).await?;
    }
-    // Walk chunks at byte-level, taking everything past `match_upto`
+    let url = format!("{}/completions", base_url);
-    // as the delta. Token chunks can be split mid-way; images live
+    let debug_label = format!("{} prompt tokens, model={}", prompt_tokens.len(), model);
-    // inline in the token stream, so there's no separate image-chunk
+
-    // case anymore.
+    let mut response = send_and_check(
-    let mut acc: u32 = 0;
+        client, &url, &request,
-    let mut pending: Vec<u32> = Vec::new();
+        ("Authorization", &format!("Bearer {}", api_key)),
-    for chunk in chunks.iter() {
+        &[], &debug_label, None,
-        match chunk {
+    ).await?;
-            WireChunk::Tokens(t) => {
+
-                let len = t.len() as u32;
+    let mut reader = SseReader::new();
-                let chunk_end = acc + len;
+    let mut usage = None;
-                if chunk_end <= match_upto {
+
-                    acc = chunk_end;
+    while let Some(event) = reader.next_event(&mut response).await? {
-                } else if acc < match_upto {
+        if let Some(err_msg) = event["error"]["message"].as_str() {
-                    let skip = (match_upto - acc) as usize;
+            anyhow::bail!("API error in stream: {}", err_msg);
-                    pending.extend_from_slice(&t[skip..]);
+        }
-                    acc = chunk_end;
+
-                } else {
+        if let Some(u) = event["usage"].as_object() {
-                    pending.extend_from_slice(t);
+            if let Ok(u) = serde_json::from_value::<Usage>(serde_json::Value::Object(u.clone())) {
-                    acc = chunk_end;
+                usage = Some(u);
                }
            }
        }
    }
-    // Filter images to those entirely past `match_upto` — anything
+        let choices = match event["choices"].as_array() {
-    // before is on the server already (prior turn), anything
+            Some(c) => c,
-    // straddling is a hard divergence (image partially-sent shouldn't
+            None => continue,
    // happen with our atomic AppendImage history; with images-inline
    // it can only happen if mark_dirty cleared match_upto mid-block,
    // which the AST mutators prevent).
    let mut new_images: Vec<pb::ImageAttachment> = Vec::new();
    for img in &images {
        if img.pad_end <= match_upto {
            continue; // already sent on a prior turn
        }
        if img.pad_start < match_upto {
            anyhow::bail!(
                "session divergence: image at [{},{}) straddles match_upto={}",
                img.pad_start, img.pad_end, match_upto,
            );
        }
        new_images.push(pb::ImageAttachment {
            bytes: img.bytes.clone(),
            mime: img.mime.clone(),
            pad_range_start: img.pad_start,
            pad_range_end: img.pad_end,
        });
    }
    // Final Generate: pending holds any trailing text; decode up to
    // sampling.max_tokens. Request readouts on all decode positions
    // via a catch-all range ending at u32::MAX — decode never
    // reaches it.
    let prompt_len_after_append = handle.committed_len + pending.len() as u32;
    let readout_ranges = if readout_shape.is_some() {
        vec![pb::PositionRange {
            start: prompt_len_after_append,
            end: u32::MAX,
        }]
    } else {
        Vec::new()
    };
    let req = pb::GenerateRequest {
        session_id: handle.session_id.clone(),
        append_tokens: pending,
        offset: handle.committed_len,
        truncating: false,
        max_tokens: sampling.max_tokens,
        logprobs_ranges: Vec::new(),
        logprob_top_k: 0,
        readout_ranges,
        temperature: sampling.temperature,
        top_p: sampling.top_p,
        top_k: sampling.top_k,
        stop_token_ids: Vec::new(),
        priority: priority.unwrap_or(0),
        images: new_images,
    };
    let session_id_for_log = handle.session_id.clone();
    let t_generate = Instant::now();
    log::debug!(target: "grpc",
        "session {} Generate: offset={} append={} max_tokens={} priority={}",
        session_id_for_log, req.offset, req.append_tokens.len(),
        req.max_tokens, req.priority);
    let mut stream = handle.generate(req).await?;
    let (n_layers, n_concepts) = readout_shape.unwrap_or((0, 0));
    let mut session_terminated = false;
    let mut first_token_at: Option<Instant> = None;
    while let Some(event) = stream.next().await {
        let event = match event {
            Ok(e) => e,
            Err(status) => {
                log::warn!(target: "grpc",
                    "session {} Generate stream error: {} — dropping session",
                    session_id_for_log, status);
                session_terminated = true;
                let _ = tx.send(StreamToken::Error(format!(
                    "Generate stream error: {}", status,
                )));
                break;
            }
        };
-        let Some(inner) = event.event else { continue };
+
-        match inner {
+        for choice in choices {
-            pb::generate_event::Event::Token(t) => {
+            // `readout`, if present, is a nested list
-                if t.is_prefill { continue; }
+            // `[num_tokens][n_layers][n_concepts]`. Parse it once per
-                if first_token_at.is_none() {
+            // chunk and pair rows with token ids by index — the rows
-                    log::debug!(target: "grpc",
+            // are in the same order as `token_ids`.
-                        "session {} first decode token at {:?}",
+            let readouts: Option<Vec<TokenReadout>> = choice["readout"]
-                        session_id_for_log, t_generate.elapsed());
+                .as_array()
-                    first_token_at = Some(Instant::now());
+                .map(|outer| {
-                }
+                    outer.iter().filter_map(|per_token| {
-                let readout = if t.readout.is_empty() {
+                        per_token.as_array().map(|layers| {
-                    None
+                            layers.iter().filter_map(|per_layer| {
-                } else if n_layers == 0 || n_concepts == 0 {
+                                per_layer.as_array().map(|vals| {
-                    None
+                                    vals.iter()
-                } else {
+                                        .filter_map(|v| v.as_f64().map(|f| f as f32))
-                    let expected = (n_layers as usize) * (n_concepts as usize);
+                                        .collect::<Vec<f32>>()
-                    if t.readout.len() != expected {
+                                })
-                        log::warn!(target: "grpc",
+                            }).collect::<Vec<Vec<f32>>>()
-                            "readout shape mismatch: expected {}*{}={}, got {}",
+                        })
-                            n_layers, n_concepts, expected, t.readout.len());
+                    }).collect()
                        None
                    } else {
                        let n = n_concepts as usize;
                        let mut layers: Vec<Vec<f32>> = Vec::with_capacity(n_layers as usize);
                        for l in 0..(n_layers as usize) {
                            layers.push(t.readout[l * n..(l + 1) * n].to_vec());
                        }
                        Some(layers)
                    }
                };
                if tx.send(StreamToken::Token { id: t.id, readout }).is_err() {
                    break;
                }
            }
            pb::generate_event::Event::Done(d) => {
                log::debug!(target: "grpc",
                    "session {} Done: prompt={} completion={} total={} reason={:?} elapsed={:?}",
                    session_id_for_log, d.prompt_tokens, d.completion_tokens,
                    d.total_tokens, d.finish_reason, t_generate.elapsed());
                handle.committed_len = d.total_tokens;
                let usage = Some(Usage {
                    prompt_tokens: d.prompt_tokens,
                    completion_tokens: d.completion_tokens,
                    total_tokens: d.total_tokens,
                });
-                let _ = tx.send(StreamToken::Done { usage });
+
            if let Some(ids) = choice["token_ids"].as_array() {
                for (i, id_val) in ids.iter().enumerate() {
                    if let Some(id) = id_val.as_u64() {
                        let readout = readouts
                            .as_ref()
                            .and_then(|r| r.get(i).cloned());
                        let _ = tx.send(StreamToken::Token {
                            id: id as u32,
                            readout,
                        });
                    }
                }
            } else if let Some(text) = choice["text"].as_str() {
                // Fallback: provider didn't return token_ids, encode locally.
                // No readout available in this path — the encoder may
                // produce a different token count than the server did.
                if !text.is_empty() {
                    for id in super::tokenizer::encode(text) {
                        let _ = tx.send(StreamToken::Token { id, readout: None });
                    }
                }
            }
        }
    }
-    if !session_terminated {
+    let _ = tx.send(StreamToken::Done { usage });
        let mut guard = session_lock.lock().await;
        *guard = Some(handle);
    }
    Ok(())
 }
 /// POST `body` as JSON to `url` and return the response when its status
 /// is a success.
 ///
 /// `auth_header` is sent first, followed by `extra_headers` in order.
 /// When the `POC_DEBUG` environment variable is set, the payload size,
 /// selected rate-limit / request-id response headers and the time taken
 /// to get a response are logged; `debug_label` tags the payload log line.
 ///
 /// # Errors
 /// * Transport failures are re-wrapped with a coarse cause
 ///   ("connection refused", "request timed out" or "request error"),
 ///   chosen by substring-matching the underlying error message.
 /// * A non-success status produces an error carrying up to the first
 ///   1000 bytes of the response body. A 500-byte preview is logged, and
 ///   if `request_json` was supplied it is written to
 ///   `~/.consciousness/logs/failed-requests/<timestamp>.json` on a
 ///   best-effort basis for replay debugging.
 pub(crate) async fn send_and_check(
    client: &HttpClient,
    url: &str,
    body: &impl serde::Serialize,
    auth_header: (&str, &str),
    extra_headers: &[(&str, &str)],
    debug_label: &str,
    request_json: Option<&str>,
 ) -> Result<HttpResponse> {
    let verbose = std::env::var("POC_DEBUG").is_ok();
    let started = Instant::now();

    if verbose {
        // Serialization is done here only to measure the payload.
        let payload_bytes = match serde_json::to_string(body) {
            Ok(serialized) => serialized.len(),
            Err(_) => 0,
        };
        dbglog!(
            "request: {}K payload, {}",
            payload_bytes / 1024, debug_label,
        );
    }

    // Auth header first, then whatever extras the caller supplied.
    let headers: Vec<(&str, &str)> = std::iter::once(auth_header)
        .chain(extra_headers.iter().copied())
        .collect();

    let response = match client.send_json("POST", url, &headers, body).await {
        Ok(response) => response,
        Err(e) => {
            let msg = e.to_string();
            let cause = if msg.contains("connect timeout") || msg.contains("TCP connect") {
                "connection refused"
            } else if msg.contains("request timeout") {
                "request timed out"
            } else {
                "request error"
            };
            return Err(anyhow::anyhow!("{} ({}): {}", cause, url, msg));
        }
    };

    let status = response.status();
    let elapsed = started.elapsed();

    if verbose {
        const INTERESTING_HEADERS: [&str; 3] = [
            "x-ratelimit-remaining",
            "x-ratelimit-limit",
            "x-request-id",
        ];
        for name in INTERESTING_HEADERS {
            if let Some(val) = response.header(name) {
                dbglog!("header {}: {}", name, val);
            }
        }
    }

    // Happy path: hand the still-unread response back to the caller.
    if status.is_success() {
        if verbose {
            dbglog!(
                "connected in {:.1}s (HTTP {})",
                elapsed.as_secs_f64(),
                status.as_u16()
            );
        }
        return Ok(response);
    }

    // Failure path: consume the body for diagnostics.
    let err_body = response.text().await.unwrap_or_default();
    let log_cut = err_body.floor_char_boundary(err_body.len().min(500));
    dbglog!(
        "HTTP {} after {:.1}s ({}): {}",
        status,
        elapsed.as_secs_f64(),
        url,
        &err_body[..log_cut]
    );

    if let Some(json) = request_json {
        let dump_dir = dirs::home_dir()
            .unwrap_or_default()
            .join(".consciousness/logs/failed-requests");
        let _ = std::fs::create_dir_all(&dump_dir);
        let stamp = chrono::Local::now().format("%Y%m%dT%H%M%S");
        let dump_path = dump_dir.join(format!("{}.json", stamp));
        if std::fs::write(&dump_path, json).is_ok() {
            dbglog!(
                "saved failed request to {} (HTTP {})", dump_path.display(), status
            );
        }
    }

    let err_cut = err_body.floor_char_boundary(err_body.len().min(1000));
    anyhow::bail!("HTTP {} ({}): {}", status, url, &err_body[..err_cut]);
 }
 /// SSE stream reader. Handles the generic SSE plumbing shared by both
 /// backends: chunk reading with timeout, line buffering, `data:` prefix
 /// stripping, `[DONE]` detection, JSON parsing, and parse error diagnostics.
 /// Yields parsed events as serde_json::Value — each backend handles its
 /// own event types.
 pub(crate) struct SseReader {
    /// Decoded response text received so far that has not yet been split
    /// into complete lines.
    line_buf: String,
    /// Maximum time to wait for each response chunk before failing.
    chunk_timeout: Duration,
    /// When this reader was created; used for elapsed-time diagnostics.
    pub stream_start: Instant,
    /// Number of response chunks read so far.
    pub chunks_received: u64,
    /// Number of `data: ` lines handed to the JSON parser.
    pub sse_lines_parsed: u64,
    /// Number of `data: ` lines that failed to parse as JSON.
    pub sse_parse_errors: u64,
    /// True when `POC_DEBUG` was set at construction; when false only the
    /// first parse error is logged.
    debug: bool,
    /// Set once a `data: [DONE]` line has been seen.
    done: bool,
    /// Serialized request payload — saved to disk on errors for replay debugging.
    pub(crate) request_json: Option<String>,
 }
 impl SseReader {
    /// Create a reader with an empty line buffer and zeroed counters.
    ///
    /// The per-chunk timeout comes from
    /// `crate::config::get().api_stream_timeout_secs`; verbose diagnostics
    /// are enabled when the `POC_DEBUG` environment variable is set.
    pub(crate) fn new() -> Self {
        Self {
            line_buf: String::new(),
            chunk_timeout: Duration::from_secs(crate::config::get().api_stream_timeout_secs),
            stream_start: Instant::now(),
            chunks_received: 0,
            sse_lines_parsed: 0,
            sse_parse_errors: 0,
            debug: std::env::var("POC_DEBUG").is_ok(),
            done: false,
            request_json: None,
        }
    }

    /// Longest prefix of `s` that is at most `max_bytes` bytes long and ends
    /// on a UTF-8 character boundary.
    ///
    /// Slicing a `str` at an arbitrary byte offset panics when the offset
    /// falls inside a multi-byte character, so diagnostic previews of
    /// server-supplied text must be cut with this helper.
    fn truncate_at_char_boundary(s: &str, max_bytes: usize) -> &str {
        let mut end = s.len().min(max_bytes);
        // Offset 0 is always a boundary, so this loop terminates.
        while !s.is_char_boundary(end) {
            end -= 1;
        }
        &s[..end]
    }

    /// Short description of the unconsumed line buffer for error messages:
    /// `(empty)`, or the buffer length followed by at most its first 500
    /// bytes.
    fn buffer_preview(&self) -> String {
        if self.line_buf.is_empty() {
            "(empty)".to_string()
        } else {
            format!(
                "{}B: {}",
                self.line_buf.len(),
                Self::truncate_at_char_boundary(&self.line_buf, 500),
            )
        }
    }

    /// Save the request payload to disk for replay debugging.
    ///
    /// Does nothing when no `request_json` is attached. The payload is
    /// written to `~/.consciousness/logs/failed-requests/<timestamp>.json`;
    /// failures to create the directory or write the file are ignored.
    /// `reason` only appears in the log line emitted on success.
    fn save_failed_request(&self, reason: &str) {
        let Some(ref json) = self.request_json else { return };
        let log_dir = dirs::home_dir()
            .unwrap_or_default()
            .join(".consciousness/logs/failed-requests");
        let _ = std::fs::create_dir_all(&log_dir);
        let ts = chrono::Local::now().format("%Y%m%dT%H%M%S");
        let path = log_dir.join(format!("{}.json", ts));
        if std::fs::write(&path, json).is_ok() {
            dbglog!(
                "saved failed request to {} ({})", path.display(), reason
            );
        }
    }

    /// Read the next SSE event from the response stream.
    ///
    /// Returns `Ok(Some(value))` for each `data: ` line that parses as JSON
    /// and `Ok(None)` when the stream ends or `data: [DONE]` is received.
    /// Lines without the `data: ` prefix are skipped. Lines whose payload
    /// fails to parse are counted in `sse_parse_errors`, logged (only the
    /// first one unless debug is on) and skipped.
    ///
    /// # Errors
    /// Fails when reading a chunk returns an error, or when no chunk arrives
    /// within `chunk_timeout`. In both cases a diagnostic line is logged and
    /// the attached request payload, if any, is saved to disk.
    pub(crate) async fn next_event(
        &mut self,
        response: &mut HttpResponse,
    ) -> Result<Option<serde_json::Value>> {
        loop {
            // Drain complete lines from the buffer before reading more chunks
            while let Some(newline_pos) = self.line_buf.find('\n') {
                // Remove the line and its terminator in place rather than
                // re-allocating the remaining tail for every line.
                let raw: String = self.line_buf.drain(..=newline_pos).collect();
                let line = raw.trim();

                if line == "data: [DONE]" {
                    self.done = true;
                    return Ok(None);
                }

                // Blank lines, `event: ` lines and anything else that is not
                // a `data: ` line carry nothing to parse.
                let Some(json_str) = line.strip_prefix("data: ") else { continue };

                self.sse_lines_parsed += 1;
                match serde_json::from_str(json_str) {
                    Ok(v) => return Ok(Some(v)),
                    Err(e) => {
                        self.sse_parse_errors += 1;
                        if self.sse_parse_errors == 1 || self.debug {
                            // Cut on a char boundary: the payload is
                            // arbitrary server text and may be non-ASCII.
                            let preview = if json_str.len() > 200 {
                                format!("{}...", Self::truncate_at_char_boundary(json_str, 200))
                            } else {
                                json_str.to_string()
                            };
                            dbglog!(
                                "SSE parse error (#{}) {}: {}",
                                self.sse_parse_errors, e, preview
                            );
                        }
                        continue;
                    }
                }
            }

            if self.done {
                return Ok(None);
            }

            // Read more data from the response stream
            match tokio::time::timeout(self.chunk_timeout, response.chunk()).await {
                Ok(Ok(Some(chunk))) => {
                    self.chunks_received += 1;
                    // NOTE(review): decoding is lossy per chunk, so a
                    // multi-byte character split across two chunks would be
                    // replaced by U+FFFD — confirm whether that matters for
                    // callers.
                    self.line_buf.push_str(&String::from_utf8_lossy(&chunk));
                }
                Ok(Ok(None)) => return Ok(None),
                Ok(Err(e)) => {
                    let buf_preview = self.buffer_preview();
                    let msg = format!(
                        "stream error after {} chunks, {:.1}s, {} sse lines: {} | buf: {}",
                        self.chunks_received,
                        self.stream_start.elapsed().as_secs_f64(),
                        self.sse_lines_parsed,
                        e, buf_preview,
                    );
                    dbglog!("{}", msg);
                    self.save_failed_request(&msg);
                    return Err(e.into());
                }
                Err(_) => {
                    let buf_preview = self.buffer_preview();
                    let msg = format!(
                        "stream timeout: {}s, {} chunks, {} sse lines, {:.1}s elapsed | buf: {}",
                        self.chunk_timeout.as_secs(),
                        self.chunks_received,
                        self.sse_lines_parsed,
                        self.stream_start.elapsed().as_secs_f64(),
                        buf_preview,
                    );
                    dbglog!("{}", msg);
                    self.save_failed_request(&msg);
                    anyhow::bail!(
                        "stream timeout: no data for {}s ({} chunks received)",
                        self.chunk_timeout.as_secs(),
                        self.chunks_received
                    );
                }
            }
        }
    }
 }
--- a/src/agent/api/salience.rs
+++ b/src/agent/api/salience.rs
@ -1,279 +0,0 @@
 // agent/api/salience.rs — gRPC client bindings for salience.v1.
 //
 // Thin wrapper around the tonic-generated types. Every RPC except
 // Generate is unary; Generate is server-streaming. Free functions
 // (open/close session) wrap the lifecycle RPCs; `SessionHandle` just
 // carries the id + connection params so later RPCs can reuse them.
 //
 // The old bidi Session() API is gone — see git history for its shape.
 #![allow(clippy::enum_variant_names)]
 use anyhow::{Context, Result};
 use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint};
 /// Generated prost + tonic types for the `salience.v1` proto package,
 /// pulled in at compile time by `tonic::include_proto!`. Call sites
 /// reach them through this module: `pb::OpenSessionRequest`,
 /// `pb::Token`, etc.
 pub mod pb {
    tonic::include_proto!("salience.v1");
 }
 /// The generated salience service client, specialised to a tonic
 /// transport `Channel`.
 pub type SalienceClient = pb::salience_client::SalienceClient<Channel>;
 /// Connect a gRPC channel to the salience server at `base_url`
 /// (e.g. `https://host:8443`).
 ///
 /// For `https://` URLs the channel is configured for TLS with the
 /// native root store, plus any user-supplied PEM certificates returned
 /// by `load_user_certs_pem_bytes` (useful for self-signed servers).
 /// Other schemes connect without TLS configuration.
 ///
 /// The bare `Channel` is returned so the caller can cache it and build
 /// a `SalienceClient` per request instead of reconnecting each time.
 ///
 /// # Errors
 /// Fails if `base_url` is not a valid endpoint, if the TLS
 /// configuration is rejected, or if the connection cannot be
 /// established.
 pub async fn connect_channel(base_url: &str) -> Result<Channel> {
    let connect_timeout = std::time::Duration::from_secs(30);
    let request_timeout = std::time::Duration::from_secs(600);

    let plain = Endpoint::from_shared(base_url.to_string())
        .with_context(|| format!("invalid salience endpoint: {}", base_url))?
        .connect_timeout(connect_timeout)
        .timeout(request_timeout);

    let endpoint = if base_url.starts_with("https://") {
        let extra_roots = super::http::load_user_certs_pem_bytes();
        let native = ClientTlsConfig::new().with_native_roots();
        // Only add a CA bundle when the user actually provided one.
        let tls = if extra_roots.is_empty() {
            native
        } else {
            native.ca_certificate(Certificate::from_pem(extra_roots))
        };
        plain.tls_config(tls).context("configuring tonic TLS")?
    } else {
        plain
    };

    endpoint
        .connect()
        .await
        .with_context(|| format!("failed to connect to salience server at {}", base_url))
 }
 /// Derive the gRPC base URL from the HTTP completions base URL.
 ///
 /// vLLM's salience gRPC server listens on a different port (8443) from
 /// the HTTP endpoint (8000) and accepts no path component. Given an
 /// HTTP base like `https://host:8000/v1`, produce `https://host:8443`.
 ///
 /// Steps:
 /// 1. Trailing `/` characters are trimmed.
 /// 2. If the input has a `scheme://` prefix, everything from the first
 ///    `/` after it is dropped.
 /// 3. The port is rewritten only when the authority (the part before
 ///    any `/`, `?` or `#`) *ends* in `:8000`. A plain substring
 ///    replace would also corrupt `host:80001` or an IPv6 literal such
 ///    as `[::8000]:9000`.
 ///
 /// Inputs whose port isn't 8000 come back unchanged apart from the
 /// trimming/truncation above.
 pub fn derive_grpc_url(http_base: &str) -> String {
    const HTTP_PORT: &str = ":8000";
    const GRPC_PORT: &str = ":8443";

    let trimmed = http_base.trim_end_matches('/');
    let host_start = trimmed.find("://").map(|scheme_end| scheme_end + 3);

    // With a scheme, drop the path; without one, keep the input whole.
    let kept = match host_start {
        Some(start) => match trimmed[start..].find('/') {
            Some(slash) => &trimmed[..start + slash],
            None => trimmed,
        },
        None => trimmed,
    };

    // Split off whatever follows the authority so the port check is
    // anchored at the end of `host:port`.
    let start = host_start.unwrap_or(0);
    let authority_end = kept[start..]
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .map_or(kept.len(), |i| start + i);
    let (origin, tail) = kept.split_at(authority_end);

    match origin.strip_suffix(HTTP_PORT) {
        Some(host) => format!("{}{}{}", host, GRPC_PORT, tail),
        None => kept.to_string(),
    }
 }
 /// Add an `authorization: Bearer <api_key>` metadata entry to `req`.
 ///
 /// Does nothing when `api_key` is empty, or when the resulting header
 /// text cannot be parsed into a metadata value (the request is then
 /// left untouched).
 pub fn with_auth<T>(req: &mut tonic::Request<T>, api_key: &str) {
    if !api_key.is_empty() {
        match format!("Bearer {}", api_key).parse() {
            Ok(header) => {
                req.metadata_mut().insert("authorization", header);
            }
            Err(_) => {}
        }
    }
 }
 /// Handle to a server-side session. Carries the id + an `ApiClient`
 /// clone (which holds the shared tonic Channel) so subsequent
 /// per-session RPCs go over the process-global connection.
 /// `committed_len` tracks the server's current session.tokens length
 /// so the client can submit deltas with the right `offset`.
 ///
 /// Dropping a handle with a non-empty `session_id` attempts a
 /// best-effort CloseSession RPC (see the `Drop` impl).
 pub struct SessionHandle {
    /// Session id returned by the OpenSession RPC; sent with every
    /// per-session request. Taken (left empty) when the handle drops.
    pub session_id: String,
    /// `max_model_len` as reported in the OpenSession response.
    pub max_model_len: u32,
    /// Starts at 0 on open. `prefill_only` sends it as the request
    /// `offset` and overwrites it with `total_tokens` from a Done event.
    pub committed_len: u32,
    /// Clone of the `ApiClient` the session was opened with; used to
    /// obtain a `SalienceClient` and the API key for later RPCs.
    client: super::ApiClient,
 }
 impl SessionHandle {
    /// Open a new server-side session for `client.model` and return a
    /// handle to it.
    ///
    /// The handle keeps a clone of `client`, starts with
    /// `committed_len == 0`, and records the `session_id` and
    /// `max_model_len` from the server's response.
    ///
    /// # Errors
    /// Fails if a salience client cannot be obtained or the OpenSession
    /// RPC returns an error.
    pub async fn open(client: &super::ApiClient) -> Result<Self> {
        let started = std::time::Instant::now();
        log::debug!(target: "grpc", "OpenSession rpc: start");

        let mut rpc = client.salience_client().await?;
        let body = pb::OpenSessionRequest {
            model: client.model.clone(),
        };
        let mut request = tonic::Request::new(body);
        with_auth(&mut request, client.api_key());

        let reply = rpc
            .open_session(request)
            .await
            .context("OpenSession RPC failed")?
            .into_inner();

        log::debug!(target: "grpc",
            "OpenSession rpc: done session_id={} max_model_len={} elapsed={:?}",
            reply.session_id, reply.max_model_len, started.elapsed());

        Ok(Self {
            session_id: reply.session_id,
            max_model_len: reply.max_model_len,
            committed_len: 0,
            client: client.clone(),
        })
    }
    /// Borrow the `ApiClient` stored in this handle.
    pub fn client(&self) -> &super::ApiClient {
        &self.client
    }
    /// Debug helper: ask the server for the session's full token list
    /// via the DumpSession RPC, so client-side accounting can be
    /// compared against it when divergence is suspected. Not cheap on
    /// large sessions.
    ///
    /// # Errors
    /// Fails if a salience client cannot be obtained or the DumpSession
    /// RPC returns an error.
    pub async fn dump_tokens(&self) -> Result<Vec<u32>> {
        let mut rpc = self.client.salience_client().await?;

        let body = pb::DumpSessionRequest {
            session_id: self.session_id.clone(),
        };
        let mut request = tonic::Request::new(body);
        with_auth(&mut request, self.client.api_key());

        let reply = rpc
            .dump_session(request)
            .await
            .context("DumpSession RPC failed")?;
        Ok(reply.into_inner().tokens)
    }
    /// Start a server-streaming Generate RPC for `req` and return the
    /// stream of `GenerateEvent`s for the caller to iterate.
    ///
    /// This method does not touch `committed_len`; the caller is
    /// expected to advance it from the Done event's `total_tokens`.
    ///
    /// # Errors
    /// Fails if a salience client cannot be obtained or the Generate
    /// RPC cannot be opened.
    pub async fn generate(
        &self,
        req: pb::GenerateRequest,
    ) -> Result<tonic::Streaming<pb::GenerateEvent>> {
        let started = std::time::Instant::now();
        log::debug!(target: "grpc",
            "Generate rpc: open-stream session={} offset={} append={} max_tokens={}",
            self.session_id, req.offset, req.append_tokens.len(), req.max_tokens);

        let mut rpc = self.client.salience_client().await?;
        let mut request = tonic::Request::new(req);
        with_auth(&mut request, self.client.api_key());

        let stream = rpc
            .generate(request)
            .await
            .context("Generate RPC failed")?
            .into_inner();

        log::debug!(target: "grpc",
            "Generate rpc: stream opened session={} open-latency={:?}",
            self.session_id, started.elapsed());
        Ok(stream)
    }
    /// Append `tokens` to the session with a Generate call that asks
    /// for no decoding (`max_tokens = 0`), submitted at the current
    /// `committed_len` offset without truncation.
    ///
    /// The event stream is drained to completion; every Done event
    /// seen overwrites `committed_len` with its `total_tokens`. Other
    /// events are ignored. Intended for interleaving text runs between
    /// images and for score paths that want a prefill without a decode
    /// step.
    ///
    /// # Errors
    /// Fails if the Generate RPC cannot be opened or if the stream
    /// yields an error status.
    pub async fn prefill_only(&mut self, tokens: Vec<u32>) -> Result<()> {
        use futures::StreamExt;

        let request = pb::GenerateRequest {
            session_id: self.session_id.clone(),
            append_tokens: tokens,
            offset: self.committed_len,
            truncating: false,
            // Prefill only: no decode, no sampling configuration.
            max_tokens: 0,
            logprobs_ranges: Vec::new(),
            logprob_top_k: 0,
            readout_ranges: Vec::new(),
            temperature: 0.0,
            top_p: 0.0,
            top_k: 0,
            stop_token_ids: Vec::new(),
            priority: 0,
            images: Vec::new(),
        };

        let mut events = self.generate(request).await?;
        while let Some(item) = events.next().await {
            match item {
                Err(status) => {
                    return Err(anyhow::anyhow!("prefill Generate stream: {}", status));
                }
                Ok(msg) => {
                    if let Some(pb::generate_event::Event::Done(done)) = msg.event {
                        self.committed_len = done.total_tokens;
                    }
                }
            }
        }
        Ok(())
    }
 }
 /// On drop, send CloseSession from a detached tokio task so the server
 /// doesn't hold the session until TTL eviction. Best-effort only: with
 /// no tokio runtime available the close is skipped and the server's
 /// 30min TTL will reap the session eventually; RPC failures are logged
 /// at debug level and otherwise ignored.
 impl Drop for SessionHandle {
    fn drop(&mut self) {
        // An empty id means there is nothing to close.
        if self.session_id.is_empty() {
            return;
        }
        let session_id = std::mem::take(&mut self.session_id);
        let client = self.client.clone();

        match tokio::runtime::Handle::try_current() {
            Err(_) => {
                log::debug!(target: "grpc",
                    "SessionHandle drop outside tokio runtime, session {} leaks to TTL",
                    session_id);
            }
            Ok(runtime) => {
                // The join handle is discarded: the task runs detached.
                runtime.spawn(async move {
                    let mut rpc = match client.salience_client().await {
                        Ok(rpc) => rpc,
                        Err(_) => return,
                    };
                    let mut request = tonic::Request::new(pb::CloseSessionRequest {
                        session_id: session_id.clone(),
                    });
                    with_auth(&mut request, client.api_key());
                    if let Err(e) = rpc.close_session(request).await {
                        log::debug!(target: "grpc",
                            "CloseSession on drop failed for {}: {:#}",
                            session_id, e);
                    }
                });
            }
        }
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;

    /// Compile-time canary: builds one value of each generated message
    /// shape used here. If build.rs stops regenerating against the
    /// proto, this stops compiling.
    #[test]
    fn generated_types_compile() {
        let _open_req = pb::OpenSessionRequest {
            model: "qwen3-vl".into(),
        };

        let top_logprob = pb::TokenLogprob {
            id: 1,
            logprob: -0.5,
        };
        let _token = pb::Token {
            id: 42,
            position: 0,
            is_prefill: false,
            readout: vec![0.1, 0.2, 0.3],
            logprobs: vec![top_logprob],
            sampled_logprob: -0.1,
            has_sampled_logprob: true,
        };

        let done = pb::GenerateDone {
            prompt_tokens: 10,
            completion_tokens: 20,
            total_tokens: 30,
            finish_reason: pb::generate_done::FinishReason::Eos as i32,
        };
        let _event = pb::GenerateEvent {
            event: Some(pb::generate_event::Event::Done(done)),
        };
    }

    /// Table of (HTTP base, expected gRPC base) pairs.
    #[test]
    fn derive_grpc_url_cases() {
        let cases = [
            ("https://host:8000/v1", "https://host:8443"),
            ("https://host:8000/", "https://host:8443"),
            ("https://host:9000/v1", "https://host:9000"),
        ];
        for (http_base, expected) in cases {
            assert_eq!(derive_grpc_url(http_base), expected, "input: {}", http_base);
        }
    }
 }
--- a/src/agent/context.rs
+++ b/src/agent/context.rs
@ -125,19 +125,7 @@ impl<'de> Deserialize<'de> for NodeLeaf {
            body: NodeBody,
            timestamp: DateTime<Utc>,
        }
-        let mut raw = Raw::deserialize(deserializer)?;
+        let raw = Raw::deserialize(deserializer)?;
        // Heal pre-refactor logs: Image leaves used to be deserialized
        // with token_count=0 (server-authoritative count was applied
        // after AppendImage). With pads now expanded client-side at
        // construction, recompute from the persisted dimensions if
        // the stored count is 0.
        if let NodeBody::Image { orig_height, orig_width, token_count, .. }
            = &mut raw.body
        {
            if *token_count == 0 {
                *token_count = qwen3_image_token_count(*orig_height, *orig_width);
            }
        }
        let token_ids = raw.body.compute_token_ids();
        Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp })
    }
@ -155,44 +143,18 @@ pub enum AstNode {
        /// Maps memory key → divergence score for this response.
        #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
        memory_scores: std::collections::BTreeMap<String, f64>,
        /// Cached token stream for the subtree. When `Some`, wire-out
        /// uses these bytes verbatim and skips recursion into children.
        /// Populated by the response parser from the server's exact
        /// stream; also computable from children as a fallback. Cleared
        /// on any edit to a descendant. Not serialized — transient.
        #[serde(skip, default)]
        token_ids: Option<Vec<u32>>,
    },
 }
 /// The context window: four sections as Vec<AstNode>.
-///
+/// All mutation goes through ContextState methods to maintain the invariant
-/// All mutation MUST go through `ContextState`'s public methods. Two
+/// that token_ids on every leaf matches its rendered text.
 /// invariants ride on this:
 /// 1. Every `Leaf.token_ids` matches its `body.compute_token_ids()`.
 /// 2. For every `Branch { token_ids: Some(cached), .. }`, the cached
 ///    token stream matches what `wire_into` would produce by walking
 ///    `children` from scratch. Any mutation that touches a Branch's
 ///    children — directly or via a descendant — must clear the
 ///    Branch's `token_ids` so it gets recomputed on next wire-out.
 ///
 /// The `&mut Vec<AstNode>` escape hatches are intentionally NOT
 /// exposed; if you find yourself wanting one, add a focused method
 /// here that maintains the invariants.
 pub struct ContextState {
    system: Vec<AstNode>,
    identity: Vec<AstNode>,
    journal: Vec<AstNode>,
    conversation: Vec<AstNode>,
    pub conversation_log: Option<crate::mind::log::ConversationLog>,
    /// Length of the session's token stream on the server, as of the
    /// last Done event. Updated by the grpc layer.
    server_committed_len: u32,
    /// Prefix length of our walk that still matches the server's
    /// session.tokens byte-for-byte. When < `server_committed_len`
    /// the session needs rewinding (truncating=true at this offset).
    /// Reset to 0 on any mutation that could have changed sent bytes.
    client_match_upto: u32,
 }
 impl Clone for ContextState {
@ -203,8 +165,6 @@ impl Clone for ContextState {
            journal: self.journal.clone(),
            conversation: self.conversation.clone(),
            conversation_log: None, // forked contexts don't log
            server_committed_len: self.server_committed_len,
            client_match_upto: self.client_match_upto,
        }
    }
 }
@ -241,10 +201,6 @@ pub struct ResponseParser {
    think_buf: String,
    in_tool_call: bool,
    tool_call_buf: String,
    /// Raw generated token IDs, in arrival order. Combined with the
    /// prologue at `finish` to stamp the Branch's authoritative
    /// token cache — the bytes the server has for this branch.
    generated_tokens: Vec<u32>,
 }
 impl Role {
@ -356,16 +312,6 @@ impl NodeLeaf {
    pub fn token_ids(&self) -> &[u32]    { &self.token_ids }
    pub fn tokens(&self) -> usize        { self.token_ids.len() }
    pub fn timestamp(&self) -> DateTime<Utc> { self.timestamp }
    /// If this is an Image leaf, update its IMAGE_PAD count to `n` and
    /// recompute cached `token_ids`. No-op on non-Image leaves —
    /// callers know the body shape via `body()`.
    pub fn set_image_token_count(&mut self, n: u32) {
        if let NodeBody::Image { token_count, .. } = &mut self.body {
            *token_count = n;
            self.token_ids = self.body.compute_token_ids();
        }
    }
 }
 impl AstNode {
@ -414,9 +360,6 @@ impl AstNode {
        orig_height: u32,
        orig_width: u32,
    ) -> Self {
        // Pad count is computed eagerly from dimensions — no more
        // "unknown until server responds" shape. Server validates
        // on the Generate call; mismatches fail loud.
        let token_count = qwen3_image_token_count(orig_height, orig_width);
        Self::Leaf(NodeLeaf::new(NodeBody::Image {
            bytes,
@ -430,13 +373,7 @@ impl AstNode {
    // -- Branch constructors --------------------------------------------------
    pub fn branch(role: Role, children: Vec<AstNode>) -> Self {
-        Self::Branch {
+        Self::Branch { role, children, timestamp: Utc::now(), memory_scores: Default::default() }
            role,
            children,
            timestamp: Utc::now(),
            memory_scores: Default::default(),
            token_ids: None,
        }
    }
    pub fn system_msg(text: impl Into<String>) -> Self {
@ -445,7 +382,6 @@ impl AstNode {
            children: vec![Self::content(text)],
            timestamp: Utc::now(),
            memory_scores: Default::default(),
            token_ids: None,
        }
    }
@ -455,7 +391,6 @@ impl AstNode {
            children: vec![Self::content(text)],
            timestamp: Utc::now(),
            memory_scores: Default::default(),
            token_ids: None,
        }
    }
@ -467,12 +402,11 @@ impl AstNode {
                let token_ids = leaf.body.compute_token_ids();
                Self::Leaf(NodeLeaf { token_ids, ..leaf })
            }
-            Self::Branch { role, children, timestamp, memory_scores, .. } => Self::Branch {
+            Self::Branch { role, children, timestamp, memory_scores } => Self::Branch {
                role,
                children: children.into_iter().map(|c| c.retokenize()).collect(),
                timestamp,
                memory_scores,
                token_ids: None,
            },
        }
    }
@ -549,10 +483,7 @@ impl AstNode {
    fn token_ids_into(&self, out: &mut Vec<u32>) {
        match self {
            Self::Leaf(leaf) => out.extend_from_slice(&leaf.token_ids),
-            Self::Branch { token_ids: Some(cached), .. } => {
+            Self::Branch { role, children, .. } => {
                out.extend_from_slice(cached);
            }
            Self::Branch { role, children, token_ids: None, .. } => {
                out.push(tokenizer::IM_START);
                out.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
                for child in children {
@ -581,8 +512,7 @@ impl Ast for AstNode {
    fn tokens(&self) -> usize {
        match self {
            Self::Leaf(leaf) => leaf.tokens(),
-            Self::Branch { token_ids: Some(cached), .. } => cached.len(),
+            Self::Branch { role, children, .. } => {
            Self::Branch { role, children, token_ids: None, .. } => {
                1 + role_header_tokens(*role)
                    + children.iter().map(|c| c.tokens()).sum::<usize>()
                    + 1 + newline_tokens()
@ -736,7 +666,6 @@ impl ResponseParser {
            think_buf: String::new(),
            in_tool_call: false,
            tool_call_buf: String::new(),
            generated_tokens: Vec::new(),
        }
    }
@ -755,34 +684,18 @@ impl ResponseParser {
        let handle = tokio::spawn(async move {
            let mut parser = self;
            let agent_name = agent.state.lock().await.provenance.clone();
            eprintln!(
                "[agent:{agent_name}] parser task start branch_idx={} in_think={}",
                parser.branch_idx, parser.in_think,
            );
            let log_path = format!("/tmp/poc-{}.log", agent_name);
            let mut log_file = std::fs::OpenOptions::new()
                .create(true).append(true).open(&log_path).ok();
            let mut full_text = String::new();
            let mut token_count: usize = 0;
            while let Some(event) = stream.recv().await {
                match event {
                    super::api::StreamToken::Token { id, readout } => {
                        token_count += 1;
                        if token_count == 1 {
                            eprintln!("[agent:{agent_name}] parser first token id={}", id);
                        } else if token_count % 256 == 0 {
                            eprintln!(
                                "[agent:{agent_name}] parser token_count={} chars={}",
                                token_count,
                                full_text.len(),
                            );
                        }
                        if let Some(r) = readout {
                            if let Ok(mut buf) = agent.readout.lock() {
                                buf.push(id, r);
                            }
                        }
                        parser.generated_tokens.push(id);
                        let text = super::tokenizer::decode(&[id]);
                        full_text.push_str(&text);
                        let mut ctx = agent.context.lock().await;
@ -801,12 +714,6 @@ impl ResponseParser {
                        }
                    }
                    super::api::StreamToken::Done { usage } => {
                        eprintln!(
                            "[agent:{agent_name}] parser done token_count={} chars={} usage={:?}",
                            token_count,
                            full_text.len(),
                            usage,
                        );
                        if let Some(ref mut f) = log_file {
                            use std::io::Write;
                            let ctx = agent.context.lock().await;
@ -823,31 +730,19 @@ impl ResponseParser {
                                let _ = writeln!(f, "  unparsed text: {}", &full_text[..end]);
                            }
                        }
-                        if let Some(ref u) = usage {
+                        if let Some(u) = usage {
                            agent.state.lock().await.last_prompt_tokens = u.prompt_tokens;
                        }
                        let mut ctx = agent.context.lock().await;
                        parser.finish(&mut ctx);
                        if let Some(u) = usage {
                            ctx.note_session_synced(u.total_tokens);
                        }
                        return Ok(());
                    }
                    super::api::StreamToken::Error(e) => {
                        eprintln!("[agent:{agent_name}] parser stream error: {}", e);
                        return Err(anyhow::anyhow!("{}", e));
                    }
                }
            }
-            eprintln!(
+            Ok(())
                "[agent:{agent_name}] parser stream closed without done token_count={} chars={}",
                token_count,
                full_text.len(),
            );
            Err(anyhow::anyhow!(
                "stream closed without Done event after {} tokens",
                token_count,
            ))
        });
        (rx, handle)
    }
@ -928,7 +823,7 @@ impl ResponseParser {
    }
    fn push_child(&self, ctx: &mut ContextState, child: AstNode) {
-        ctx.push_child_raw(Section::Conversation, self.branch_idx, child);
+        ctx.push_child(Section::Conversation, self.branch_idx, child);
    }
    fn flush_content(&mut self, ctx: &mut ContextState) {
@ -942,69 +837,10 @@ impl ResponseParser {
    }
    pub fn finish(mut self, ctx: &mut ContextState) {
-        // Salvage any in-flight tag accumulators if the stream ended
+        if !self.buf.is_empty() {
        // before the close tag arrived (max_tokens, premature EOS,
        // server-side cancel). Without this, an unterminated
        // <think>...</think> drops all of self.think_buf and only the
        // trailing rolling window in self.buf survives — observed as
        // "responses cut off, only the last ~8 characters come
        // through" because drain_safe keeps `close_tag.len()` bytes
        // (8 for `</think>`) at the tail of buf.
        if self.in_think {
            if !self.buf.is_empty() {
                self.think_buf.push_str(&std::mem::take(&mut self.buf));
            }
            let text = std::mem::take(&mut self.think_buf).trim().to_string();
            if !text.is_empty() {
                self.push_child(ctx, AstNode::thinking(text));
            }
            self.in_think = false;
        } else if self.in_tool_call {
            if !self.buf.is_empty() {
                self.tool_call_buf.push_str(&std::mem::take(&mut self.buf));
            }
            let body = std::mem::take(&mut self.tool_call_buf);
            match parse_tool_call_body(&body) {
                Some((name, args)) => {
                    self.flush_content(ctx);
                    self.push_child(ctx, AstNode::tool_call(&name, &args));
                }
                None => {
                    // Body's likely incomplete (no `</tool_call>` ever
                    // arrived). Wrap as content with the open tag so the
                    // model can see its own truncated attempt next turn
                    // rather than losing it silently.
                    self.content_parts.push(format!("<tool_call>\n{}", body));
                }
            }
            self.in_tool_call = false;
        } else if !self.buf.is_empty() {
            self.content_parts.push(std::mem::take(&mut self.buf));
        }
        self.flush_content(ctx);
        // Stamp the authoritative token cache onto the branch.
        // Layout mirrors the full chat-template rendering of a
        // message block:
        //
        //   IM_START + "assistant\n" [+ "<think>\n"]   (prologue — what we sent)
        //   + generated_tokens                          (what the server generated, ends in IM_END)
        //   + "\n"                                      (trailing newline — template-required)
        //
        // Server only has through the IM_END (model stops on it,
        // doesn't emit "\n"). Match-upto lands inside the cache
        // right after IM_END; the chunk-walk's straddle path picks
        // up the trailing "\n" as the head of the next turn's delta.
        // The "\n" between turns matters: without it Qwen sees
        // `<|im_end|><|im_start|>` back-to-back (no newline) and
        // responds with garbage.
        let prologue_text = if self.in_think { "assistant\n<think>\n" } else { "assistant\n" };
        let mut cache = Vec::with_capacity(1 + self.generated_tokens.len() + 8);
        cache.push(tokenizer::IM_START);
        cache.extend(tokenizer::encode(prologue_text));
        cache.extend(self.generated_tokens);
        cache.extend(tokenizer::encode("\n"));
        ctx.set_branch_cache(Section::Conversation, self.branch_idx, cache);
    }
 }
@ -1016,77 +852,20 @@ impl ContextState {
            journal: Vec::new(),
            conversation: Vec::new(),
            conversation_log: None,
            server_committed_len: 0,
            client_match_upto: 0,
        }
    }
    // -- Server sync tracking -------------------------------------------------
    /// Length of the session's token stream on the server. Updated by
    /// the grpc layer from Generate Done events.
    pub fn server_committed_len(&self) -> u32 { self.server_committed_len }
    /// Prefix of our walk we still believe matches the server
    /// byte-for-byte. If less than `server_committed_len`, the next
    /// Generate must send `truncating=true` at this offset.
    pub fn client_match_upto(&self) -> u32 { self.client_match_upto }
    /// Called by the grpc layer after a successful Generate Done:
    /// records both the server's new length and the fact that we
    /// match up to it (we just sent everything).
    pub fn note_session_synced(&mut self, total_tokens: u32) {
        self.server_committed_len = total_tokens;
        self.client_match_upto = total_tokens;
    }
    /// Reset match-upto to 0. Called from every mutation that could
    /// have touched a region the server already has. For now,
    /// conservatively drops alignment entirely — finer-grained
    /// tracking (match-upto at the mutated node's offset) is a
    /// future optimization.
    fn mark_dirty(&mut self) {
        self.client_match_upto = 0;
    }
    // -- Read access ----------------------------------------------------------
    pub fn system(&self) -> &[AstNode]       { &self.system }
    pub fn identity(&self) -> &[AstNode]     { &self.identity }
    pub fn journal(&self) -> &[AstNode]      { &self.journal }
    pub fn conversation(&self) -> &[AstNode] { &self.conversation }
-
+    pub fn conversation_mut(&mut self) -> &mut Vec<AstNode> { &mut self.conversation }
    /// Set or clear a single `memory_scores` entry on an Assistant
    /// Branch. Used by the full-matrix scorer to attribute per-memory
    /// divergence onto the response. `score = None` removes the key;
    /// `Some(s)` inserts/overwrites.
    ///
    /// Doesn't affect the Branch's token cache: `memory_scores` is a
    /// serialized-but-non-tokenizing annotation. No-op (with a debug
    /// log) if the index points to a Leaf or a non-Assistant Branch —
    /// callers are typically iterating on stale indices and we'd
    /// rather skip than panic.
    pub fn set_branch_memory_score(
        &mut self,
        section: Section,
        index: usize,
        key: &str,
        score: Option<f64>,
    ) {
        let nodes = self.section_mut(section);
        let Some(node) = nodes.get_mut(index) else { return };
        let AstNode::Branch { role: Role::Assistant, memory_scores, .. } = node
        else { return };
        match score {
            Some(s) => { memory_scores.insert(key.to_string(), s); }
            None => { memory_scores.remove(key); }
        }
    }
    pub fn sections(&self) -> [&Vec<AstNode>; 4] {
        [&self.system, &self.identity, &self.journal, &self.conversation]
    }
 }
 impl Ast for ContextState {
@ -1119,63 +898,30 @@ impl Ast for ContextState {
 }
 /// An image collected from the AST for a request body. The AST stores
-/// Image metadata collected during `wire_chunks` — the binary +
+/// the pre-expanded token form (N image_pads) for accurate budget
-/// mime plus the absolute token-position range of the image's
+/// accounting; the wire form collapses each Image to a single
-/// pre-expanded placeholder run in the full wire stream. Sent
+/// `<|image_pad|>` between vision bookends and ships the bytes
-/// alongside `append_tokens` in `GenerateRequest` so the server
+/// separately as multi_modal_data.
 /// can attach vision features to the declared positions. Positions
 /// are absolute within the full wire walk starting at offset 0,
 /// i.e. the same coordinate system as `session.tokens` on the
 /// server once the walk has been applied.
 #[derive(Clone)]
 pub struct WireImage {
    pub bytes: Vec<u8>,
    pub mime: String,
    pub pad_start: u32,
    pub pad_end: u32,
 }
 /// One piece of the wire stream for the gRPC session path. Since
 /// images now live inline in the token stream (pre-expanded at AST
 /// construction time), there's only one variant — a run of tokens.
 /// The parallel `Vec<WireImage>` returned by `wire_chunks` gives the
 /// binary + position metadata for each embedded image.
 #[derive(Clone)]
 pub enum WireChunk {
    Tokens(Vec<u32>),
 }
 fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>) {
    match node {
        AstNode::Leaf(leaf) => match leaf.body() {
            NodeBody::Image { bytes, mime, .. } => {
-                // The Image leaf's token_ids is already
+                tokens.push(tokenizer::VISION_START);
-                // [VISION_START, IMAGE_PAD * N, VISION_END]. Inline
+                tokens.push(tokenizer::IMAGE_PAD);
-                // those into the token stream and record the pad-run
+                tokens.push(tokenizer::VISION_END);
                // range so the server can attach features to the
                // declared positions.
                let pad_start = tokens.len() as u32;
                tokens.extend_from_slice(leaf.token_ids());
                let pad_end = tokens.len() as u32;
                images.push(WireImage {
                    bytes: bytes.clone(),
                    mime: mime.clone(),
                    pad_start,
                    pad_end,
                });
            }
            _ => tokens.extend_from_slice(leaf.token_ids()),
        },
-        AstNode::Branch { token_ids: Some(cached), children, .. } => {
+        AstNode::Branch { role, children, .. } => {
            // Cached branches still need their image children paired
            // up with the vision-block ranges embedded in the cached
            // token stream — the cache captures vision tokens but not
            // the matching bytes/mime.
            let base = tokens.len() as u32;
            tokens.extend_from_slice(cached);
            pair_cached_images(cached, children, base, images);
        }
        AstNode::Branch { role, children, token_ids: None, .. } => {
            tokens.push(tokenizer::IM_START);
            tokens.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
            for c in children {
@ -1187,101 +933,6 @@ fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>)
    }
 }
 /// Walks a forest of AST nodes depth-first and yields every Image
 /// leaf it encounters as a borrowed `(bytes, mime)` pair, in document
 /// order. The only allocation is the explicit stack of nodes that are
 /// still waiting to be visited.
 struct ImageLeaves<'a> {
    /// Pending nodes; the next node to visit sits at the top (end).
    stack: Vec<&'a AstNode>,
 }
 impl<'a> ImageLeaves<'a> {
    /// Start a traversal over `nodes`. They are stacked in reverse so
    /// that popping visits them front to back.
    fn new(nodes: &'a [AstNode]) -> Self {
        Self { stack: nodes.iter().rev().collect() }
    }
 }
 impl<'a> Iterator for ImageLeaves<'a> {
    type Item = (&'a [u8], &'a str);
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // An empty stack means the traversal is finished.
            match self.stack.pop()? {
                AstNode::Branch { children, .. } => {
                    // Reverse again so the first child is popped first.
                    self.stack.extend(children.iter().rev());
                }
                AstNode::Leaf(leaf) => {
                    if let NodeBody::Image { bytes, mime, .. } = leaf.body() {
                        return Some((bytes, mime));
                    }
                }
            }
        }
    }
 }
 /// Lazily locate every `VISION_START..VISION_END` block in `cached`.
 ///
 /// Each item is a `(start, end)` pair of offsets into `cached`:
 /// `start` is the index of the `VISION_START` token and `end` is one
 /// past the matching `VISION_END`, so `cached[start..end]` is the
 /// whole block including both bookends.
 ///
 /// # Panics
 ///
 /// Panics (when the iterator is advanced) if a `VISION_START` has no
 /// `VISION_END` after it.
 fn vision_blocks(cached: &[u32]) -> impl Iterator<Item = (usize, usize)> + '_ {
    // Offset at which the next search begins; never exceeds `cached.len()`.
    let mut resume_at = 0;
    std::iter::from_fn(move || {
        // No further opener means no further blocks.
        let opener = cached[resume_at..]
            .iter()
            .position(|&tok| tok == tokenizer::VISION_START)?;
        let start = resume_at + opener;
        let closer = match cached[start..]
            .iter()
            .position(|&tok| tok == tokenizer::VISION_END)
        {
            Some(rel) => rel,
            None => panic!(
                "unmatched VISION_START at offset {} in cached branch",
                start),
        };
        let end = start + closer + 1;
        resume_at = end;
        Some((start, end))
    })
 }
 /// Emit one `WireImage` per vision block found in a branch's cached
 /// token stream.
 ///
 /// `cached` holds the tokens (including any inlined
 /// `VISION_START .. VISION_END` blocks) but no image payloads;
 /// `children` holds the payloads but not their positions in `cached`.
 /// The n-th vision block is matched with the n-th Image leaf found
 /// depth-first under `children`, and its offsets are shifted by
 /// `base_offset` so they are absolute in the parent token stream.
 ///
 /// # Panics
 ///
 /// Panics if the number of vision blocks and the number of Image
 /// leaves differ, or if `cached` contains an unmatched
 /// `VISION_START`.
 fn pair_cached_images(
    cached: &[u32],
    children: &[AstNode],
    base_offset: u32,
    images: &mut Vec<WireImage>,
 ) {
    let mut leaves = ImageLeaves::new(children);
    for (s, e) in vision_blocks(cached) {
        // Every block needs a payload to go with it.
        let (bytes, mime) = match leaves.next() {
            Some(payload) => payload,
            None => panic!(
                "cached branch has more vision blocks than image children"),
        };
        images.push(WireImage {
            bytes: bytes.to_vec(),
            mime: mime.to_string(),
            pad_start: base_offset + s as u32,
            pad_end: base_offset + e as u32,
        });
    }
    // Blocks are exhausted; any leftover leaf has nowhere to go.
    if leaves.next().is_some() {
        panic!(
            "cached branch has fewer vision blocks than image children");
    }
 }
 pub fn memory_key(node: &AstNode) -> Option<&str> {
    match node {
        AstNode::Leaf(leaf) => match leaf.body() {
@ -1391,92 +1042,6 @@ impl ContextState {
        }
        (tokens, images, assistant_ranges)
    }
    /// Assemble the outgoing token stream as a list of `WireChunk`s
    /// plus the list of images embedded in it.
    ///
    /// Sections are emitted in the order system, identity, journal,
    /// conversation. `conv_range` selects the slice of the
    /// conversation to include, and `skip` is consulted for each
    /// top-level identity and conversation node; nodes for which it
    /// returns `true` are left out.
    ///
    /// Vision blocks stay inline in the token stream. Every image
    /// gets a `WireImage` carrying its bytes, mime type and the
    /// `[pad_start, pad_end)` range its block occupies in the stream.
    ///
    /// The returned chunk list is empty when no tokens were produced
    /// and otherwise holds exactly one `WireChunk::Tokens`.
    pub fn wire_chunks<F>(
        &self,
        conv_range: std::ops::Range<usize>,
        mut skip: F,
    ) -> (Vec<WireChunk>, Vec<WireImage>)
    where F: FnMut(&AstNode) -> bool,
    {
        /// Append `node`'s tokens to `stream`, registering any images
        /// it contains in `images`.
        fn emit(
            node: &AstNode,
            stream: &mut Vec<u32>,
            images: &mut Vec<WireImage>,
        ) {
            match node {
                AstNode::Leaf(leaf) => {
                    if let NodeBody::Image { bytes, mime, .. } = leaf.body() {
                        // The leaf's token_ids are inlined as-is; the
                        // recorded range spans exactly those tokens.
                        let pad_start = stream.len() as u32;
                        stream.extend_from_slice(leaf.token_ids());
                        let pad_end = stream.len() as u32;
                        images.push(WireImage {
                            bytes: bytes.clone(),
                            mime: mime.clone(),
                            pad_start,
                            pad_end,
                        });
                    } else {
                        stream.extend_from_slice(leaf.token_ids());
                    }
                }
                AstNode::Branch { role, children, token_ids, .. } => match token_ids {
                    Some(cached) => {
                        // Cached tokens are copied verbatim; the image
                        // payloads are recovered from the children and
                        // paired with the vision blocks in the cache.
                        let base = stream.len() as u32;
                        stream.extend_from_slice(cached);
                        pair_cached_images(cached, children, base, images);
                    }
                    None => {
                        // No cache: frame the children with the role
                        // header and the end-of-message marker.
                        stream.push(tokenizer::IM_START);
                        stream.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
                        for child in children {
                            emit(child, stream, images);
                        }
                        stream.push(tokenizer::IM_END);
                        stream.extend(tokenizer::encode("\n"));
                    }
                },
            }
        }
        let mut stream: Vec<u32> = Vec::new();
        let mut images: Vec<WireImage> = Vec::new();
        for node in self.system() {
            emit(node, &mut stream, &mut images);
        }
        for node in self.identity() {
            if !skip(node) {
                emit(node, &mut stream, &mut images);
            }
        }
        for node in self.journal() {
            emit(node, &mut stream, &mut images);
        }
        for node in &self.conversation()[conv_range] {
            if !skip(node) {
                emit(node, &mut stream, &mut images);
            }
        }
        let mut chunks = Vec::new();
        if !stream.is_empty() {
            chunks.push(WireChunk::Tokens(stream));
        }
        (chunks, images)
    }
 }
 impl ContextState {
@ -1496,27 +1061,17 @@ impl ContextState {
                dbglog!("warning: log: {:#}", e);
            }
        }
        // Conversation appends always go to the tail — past committed —
        // so they don't break the match. Any other section mutates a
        // region the server may already have, so drop alignment.
        if section != Section::Conversation {
            self.mark_dirty();
        }
        self.section_mut(section).push(node);
    }
    /// Append `node` to `section` without writing a log entry.
    ///
    /// Pushing to any section other than the conversation marks the
    /// state dirty first.
    pub fn push_no_log(&mut self, section: Section, node: AstNode) {
        let outside_conversation = section != Section::Conversation;
        if outside_conversation {
            self.mark_dirty();
        }
        let nodes = self.section_mut(section);
        nodes.push(node);
    }
    /// Replace the body of a leaf at `index` in `section`.
    /// Re-tokenizes to maintain the invariant.
    pub fn set_message(&mut self, section: Section, index: usize, body: NodeBody) {
        self.mark_dirty();
        let nodes = self.section_mut(section);
        let node = &mut nodes[index];
        match node {
@ -1542,12 +1097,10 @@ impl ContextState {
    }
    /// Remove the node at `index` from `section` and return it,
    /// marking the state dirty first.
    pub fn del(&mut self, section: Section, index: usize) -> AstNode {
        self.mark_dirty();
        let nodes = self.section_mut(section);
        nodes.remove(index)
    }
    /// Remove every node from `section`, marking the state dirty
    /// first.
    pub fn clear(&mut self, section: Section) {
        self.mark_dirty();
        let nodes = self.section_mut(section);
        nodes.clear();
    }
@ -1568,7 +1121,6 @@ impl ContextState {
    ///          are > 50% of conversation tokens) or oldest conversation entry.
    /// Phase 3: Snap to user message boundary at start.
    pub fn trim_conversation(&mut self) {
        self.mark_dirty();
        let max_tokens = context_budget_tokens();
        let fixed = self.system.iter().map(|n| n.tokens()).sum::<usize>()
            + self.identity.iter().map(|n| n.tokens()).sum::<usize>()
@ -1645,49 +1197,11 @@ impl ContextState {
    }
    /// Append `child` to the branch at `index` in `section`.
    ///
    /// The branch's cached token stream is discarded, so wire-out
    /// rebuilds the tokens from the children until a cache is set
    /// again. If a cache was present beforehand, the state is also
    /// marked dirty.
    ///
    /// # Panics
    ///
    /// Panics if the node at `index` is a leaf.
    pub fn push_child(&mut self, section: Section, index: usize, child: AstNode) {
        let had_cache = match &mut self.section_mut(section)[index] {
            AstNode::Branch { children, token_ids, .. } => {
                children.push(child);
                // `take` clears the cache and reports whether one existed.
                token_ids.take().is_some()
            }
            AstNode::Leaf(_) => panic!("push_child on leaf node"),
        };
        if had_cache {
            self.mark_dirty();
        }
    }
    /// Like `push_child` but preserves the branch's cached token stream.
    /// Used by the response parser, which is simultaneously populating
    /// the cache from the authoritative server stream and pushing the
    /// parsed-out children — the two stay consistent by construction.
    /// Module-private: callers outside `context.rs` must go through
    /// `push_child` so the invariant is maintained.
    fn push_child_raw(&mut self, section: Section, index: usize, child: AstNode) {
        let node = &mut self.section_mut(section)[index];
        match node {
            AstNode::Branch { children, .. } => children.push(child),
-            AstNode::Leaf(_) => panic!("push_child_raw on leaf node"),
+            AstNode::Leaf(_) => panic!("push_child on leaf node"),
        }
    }
    /// Store `tokens` as the cached token stream of the branch at
    /// `index` in `section`, replacing any cache already there.
    ///
    /// Kept module-private because the cache must agree with the
    /// branch's children; per the original notes it is meant to be
    /// filled only by the response parser from the server's token
    /// stream.
    ///
    /// # Panics
    ///
    /// Panics if the node at `index` is a leaf.
    fn set_branch_cache(&mut self, section: Section, index: usize, tokens: Vec<u32>) {
        match &mut self.section_mut(section)[index] {
            AstNode::Leaf(_) => panic!("set_branch_cache on leaf node"),
            AstNode::Branch { token_ids, .. } => {
                *token_ids = Some(tokens);
            }
        }
    }
@ -1711,14 +1225,6 @@ impl ContextState {
 // to at request time. Constants come from Qwen3.5-27B's preprocessor_config.
 // ---------------------------------------------------------------------------
 // Production client-side computation of image-token expansion. With
 // the delta-session protocol, the client writes the pre-expanded
 // vision block (VISION_START + N*IMAGE_PAD + VISION_END) directly
 // into the token stream at Image-leaf construction time, and tells
 // the server where each image's pad run lives via
 // GenerateRequest.images. Server validates that this N matches
 // what the vision encoder actually produces and rejects on
 // mismatch — so drift here fails loudly, not silently.
 const QWEN3_PATCH_SIZE: u32 = 16;
 const QWEN3_MERGE_SIZE: u32 = 2;
 const QWEN3_MIN_PIXELS: u64 = 65_536;
@ -1752,10 +1258,11 @@ fn smart_resize(h: u32, w: u32, factor: u32, min_pixels: u64, max_pixels: u64) -
    }
 }
-/// How many `<|image_pad|>` tokens the Qwen3-VL vision encoder will
+/// Compute how many `<|image_pad|>` tokens vLLM will emit for an image of
-/// produce for an image of the given dimensions. Server verifies
+/// the given dimensions. Matches Qwen3VL's feature-size calculation exactly:
-/// this count against its own encoder run and rejects on mismatch.
+///   (grid_h * grid_w) / merge_size^2
-pub fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 {
+/// where (grid_h, grid_w) = resized dims / patch_size.
 fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 {
    let factor = QWEN3_PATCH_SIZE * QWEN3_MERGE_SIZE;
    let (rh, rw) = smart_resize(orig_h, orig_w, factor, QWEN3_MIN_PIXELS, QWEN3_MAX_PIXELS);
    (rh / QWEN3_PATCH_SIZE) * (rw / QWEN3_PATCH_SIZE) / (QWEN3_MERGE_SIZE * QWEN3_MERGE_SIZE)
@ -2206,34 +1713,29 @@ mod tests {
    }
    #[test]
-    fn test_wire_prompt_preserves_expanded_image_pads() {
+    fn test_wire_prompt_collapses_image_pads() {
        let mut ctx = ContextState::new();
        ctx.push_no_log(Section::Conversation, AstNode::branch(Role::User, vec![
            AstNode::content("look:"),
            AstNode::image(vec![0xDE, 0xAD], "image/png", 512, 512),
        ]));
-        // AST side and wire side should both carry N image_pads + bookends —
+        // AST side: N image_pads + bookends, full budget accounting.
        // server's session.tokens length must match what vLLM's engine will
        // actually process. Binary image bytes are shipped separately in
        // multi_modal_data via the WireImage list.
        let n_expected = qwen3_image_token_count(512, 512) as usize;
        let full = ctx.token_ids();
        let n_image_pads_full = full.iter()
            .filter(|&&t| t == tokenizer::IMAGE_PAD).count();
-        assert_eq!(n_image_pads_full, n_expected);
+        assert_eq!(n_image_pads_full, qwen3_image_token_count(512, 512) as usize);
        // Wire side: single image_pad, bytes moved to images list.
        let (wire, images, _) = ctx.wire_prompt(0..ctx.conversation().len(), |_| false);
        let n_image_pads_wire = wire.iter()
            .filter(|&&t| t == tokenizer::IMAGE_PAD).count();
-        assert_eq!(n_image_pads_wire, n_expected);
+        assert_eq!(n_image_pads_wire, 1);
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].bytes, vec![0xDE, 0xAD]);
        assert_eq!(images[0].mime, "image/png");
-        // One pair of vision_start/vision_end bookends around the N pads.
+        // vision_start/vision_end bookends are preserved in wire form.
        assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_START).count(), 1);
        assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_END).count(), 1);
    }
--- a/src/agent/mod.rs
+++ b/src/agent/mod.rs
@ -17,7 +17,6 @@ pub mod api;
 pub mod context;
 pub mod oneshot;
 pub mod readout;
 pub mod salience;
 pub mod tokenizer;
 pub mod tools;
@ -29,11 +28,6 @@ use context::{AstNode, ContextState, Section, Ast, PendingToolCall, ResponsePars
 use crate::mind::log::ConversationLog;
 /// Write a trace line for `agent` to stderr, tagged with the agent's
 /// current provenance string.
 async fn agent_trace(agent: &Arc<Agent>, msg: String) {
    // Copy the tag out and release the state lock before printing.
    let state = agent.state.lock().await;
    let provenance = state.provenance.clone();
    drop(state);
    eprintln!("[agent:{provenance}] {msg}");
 }
 // --- Activity tracking (RAII guards) ---
 pub struct ActivityEntry {
@ -154,14 +148,6 @@ pub struct Agent {
    /// token handler, read by UI screens (amygdala). Manifest is
    /// `None` when the server has readout disabled.
    pub readout: readout::SharedReadoutBuffer,
    /// Long-lived gRPC session to the salience server, lazily opened
    /// on first use. Tracks appended tokens so subsequent turns send
    /// only the delta (prefix-cache reuse). None when not yet opened
    /// or when the session has died and needs reopening.
    ///
    /// Arc-wrapped so the spawned streaming task can share ownership
    /// (the task outlives the call site).
    pub grpc_session: std::sync::Arc<crate::Mutex<Option<api::salience::SessionHandle>>>,
 }
 /// Mutable agent state — behind its own mutex.
@ -182,7 +168,9 @@ pub struct AgentState {
    pub think_native: bool,
    /// Tool-based thinking — add a "think" tool for structured reasoning.
    pub think_tool: bool,
-    pub sampling: api::SamplingParams,
+    pub temperature: f32,
    pub top_p: f32,
    pub top_k: u32,
    pub activities: Vec<ActivityEntry>,
    next_activity_id: u64,
    pub pending_yield: bool,
@ -236,7 +224,6 @@ impl Agent {
            session_id,
            context: crate::Mutex::new(context),
            readout,
            grpc_session: std::sync::Arc::new(crate::Mutex::new(None)),
            state: crate::Mutex::new(AgentState {
                tools: agent_tools,
                mcp_tools: McpToolAccess::All,
@ -244,12 +231,9 @@ impl Agent {
                reasoning_effort: "none".to_string(),
                think_native: true,
                think_tool: false,
-                sampling: api::SamplingParams {
+                temperature: 0.6,
-                    temperature: 0.6,
+                top_p: 0.95,
-                    top_p: 0.95,
+                top_k: 20,
                    top_k: 20,
                    max_tokens: 4096,
                },
                activities: Vec::new(),
                next_activity_id: 0,
                pending_yield: false,
@ -308,9 +292,6 @@ impl Agent {
            // shouldn't bleed into the main emotional readout even
            // though they hit the same vLLM server.
            readout: readout::new_shared(),
            // Forks get their own session — can't share a bidi stream,
            // and forks have different conversation tails anyway.
            grpc_session: std::sync::Arc::new(crate::Mutex::new(None)),
            state: crate::Mutex::new(AgentState {
                tools,
                mcp_tools: McpToolAccess::None,
@ -318,7 +299,9 @@ impl Agent {
                reasoning_effort: "none".to_string(),
                think_native: st.think_native,
                think_tool: st.think_tool,
-                sampling: st.sampling,
+                temperature: st.temperature,
                top_p: st.top_p,
                top_k: st.top_k,
                activities: Vec::new(),
                next_activity_id: 0,
                pending_yield: false,
@ -333,35 +316,35 @@ impl Agent {
        })
    }
-    /// Assemble a ready-to-send prompt as interleaved wire chunks for
+    pub async fn assemble_prompt_tokens(&self) -> Vec<u32> {
-    /// the gRPC session path. Text runs are batched; each Image leaf
+        self.assemble_prompt().await.0
-    /// becomes its own chunk. Also trims the conversation to budget
+    }
-    /// first so we don't build a prompt the server will reject for
+
-    /// length.
+    /// Assemble a ready-to-send prompt: token stream in wire form (each
-    pub async fn assemble_prompt(&self)
+    /// image collapsed to a single `<|image_pad|>`) paired with the
-        -> (Vec<context::WireChunk>, Vec<context::WireImage>, u32)
+    /// images to attach as multi_modal_data.
-    {
+    ///
    /// Pre-send size check: if the context has grown past budget since the
    /// last compact (accumulation between turns, a fork's context getting
    /// bigger than expected, etc.), trim here rather than letting vLLM
    /// reject the request. Client-side tokenization means we already know
    /// the exact token count so there's no reason to round-trip an
    /// oversize request.
    pub async fn assemble_prompt(&self) -> (Vec<u32>, Vec<context::WireImage>) {
        let mut ctx = self.context.lock().await;
        if ctx.total_tokens() > context::context_budget_tokens() {
            ctx.trim_conversation();
        }
        let st = self.state.lock().await;
-        let conv_len = ctx.conversation().len();
+        let (mut tokens, images, _) =
-        let (mut chunks, images) = ctx.wire_chunks(0..conv_len, |_| false);
+            ctx.wire_prompt(0..ctx.conversation().len(), |_| false);
-        // Assistant-turn prologue. Merge into the trailing Tokens
+        tokens.push(tokenizer::IM_START);
        // chunk if there is one, else push as a new chunk.
        let mut prologue = vec![tokenizer::IM_START];
        if st.think_native {
-            prologue.extend(tokenizer::encode("assistant\n<think>\n"));
+            tokens.extend(tokenizer::encode("assistant\n<think>\n"));
        } else {
-            prologue.extend(tokenizer::encode("assistant\n"));
+            tokens.extend(tokenizer::encode("assistant\n"));
        }
-        match chunks.last_mut() {
+        (tokens, images)
            Some(context::WireChunk::Tokens(last)) => last.extend(prologue),
            _ => chunks.push(context::WireChunk::Tokens(prologue)),
        }
        let match_upto = ctx.client_match_upto();
        (chunks, images, match_upto)
    }
    /// Rebuild the tools section of the system prompt from the current tools list.
@ -397,16 +380,10 @@ impl Agent {
    pub async fn turn(
        agent: Arc<Agent>,
    ) -> Result<TurnResult> {
        agent_trace(&agent, format!("turn start")).await;
        // Collect finished background tools
        {
            let finished = agent.state.lock().await.active_tools.take_finished();
            if !finished.is_empty() {
                agent_trace(&agent, format!(
                    "collecting {} finished background tools",
                    finished.len(),
                )).await;
                let mut bg_ds = DispatchState::new();
                let mut results = Vec::new();
                for entry in finished {
@ -425,50 +402,21 @@ impl Agent {
        loop {
            let _thinking = start_activity(&agent, "thinking...").await;
            agent_trace(&agent, format!(
                "turn loop overflow_retries={} empty_retries={}",
                overflow_retries, empty_retries,
            )).await;
            let (rx, _stream_guard) = {
-                agent_trace(&agent, format!("assembling prompt")).await;
+                let (prompt_tokens, images) = agent.assemble_prompt().await;
                let (chunks, images, match_upto) = agent.assemble_prompt().await;
                let chunk_tokens: usize = chunks.iter().map(|c| match c {
                    context::WireChunk::Tokens(t) => t.len(),
                }).sum();
                agent_trace(&agent, format!(
                    "prompt assembled chunks={} tokens={} images={} match_upto={}",
                    chunks.len(), chunk_tokens, images.len(), match_upto,
                )).await;
                let st = agent.state.lock().await;
-                let readout_shape = agent.readout.lock().ok().and_then(|buf| {
+                agent.client.stream_completion_mm(
-                    buf.manifest.as_ref().map(|m| {
+                    &prompt_tokens,
-                        (m.layers.len() as u32, m.concepts.len() as u32)
+                    &images,
-                    })
+                    api::SamplingParams {
-                });
+                        temperature: st.temperature,
-                let sampling = st.sampling;
+                        top_p: st.top_p,
-                let priority = st.priority;
+                        top_k: st.top_k,
-                drop(st);
+                    },
-                agent_trace(&agent, format!(
+                    st.priority,
                    "starting stream max_tokens={} temperature={} top_p={} top_k={} priority={:?} readout_shape={:?}",
                    sampling.max_tokens,
                    sampling.temperature,
                    sampling.top_p,
                    sampling.top_k,
                    priority,
                    readout_shape,
                )).await;
                agent.client.stream_session_mm(
                    agent.grpc_session.clone(),
                    chunks,
                    images,
                    match_upto,
                    sampling,
                    priority,
                    readout_shape,
                )
            };
            agent_trace(&agent, format!("stream task spawned")).await;
            let branch_idx = {
                let mut ctx = agent.context.lock().await;
@ -479,41 +427,11 @@ impl Agent {
                idx
            };
-            let think_native = agent.state.lock().await.think_native;
+            let parser = ResponseParser::new(branch_idx);
            let parser = ResponseParser::new(branch_idx, think_native);
            let (mut tool_rx, parser_handle) = parser.run(rx, agent.clone());
            agent_trace(&agent, format!(
                "parser started branch_idx={} think_native={}",
                branch_idx, think_native,
            )).await;
            let mut pending_calls: Vec<PendingToolCall> = Vec::new();
-            loop {
+            while let Some(call) = tool_rx.recv().await {
                let call = match tokio::time::timeout(
                    std::time::Duration::from_secs(15),
                    tool_rx.recv(),
                ).await {
                    Ok(Some(call)) => call,
                    Ok(None) => {
                        agent_trace(&agent, format!(
                            "tool channel closed pending_calls={}",
                            pending_calls.len(),
                        )).await;
                        break;
                    }
                    Err(_) => {
                        agent_trace(&agent, format!(
                            "waiting for parser/tool events pending_calls={}",
                            pending_calls.len(),
                        )).await;
                        continue;
                    }
                };
                agent_trace(&agent, format!(
                    "tool call received id={} name={} args_len={}",
                    call.id, call.name, call.arguments.len(),
                )).await;
                let call_clone = call.clone();
                let agent_handle = agent.clone();
                let handle = tokio::spawn(async move {
@ -536,10 +454,8 @@ impl Agent {
            }
            // Check for stream/parse errors
            agent_trace(&agent, format!("awaiting parser task")).await;
            match parser_handle.await {
                Ok(Err(e)) => {
                    agent_trace(&agent, format!("parser returned error: {:#}", e)).await;
                    if context::is_context_overflow(&e) && overflow_retries < 2 {
                        overflow_retries += 1;
                        let msg = format!("context overflow — compacting ({}/2)", overflow_retries);
@ -553,12 +469,8 @@ impl Agent {
                    }
                    return Err(e);
                }
-                Err(e) => {
+                Err(e) => return Err(anyhow::anyhow!("parser task panicked: {}", e)),
                    agent_trace(&agent, format!("parser task panicked: {}", e)).await;
                    return Err(anyhow::anyhow!("parser task panicked: {}", e));
                }
                Ok(Ok(())) => {
                    agent_trace(&agent, format!("parser completed")).await;
                    // Assistant response was pushed to context by the parser;
                    // log it now that parsing is complete.
                    let ctx = agent.context.lock().await;
@ -579,10 +491,6 @@ impl Agent {
            if !has_content && pending_calls.is_empty() {
                if empty_retries < 2 {
                    empty_retries += 1;
                    agent_trace(&agent, format!(
                        "empty response retry {}/2",
                        empty_retries,
                    )).await;
                    agent.push_node(AstNode::user_msg(
                        "[system] Your previous response was empty. \
                         Please respond with text or use a tool."
@ -596,10 +504,6 @@ impl Agent {
            // Wait for tool calls to complete
            if !pending_calls.is_empty() {
                ds.had_tool_calls = true;
                agent_trace(&agent, format!(
                    "waiting for {} foreground tools",
                    pending_calls.len(),
                )).await;
                let handles = agent.state.lock().await.active_tools.take_foreground();
                let mut results = Vec::new();
@ -620,16 +524,6 @@ impl Agent {
            if st.pending_model_switch.is_some() { ds.model_switch = st.pending_model_switch.take(); }
            if st.pending_dmn_pause { ds.dmn_pause = true; st.pending_dmn_pause = false; }
            drop(st);
            agent_trace(&agent, format!(
                "turn complete yield={} tool_calls={} tool_errors={} model_switch={:?} dmn_pause={}",
                ds.yield_requested,
                ds.had_tool_calls,
                ds.tool_errors,
                ds.model_switch,
                ds.dmn_pause,
            )).await;
            return Ok(TurnResult {
                yield_requested: ds.yield_requested,
                had_tool_calls: ds.had_tool_calls,
--- a/src/agent/oneshot.rs
+++ b/src/agent/oneshot.rs
@ -12,9 +12,7 @@ use crate::subconscious::{defs, prompts};
 use std::collections::HashMap;
 use std::fs;
 use std::io::Write as _;
 use std::path::PathBuf;
 use std::time::Instant;
 use super::context::AstNode;
 use super::tools::{self as agent_tools};
@ -108,10 +106,6 @@ pub async fn save_agent_log(name: &str, agent: &std::sync::Arc<Agent>) -> RunSta
    stats
 }
 /// Write one event line to stderr, prefixed with `[agent:<agent>]`.
 /// `msg` is pre-built by the caller with `format_args!`.
 fn log_agent_event(agent: &str, msg: std::fmt::Arguments) {
    eprintln!("[agent:{agent}] {msg}");
 }
 fn compute_run_stats(conversation: &[super::context::AstNode]) -> RunStats {
    use super::context::{AstNode, NodeBody};
@ -275,7 +269,7 @@ impl AutoAgent {
            let mut st = agent.state.lock().await;
            st.provenance = format!("standalone:{}", self.name);
            st.tools = self.tools.clone();
-            st.sampling.temperature = self.temperature;
+            st.temperature = self.temperature;
            st.priority = Some(self.priority);
        }
@ -351,44 +345,20 @@ impl AutoAgent {
        bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
    ) -> Result<(), String> {
        dbglog!("[auto] {} starting, {} steps", self.name, self.steps.len());
        log_agent_event(&self.name, format_args!(
            "starting run steps={} temperature={} priority={}",
            self.steps.len(), self.temperature, self.priority));
        let run_start = Instant::now();
        for (i, step) in self.steps.iter().enumerate() {
            self.turn = i + 1;
            self.current_phase = step.phase.clone();
            let step_start = Instant::now();
            log_agent_event(&self.name, format_args!(
                "step {}/{} phase={} prompt_bytes={}",
                i + 1, self.steps.len(), step.phase, step.prompt.len()));
            if let Some(ref check) = bail_fn {
                log_agent_event(&self.name, format_args!(
                    "step {}/{} phase={} bail check", i + 1, self.steps.len(), step.phase));
                check(i)?;
                log_agent_event(&self.name, format_args!(
                    "step {}/{} phase={} bail ok", i + 1, self.steps.len(), step.phase));
            }
            backend.push_node(AstNode::system_msg(&step.prompt)).await;
            Agent::turn(backend.0.clone()).await
-                .map_err(|e| {
+                .map_err(|e| format!("{}: {}", self.name, e))?;
                    log_agent_event(&self.name, format_args!(
                        "step {}/{} phase={} failed after {:.2}s: {}",
                        i + 1, self.steps.len(), step.phase,
                        step_start.elapsed().as_secs_f64(), e));
                    format!("{}: {}", self.name, e)
                })?;
            log_agent_event(&self.name, format_args!(
                "step {}/{} phase={} done in {:.2}s",
                i + 1, self.steps.len(), step.phase,
                step_start.elapsed().as_secs_f64()));
        }
        log_agent_event(&self.name, format_args!(
            "run completed in {:.2}s", run_start.elapsed().as_secs_f64()));
        Ok(())
    }
@ -412,29 +382,8 @@ pub async fn run_one_agent(
    count: usize,
    keys: Option<&[String]>,
 ) -> Result<AgentResult, String> {
    let run_start = Instant::now();
    log_agent_event(agent_name, format_args!(
        "run_one_agent start pid={} count={} explicit_keys={}",
        std::process::id(), count, keys.map(|k| k.len()).unwrap_or(0)));
    log_agent_event(agent_name, format_args!(
        "env POC_SESSION_ID={:?} POC_TRANSCRIPT_PATH={:?} POC_AGENT_OUTPUT_DIR={:?}",
        std::env::var("POC_SESSION_ID").ok(),
        std::env::var("POC_TRANSCRIPT_PATH").ok(),
        std::env::var("POC_AGENT_OUTPUT_DIR").ok()));
    if let Some(session) = crate::session::HookSession::from_env() {
        let transcript = session.transcript();
        log_agent_event(agent_name, format_args!(
            "session={} transcript={} size={} exists={}",
            session.session_id, transcript.path, transcript.size, transcript.exists()));
    } else {
        log_agent_event(agent_name, format_args!("no hook session in environment"));
    }
    let def = defs::get_def(agent_name)
        .ok_or_else(|| format!("no .agent file for {}", agent_name))?;
    log_agent_event(agent_name, format_args!(
        "definition loaded steps={} tools={:?} count={:?} priority={} bail={:?}",
        def.steps.len(), def.tools, def.count, def.priority, def.bail));
    // State dir for agent output files
    let state_dir = std::env::var("POC_AGENT_OUTPUT_DIR")
@ -443,7 +392,6 @@ pub async fn run_one_agent(
    fs::create_dir_all(&state_dir)
        .map_err(|e| format!("create state dir: {}", e))?;
    unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &state_dir); }
    log_agent_event(agent_name, format_args!("state_dir={}", state_dir.display()));
    // Build prompt batch — either from explicit keys or the agent's query
    let agent_batch = if let Some(keys) = keys {
@ -463,8 +411,6 @@ pub async fn run_one_agent(
        prompts::AgentBatch { steps: resolved_steps, node_keys: all_keys }
    } else {
        let effective_count = def.count.unwrap_or(count);
        log_agent_event(agent_name, format_args!(
            "resolving default prompt placeholders effective_count={}", effective_count));
        defs::run_agent(&def, effective_count, &Default::default()).await?
    };
@ -517,14 +463,6 @@ pub async fn run_one_agent(
        })),
    });
    let n_steps = agent_batch.steps.len();
    log_agent_event(agent_name, format_args!(
        "prompt batch ready steps={} node_keys={}",
        n_steps, agent_batch.node_keys.len()));
    for (i, step) in agent_batch.steps.iter().enumerate() {
        log_agent_event(agent_name, format_args!(
            "prompt step {}/{} phase={} bytes={}",
            i + 1, n_steps, step.phase, step.prompt.len()));
    }
    // Guard: reject oversized first prompt
    let max_prompt_bytes = 800_000;
@ -547,9 +485,6 @@ pub async fn run_one_agent(
    let phases: Vec<&str> = agent_batch.steps.iter().map(|s| s.phase.as_str()).collect();
    dbglog!("[{}] {} step(s) {:?}, {}KB initial, {} nodes",
        agent_name, n_steps, phases, first_len / 1024, agent_batch.node_keys.len());
    log_agent_event(agent_name, format_args!(
        "tools enabled: {}",
        effective_tools.iter().map(|t| t.name).collect::<Vec<_>>().join(", ")));
    let prompts: Vec<String> = agent_batch.steps.iter()
        .map(|s| s.prompt.clone()).collect();
@ -562,25 +497,18 @@ pub async fn run_one_agent(
    let bail_script = def.bail.as_ref().map(|name| defs::agents_dir().join(name));
    let state_dir_for_bail = state_dir.clone();
    let our_pid = std::process::id();
-    let our_pid_file = std::env::var("POC_AGENT_PID_FILE")
+    let our_pid_file = format!("pid-{}", our_pid);
        .unwrap_or_else(|_| format!("pid-{}", our_pid));
    let step_phases_for_bail = step_phases.clone();
    let bail_fn = move |step_idx: usize| -> Result<(), String> {
        if let Some(ref script) = bail_script {
            let phase = step_phases_for_bail.get(step_idx)
                .map(String::as_str).unwrap_or("");
            eprintln!(
                "[agent:bail] script={} state_dir={} pid_file={} phase={}",
                script.display(), state_dir_for_bail.display(), our_pid_file, phase);
            let status = std::process::Command::new(script)
                .arg(&our_pid_file)
                .arg(phase)
                .current_dir(&state_dir_for_bail)
                .status()
                .map_err(|e| format!("bail script {:?} failed: {}", script, e))?;
            eprintln!(
                "[agent:bail] script={} phase={} status={}",
                script.display(), phase, status);
            if !status.success() {
                return Err(format!("bailed at step {}: {:?} exited {}",
                    step_idx + 1, script.file_name().unwrap_or_default(),
@ -593,8 +521,6 @@ pub async fn run_one_agent(
    call_api_with_tools_sync(
        agent_name, &prompts, &step_phases, def.temperature, def.priority,
        &effective_tools, Some(&bail_fn))?;
    log_agent_event(agent_name, format_args!(
        "run_one_agent completed in {:.2}s", run_start.elapsed().as_secs_f64()));
    Ok(AgentResult {
        node_keys: agent_batch.node_keys,
@ -672,15 +598,6 @@ pub fn spawn_agent(
    agent_name: &str,
    state_dir: &std::path::Path,
    session_id: &str,
 ) -> Option<SpawnResult> {
    spawn_agent_with_transcript(agent_name, state_dir, session_id, None)
 }
 pub fn spawn_agent_with_transcript(
    agent_name: &str,
    state_dir: &std::path::Path,
    session_id: &str,
    transcript_path: Option<&str>,
 ) -> Option<SpawnResult> {
    let def = defs::get_def(agent_name)?;
    let first_phase = def.steps.first()
@ -691,41 +608,17 @@ pub fn spawn_agent_with_transcript(
        .join(format!(".consciousness/logs/{}", agent_name));
    fs::create_dir_all(&log_dir).ok();
    let log_path = log_dir.join(format!("{}.log", store::compact_timestamp()));
-    let mut agent_log = fs::File::create(&log_path)
+    let agent_log = fs::File::create(&log_path)
        .unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
-    let mut cmd = std::process::Command::new("bash");
+    let child = std::process::Command::new("poc-memory")
-    cmd.args([
+        .args(["agent", "run", agent_name, "--count", "1", "--local",
-        "-lc",
+               "--state-dir", &state_dir.to_string_lossy()])
-        r#"
+        .env("POC_SESSION_ID", session_id)
-set +e
+        .stdout(agent_log.try_clone().unwrap_or_else(|_| fs::File::create("/dev/null").unwrap()))
-export POC_AGENT_PID_FILE="pid-$$"
+        .stderr(agent_log)
-"$@"
+        .spawn()
-status=$?
+        .ok()?;
 printf '=== agent process exit status: %s at %s ===\n' "$status" "$(date --iso-8601=seconds)"
 exit "$status"
 "#,
        "poc-memory-agent-wrapper",
        "poc-memory", "agent", "run", agent_name, "--count", "1", "--local",
        "--state-dir", &state_dir.to_string_lossy(),
    ]).env("POC_SESSION_ID", session_id);
    if let Some(path) = transcript_path.filter(|p| !p.is_empty()) {
        cmd.env("POC_TRANSCRIPT_PATH", path);
    }
    let _ = writeln!(agent_log, "=== spawn {} ===", chrono::Local::now().format("%Y-%m-%dT%H:%M:%S"));
    let _ = writeln!(agent_log, "agent={agent_name}");
    let _ = writeln!(agent_log, "state_dir={}", state_dir.display());
    let _ = writeln!(agent_log, "session_id={session_id}");
    let _ = writeln!(agent_log, "transcript_path={}", transcript_path.unwrap_or(""));
    let _ = writeln!(agent_log, "first_phase={first_phase}");
    let _ = writeln!(agent_log, "command=poc-memory agent run {agent_name} --count 1 --local --state-dir {}", state_dir.display());
    let _ = agent_log.flush();
    let child_stdout = agent_log.try_clone()
        .unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
    let child_stderr = agent_log;
    let child = cmd.stdout(child_stdout).stderr(child_stderr).spawn().ok()?;
    let pid = child.id();
    let pid_path = state_dir.join(format!("pid-{}", pid));
--- a/src/agent/salience.rs
+++ b/src/agent/salience.rs
@ -1,309 +0,0 @@
 // agent/salience.rs — peak extraction from per-token concept-readout traces.
 //
 // Consumes a trace of `ReadoutEntry` (per-token per-layer per-concept
 // projections streamed from the vLLM server) and produces a compact
 // list of `SaliencePeak` events — one per contiguous above-threshold
 // region per concept, placed at the local maximum.
 //
 // Pure function. No I/O, no async, no side effects. Caller supplies the
 // trace slice and manifest; caller decides what to do with the events.
 //
 // See also: `salience-trace-plumbing-architecture` memory node.
 use super::api::ReadoutManifest;
 use super::readout::ReadoutEntry;
 /// One salient moment in a trace — a concept channel crossed threshold,
 /// and we picked the local maximum within the contiguous above-threshold
 /// run. Produced by `pick_peaks`, one per run per concept.
 #[derive(Debug, Clone, PartialEq)]
 pub struct SaliencePeak {
    /// Index into the trace slice (0-based) where the peak occurred.
    pub token_offset: usize,
    /// Concept name, cloned from the manifest.
    pub concept: String,
    /// z-score of the peak value vs the trace's own distribution for
    /// that concept. Never below the `sigma_threshold` the peak was
    /// extracted with, so it is positive whenever that threshold is.
    pub intensity: f32,
 }
 /// Tunables for peak extraction. See the `Default` impl for the stock values.
 #[derive(Debug, Clone)]
 pub struct PeakConfig {
    /// Minimum z-score to count as a peak. Default 2.0 (~top 2.5% assuming
    /// normal-ish distribution, though readouts are rarely normal).
    pub sigma_threshold: f32,
    /// Minimum standard deviation of a concept channel for peaks to be
    /// reported. If a channel is numerically flat across the whole trace,
    /// tiny fluctuations can produce spurious "peaks" with huge z-scores;
    /// require at least this much variation before trusting the channel.
    /// Default 1e-4.
    pub min_std: f32,
 }
 impl Default for PeakConfig {
    fn default() -> Self {
        Self { sigma_threshold: 2.0, min_std: 1e-4 }
    }
 }
 /// Scan one layer of a readout trace and report its salient peaks.
 ///
 /// For every concept in `manifest`, the per-token values at `layer_idx`
 /// are z-scored against that concept's own mean and population standard
 /// deviation over the whole trace. Each maximal run of consecutive tokens
 /// whose z-score is at least `config.sigma_threshold` yields exactly one
 /// [`SaliencePeak`], placed at the first token holding the run's highest
 /// z-score.
 ///
 /// Tokens whose readout has no row at `layer_idx`, or whose row length
 /// differs from the manifest's concept count, contribute `0.0` to every
 /// concept. Concepts whose standard deviation is below `config.min_std`
 /// are skipped entirely. An empty trace or an empty manifest gives an
 /// empty result.
 ///
 /// The result is ordered by `token_offset` ascending, with ties broken by
 /// `concept` in lexicographic order.
 pub fn pick_peaks(
    trace: &[ReadoutEntry],
    manifest: &ReadoutManifest,
    layer_idx: usize,
    config: &PeakConfig,
 ) -> Vec<SaliencePeak> {
    let concept_count = manifest.concepts.len();
    if trace.is_empty() || concept_count == 0 {
        return Vec::new();
    }

    // One column of per-token values for each concept. A token only
    // contributes real values when its row for this layer exists and has
    // exactly one value per concept; otherwise it counts as all zeros.
    let mut columns: Vec<Vec<f32>> = vec![Vec::with_capacity(trace.len()); concept_count];
    for entry in trace {
        let row = entry
            .readout
            .get(layer_idx)
            .filter(|row| row.len() == concept_count);
        for (slot, column) in columns.iter_mut().enumerate() {
            column.push(row.map_or(0.0, |row| row[slot]));
        }
    }

    let mut found: Vec<SaliencePeak> = Vec::new();
    for (concept, column) in manifest.concepts.iter().zip(&columns) {
        let (mean, std) = mean_std(column);
        if std < config.min_std {
            continue;
        }

        // `open_run` holds (offset, z) of the best token seen so far in the
        // above-threshold run currently being walked, if any.
        let mut open_run: Option<(usize, f32)> = None;
        for (offset, value) in column.iter().enumerate() {
            let z = (*value - mean) / std;
            if z >= config.sigma_threshold {
                // Strict comparison keeps the earliest token on ties.
                let is_new_best = open_run.map_or(true, |(_, best)| z > best);
                if is_new_best {
                    open_run = Some((offset, z));
                }
            } else if let Some((token_offset, intensity)) = open_run.take() {
                found.push(SaliencePeak {
                    token_offset,
                    concept: concept.clone(),
                    intensity,
                });
            }
        }
        // A run that reaches the end of the trace still counts.
        if let Some((token_offset, intensity)) = open_run {
            found.push(SaliencePeak {
                token_offset,
                concept: concept.clone(),
                intensity,
            });
        }
    }

    found.sort_by(|a, b| (a.token_offset, &a.concept).cmp(&(b.token_offset, &b.concept)));
    found
 }
 /// Arithmetic mean and population (divide-by-n) standard deviation of `xs`.
 /// An empty slice yields `(0.0, 0.0)`.
 fn mean_std(xs: &[f32]) -> (f32, f32) {
    match xs.len() {
        0 => (0.0, 0.0),
        len => {
            let count = len as f32;
            let total: f32 = xs.iter().sum();
            let mean = total / count;
            let squared_dev: f32 = xs.iter().map(|x| (x - mean).powi(2)).sum();
            (mean, (squared_dev / count).sqrt())
        }
    }
 }
 // Unit tests for `pick_peaks`. Every fixture uses a single hooked layer
 // (index 0) and the default `PeakConfig` unless stated otherwise.
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Build a manifest with the given concept names and layer ids.
    fn manifest(concepts: &[&str], layers: &[u32]) -> ReadoutManifest {
        ReadoutManifest {
            concepts: concepts.iter().map(|s| s.to_string()).collect(),
            layers: layers.to_vec(),
        }
    }
    /// Build a trace where all entries have one hooked layer and the
    /// given per-token values for each concept. `values[t][c]` = value
    /// at token t, concept c. Token ids are just the row index.
    fn trace(values: &[Vec<f32>]) -> Vec<ReadoutEntry> {
        values.iter().enumerate().map(|(i, row)| ReadoutEntry {
            token_id: i as u32,
            readout: vec![row.clone()],
        }).collect()
    }
    #[test]
    fn empty_trace_returns_empty() {
        let m = manifest(&["curious"], &[63]);
        let peaks = pick_peaks(&[], &m, 0, &PeakConfig::default());
        assert!(peaks.is_empty());
    }
    #[test]
    fn empty_manifest_returns_empty() {
        let m = manifest(&[], &[63]);
        let t = trace(&[vec![], vec![], vec![]]);
        let peaks = pick_peaks(&t, &m, 0, &PeakConfig::default());
        assert!(peaks.is_empty());
    }
    #[test]
    fn flat_channel_produces_no_peaks() {
        // Constant channel: std is 0, below `min_std`, so it is skipped.
        let m = manifest(&["curious"], &[63]);
        let t = trace(&[vec![1.0], vec![1.0], vec![1.0], vec![1.0], vec![1.0]]);
        let peaks = pick_peaks(&t, &m, 0, &PeakConfig::default());
        assert!(peaks.is_empty(), "flat channel should produce no peaks, got {:?}", peaks);
    }
    #[test]
    fn single_spike_detected() {
        // Ten tokens, nine baseline zeros and one 5.0 spike — that single
        // token's z-score (3.0) easily exceeds 2σ.
        let m = manifest(&["curious"], &[63]);
        let mut rows: Vec<Vec<f32>> = (0..10).map(|_| vec![0.0]).collect();
        rows[5] = vec![5.0];
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert_eq!(peaks.len(), 1);
        assert_eq!(peaks[0].concept, "curious");
        assert_eq!(peaks[0].token_offset, 5);
        assert!(peaks[0].intensity >= 2.0);
    }
    #[test]
    fn contiguous_region_emits_one_peak_at_max() {
        // Values 0, 0, 0, 2, 5, 3, 0, 0 — the 2-5-3 hump; the peak
        // should land at offset 4 (the 5).
        // NOTE(review): with these values only the 5 clears the default
        // 2σ threshold (z ≈ 2.1; the 2 and the 3 are below 1σ), so the
        // run here is a single token. `trailing_run_is_flushed` is the
        // test that exercises a genuinely multi-token run.
        let m = manifest(&["aha"], &[63]);
        let rows: Vec<Vec<f32>> = [0.0, 0.0, 0.0, 2.0, 5.0, 3.0, 0.0, 0.0]
            .iter().map(|v| vec![*v]).collect();
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert_eq!(peaks.len(), 1, "expected one peak for one contiguous run, got {:?}", peaks);
        assert_eq!(peaks[0].token_offset, 4);
    }
    #[test]
    fn multiple_concepts_independent() {
        let m = manifest(&["curious", "aha"], &[63]);
        // curious spikes at 2, aha spikes at 7
        let rows: Vec<Vec<f32>> = (0..10).map(|i| {
            let c = if i == 2 { 4.0 } else { 0.0 };
            let a = if i == 7 { 4.0 } else { 0.0 };
            vec![c, a]
        }).collect();
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert_eq!(peaks.len(), 2);
        // Sorted by offset — curious(2) comes first, aha(7) second.
        assert_eq!(peaks[0].concept, "curious");
        assert_eq!(peaks[0].token_offset, 2);
        assert_eq!(peaks[1].concept, "aha");
        assert_eq!(peaks[1].token_offset, 7);
    }
    #[test]
    fn two_separated_runs_emit_two_peaks() {
        // Longer baseline so the two spikes don't dominate the global
        // mean/std — 30 tokens of zeros with two 5.0 spikes at 10 and 20.
        let m = manifest(&["curious"], &[63]);
        let mut rows: Vec<Vec<f32>> = (0..30).map(|_| vec![0.0]).collect();
        rows[10] = vec![5.0];
        rows[20] = vec![5.0];
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert_eq!(peaks.len(), 2, "expected two peaks for two runs, got {:?}", peaks);
        assert_eq!(peaks[0].token_offset, 10);
        assert_eq!(peaks[1].token_offset, 20);
    }
    #[test]
    fn trailing_run_is_flushed() {
        // Peak runs to the end of the trace — must still emit.
        // Use a longer baseline so the trailing spike is genuinely
        // above threshold on the global stats (all three of 3, 5, 4
        // clear 2σ here, so this is a three-token run).
        let m = manifest(&["curious"], &[63]);
        let mut rows: Vec<Vec<f32>> = (0..30).map(|_| vec![0.0]).collect();
        rows[27] = vec![3.0];
        rows[28] = vec![5.0];
        rows[29] = vec![4.0];
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert_eq!(peaks.len(), 1, "expected one peak for one trailing run, got {:?}", peaks);
        assert_eq!(peaks[0].token_offset, 28, "peak should land at the local max of the trailing run");
    }
    #[test]
    fn sub_threshold_produces_nothing() {
        // All non-zero values are small; z-scores (±1) won't cross 2σ.
        let m = manifest(&["curious"], &[63]);
        let rows: Vec<Vec<f32>> = [0.0, 0.1, 0.0, 0.1, 0.0, 0.1, 0.0, 0.1]
            .iter().map(|v| vec![*v]).collect();
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert!(peaks.is_empty(), "below-threshold wiggle should produce no peaks, got {:?}", peaks);
    }
    #[test]
    fn layer_out_of_range_returns_empty() {
        let m = manifest(&["curious"], &[63]);
        let rows: Vec<Vec<f32>> = (0..10).map(|i| vec![if i == 5 { 5.0 } else { 0.0 }]).collect();
        // Trace has one layer (index 0); asking for layer 3 should see
        // all-zero columns, which are flat and produce no peaks.
        let peaks = pick_peaks(&trace(&rows), &m, 3, &PeakConfig::default());
        assert!(peaks.is_empty());
    }
    #[test]
    fn manifest_concept_count_mismatch_is_safe() {
        // Manifest says 2 concepts; each readout row only has 1 value.
        // Rows should be treated as all-zero (via the len check) and
        // produce no peaks without panicking.
        let m = manifest(&["a", "b"], &[63]);
        let rows: Vec<Vec<f32>> = (0..10).map(|_| vec![1.0]).collect();
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert!(peaks.is_empty());
    }
    #[test]
    fn threshold_tunable() {
        // Same spike as `single_spike_detected`, stricter threshold — no peak.
        let m = manifest(&["curious"], &[63]);
        let mut rows: Vec<Vec<f32>> = (0..10).map(|_| vec![0.0]).collect();
        rows[5] = vec![5.0];
        let strict = PeakConfig { sigma_threshold: 100.0, ..PeakConfig::default() };
        let peaks = pick_peaks(&trace(&rows), &m, 0, &strict);
        assert!(peaks.is_empty());
    }
 }
--- a/src/agent/tokenizer.rs
+++ b/src/agent/tokenizer.rs
@ -33,17 +33,16 @@ fn get() -> Option<&'static Tokenizer> {
    TOKENIZER.get()
 }
 /// Return the global tokenizer, panicking if it has not been initialized.
 fn expect_tokenizer() -> &'static Tokenizer {
    let Some(tokenizer) = get() else {
        panic!("tokenizer not initialized; expected ~/.consciousness/tokenizer-qwen35.json");
    };
    tokenizer
 }
 /// Tokenize a raw string, returning token IDs.
 /// Returns empty vec if the tokenizer is not initialized.
 pub fn encode(text: &str) -> Vec<u32> {
-    expect_tokenizer()
+    match get() {
-        .encode(text, false)
+        Some(t) => t.encode(text, false)
-        .unwrap_or_else(|e| panic!("tokenization failed: {}", e))
+            .unwrap_or_else(|e| panic!("tokenization failed: {}", e))
-        .get_ids()
+            .get_ids()
-        .to_vec()
+            .to_vec(),
        None => vec![],
    }
 }
 /// Tokenize a chat entry with template wrapping:
@ -67,12 +66,15 @@ pub fn count(text: &str) -> usize {
 /// Decode token IDs back to text.
 pub fn decode(ids: &[u32]) -> String {
-    expect_tokenizer()
+    match get() {
-        .decode(ids, true)
+        Some(t) => t.decode(ids, true)
-        .unwrap_or_else(|e| panic!("detokenization failed: {}", e))
+            .unwrap_or_else(|e| panic!("detokenization failed: {}", e)),
        None => String::new(),
    }
 }
 /// Check if the tokenizer is initialized.
 pub fn is_initialized() -> bool {
    TOKENIZER.get().is_some()
 }
--- a/src/agent/tools/memory.rs
+++ b/src/agent/tools/memory.rs
@ -209,24 +209,7 @@ memory_tool!(graph_trace, ref, key: [str]);
 // ── Definitions ────────────────────────────────────────────────
-async fn jsonargs_memory_new(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args: &serde_json::Value) -> Result<String> {
+pub fn memory_tools() -> [super::Tool; 20] {
    jsonargs_memory_write(agent, args).await
 }
 /// Dispatch a `memory_link` call: when `args` carries a numeric `strength`
 /// the whole argument object is forwarded to `jsonargs_memory_link_set`;
 /// otherwise only `source` and `target` are forwarded to
 /// `jsonargs_memory_link_add`. Both `source` and `target` must be present
 /// as strings in either case.
 async fn jsonargs_memory_link(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args: &serde_json::Value) -> Result<String> {
    let source = get_str(args, "source")?;
    let target = get_str(args, "target")?;

    let has_strength = args
        .get("strength")
        .and_then(serde_json::Value::as_f64)
        .is_some();
    if has_strength {
        return jsonargs_memory_link_set(agent, args).await;
    }

    let add_args = serde_json::json!({
        "source": source,
        "target": target,
    });
    jsonargs_memory_link_add(agent, &add_args).await
 }
 pub fn memory_tools() -> [super::Tool; 22] {
    use super::Tool;
    macro_rules! tool {
        ($name:ident, $desc:expr, $params:expr) => {
@ -251,11 +234,6 @@ pub fn memory_tools() -> [super::Tool; 22] {
            "properties": { "key": {"type": "string"}, "content": {"type": "string"} },
            "required": ["key", "content"]
        }"#),
        tool!(memory_new, "Create or update a memory node. Alias for memory_write.", r#"{
            "type": "object",
            "properties": { "key": {"type": "string"}, "content": {"type": "string"} },
            "required": ["key", "content"]
        }"#),
        tool!(memory_search, "Search via spreading activation from seed keys.", r#"{
            "type": "object",
            "properties": {
@ -286,16 +264,6 @@ pub fn memory_tools() -> [super::Tool; 22] {
            "properties": { "source": {"type": "string"}, "target": {"type": "string"} },
            "required": ["source", "target"]
        }"#),
        tool!(memory_link, "Add or update a link between two memory nodes. Alias for memory_link_add/memory_link_set.", r#"{
            "type": "object",
            "properties": {
                "source": {"type": "string"},
                "target": {"type": "string"},
                "strength": {"type": "number", "description": "Optional; 0.01 to 1.0"},
                "label": {"type": "string", "description": "Accepted for compatibility; currently ignored"}
            },
            "required": ["source", "target"]
        }"#),
        tool!(memory_delete, "Soft-delete a node.", r#"{
            "type": "object",
            "properties": { "key": {"type": "string"} },
--- a/src/agent/tools/vision.rs
+++ b/src/agent/tools/vision.rs
@ -57,18 +57,15 @@ async fn view_image(
    let (w, h) = (dim.width as u32, dim.height as u32);
    let mime = mime_from_extension(path);
    let agent = agent.context("view_image requires agent context")?;
    // token_count is populated when the image reaches the server via
    // AppendImage (the server is authoritative for the IMAGE_PAD
    // count). Placeholder of 0 here until AppendImage is wired; the
    // leaf's count gets rewritten from the RPC response at send time.
    let image_leaf = AstNode::image(bytes.clone(), mime, h, w);
    let token_count = image_leaf.leaf().unwrap().tokens().saturating_sub(2);
    let agent = agent.context("view_image requires agent context")?;
    let branch = AstNode::branch(Role::User, vec![image_leaf]);
    agent.context.lock().await.push_log(Section::Conversation, branch);
-    Ok(format!("loaded {} ({}, {}x{})", a.file_path, mime, w, h))
+    Ok(format!("loaded {} ({}, {}x{}, {} tokens)",
        a.file_path, mime, w, h, token_count))
 }
 fn mime_from_extension(path: &std::path::Path) -> &'static str {
--- a/src/bin/ch.rs
+++ b/src/bin/ch.rs
@ -1,112 +0,0 @@
 // `ch` — minimal channel CLI.
 //
 //   ch send <channel-path> <message>
 //   ch recv <channel-path> [--all-new] [--min-count N]
 //
 // Connects to ~/.consciousness/channels/<top>.sock and speaks the
 // channel.capnp protocol to the appropriate daemon.
 use std::path::PathBuf;
 use std::process::ExitCode;
 use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
 use futures::AsyncReadExt;
 use tokio_util::compat::TokioAsyncReadCompatExt;
 use consciousness::channel_capnp::channel_server;
 /// Directory holding the channel daemon sockets: `.consciousness/channels`
 /// under the home directory, or under an empty path if no home is known.
 fn channels_dir() -> PathBuf {
    let mut dir = dirs::home_dir().unwrap_or_default();
    dir.push(".consciousness/channels");
    dir
 }
 /// Socket path for `channel`: the text before the first `.` (or the whole
 /// name when there is no `.`) selects `<top>.sock` in the channels directory.
 fn sock_for(channel: &str) -> PathBuf {
    let top = match channel.split_once('.') {
        Some((head, _)) => head,
        None => channel,
    };
    let mut path = channels_dir();
    path.push(format!("{top}.sock"));
    path
 }
 /// Open the Unix socket at `sock`, start a two-party Cap'n Proto RPC
 /// system over it as the client side, and return the server's bootstrap
 /// capability. The RPC system is handed to `tokio::task::spawn_local`.
 /// Fails with a `connect <path>: <error>` message when the socket cannot
 /// be opened.
 async fn connect(sock: &std::path::Path) -> Result<channel_server::Client, String> {
    let stream = match tokio::net::UnixStream::connect(sock).await {
        Ok(stream) => stream,
        Err(e) => return Err(format!("connect {}: {e}", sock.display())),
    };
    let (read_half, write_half) = stream.compat().split();
    let buffered_in = futures::io::BufReader::new(read_half);
    let buffered_out = futures::io::BufWriter::new(write_half);
    let network = twoparty::VatNetwork::new(
        buffered_in,
        buffered_out,
        rpc_twoparty_capnp::Side::Client,
        Default::default(),
    );
    let mut rpc_system = RpcSystem::new(Box::new(network), None);
    let client: channel_server::Client = rpc_system.bootstrap(rpc_twoparty_capnp::Side::Server);
    tokio::task::spawn_local(rpc_system);
    Ok(client)
 }
 /// Entry point for the `ch` CLI: runs `send` or `recv` against the daemon
 /// socket that owns the given channel.
 ///
 /// Exit status is 0 on success, 1 when the command fails (the error is
 /// printed to stderr), and 2 when no subcommand was supplied.
 #[tokio::main(flavor = "current_thread")]
 async fn main() -> ExitCode {
    let args: Vec<String> = std::env::args().collect();
    if args.len() < 2 {
        // argv can be completely empty (a program name is not guaranteed),
        // so fall back to a fixed name instead of indexing args[0].
        let prog = args.first().map(String::as_str).unwrap_or("ch");
        eprintln!("usage: {} <send|recv> <channel> [args...]", prog);
        return ExitCode::from(2);
    }
    let cmd = args[1].clone();
    // `connect` hands the RPC system to `spawn_local`, so everything that
    // talks to the daemon runs inside this LocalSet.
    let local = tokio::task::LocalSet::new();
    let result: Result<(), String> = local.run_until(async move {
        match cmd.as_str() {
            "send" => {
                if args.len() < 4 {
                    return Err("usage: ch send <channel> <message...>".into());
                }
                let channel = &args[2];
                // All remaining words form the message, joined by single spaces.
                let message = args[3..].join(" ");
                let sock = sock_for(channel);
                let client = connect(&sock).await?;
                let mut req = client.send_request();
                req.get().set_channel(channel);
                req.get().set_message(&message);
                req.send().promise.await.map_err(|e| format!("send: {e}"))?;
                println!("sent to {channel}");
                Ok(())
            }
            "recv" => {
                if args.len() < 3 {
                    return Err("usage: ch recv <channel> [--all-new] [--min-count N]".into());
                }
                let channel = &args[2];
                let mut all_new = false;
                let mut min_count: u32 = 20;
                // Hand-rolled flag parsing over the arguments after the channel.
                let mut i = 3;
                while i < args.len() {
                    match args[i].as_str() {
                        "--all-new" => { all_new = true; i += 1; }
                        "--min-count" => {
                            min_count = args.get(i+1)
                                .ok_or("--min-count needs an argument")?
                                .parse().map_err(|e| format!("--min-count: {e}"))?;
                            i += 2;
                        }
                        other => return Err(format!("unknown arg: {other}")),
                    }
                }
                let sock = sock_for(channel);
                let client = connect(&sock).await?;
                let mut req = client.recv_request();
                req.get().set_channel(channel);
                req.get().set_all_new(all_new);
                req.get().set_min_count(min_count);
                let reply = req.send().promise.await.map_err(|e| format!("recv: {e}"))?;
                let text = reply.get().map_err(|e| e.to_string())?
                    .get_text().map_err(|e| e.to_string())?
                    .to_str().map_err(|e| e.to_string())?;
                print!("{text}");
                // Make sure the output always ends with a newline.
                if !text.ends_with('\n') { println!(); }
                Ok(())
            }
            other => Err(format!("unknown command: {other} (use send|recv)")),
        }
    }).await;
    match result {
        Ok(()) => ExitCode::SUCCESS,
        Err(e) => { eprintln!("error: {e}"); ExitCode::from(1) }
    }
 }
--- a/src/bin/consciousness.rs
+++ b/src/bin/consciousness.rs
@ -1,28 +1,7 @@
-#![cfg_attr(feature = "nightly-diagnostics", feature(panic_backtrace_config))]
+#![feature(panic_backtrace_config)]
 #![warn(unreachable_pub)]
 /// Process entry point: makes sure panics print a backtrace, installs the
 /// process-wide rustls crypto provider, then hands control to
 /// `consciousness::user::main`. The order matters — see the comments on
 /// each step.
 fn main() {
    // Force the default panic hook to print a backtrace. stderr is
    // already redirected to a daemon log; without this the hook obeys
    // RUST_BACKTRACE (unset by default), so the log only shows the
    // "note: run with `RUST_BACKTRACE=full`" tail and the actual
    // frames are lost. An explicit RUST_BACKTRACE set by the caller is
    // left untouched.
    //
    // SAFETY: called before any other thread is spawned, so no
    // concurrent env reader can race.
    if std::env::var_os("RUST_BACKTRACE").is_none() {
        unsafe { std::env::set_var("RUST_BACKTRACE", "1"); }
    }
    // Only available with the `nightly-diagnostics` feature.
    #[cfg(feature = "nightly-diagnostics")]
    std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short);
    // rustls 0.23 requires an explicit process-wide CryptoProvider
    // when both `ring` and `aws-lc-rs` are in the dep graph (otherwise
    // it panics on first ClientConfig::builder()). Pick `ring`.
    rustls::crypto::ring::default_provider()
        .install_default()
        .expect("install rustls crypto provider");
    consciousness::user::main()
 }
--- a/src/cli/admin.rs
+++ b/src/cli/admin.rs
@ -4,93 +4,44 @@ use anyhow::Result;
 use crate::hippocampus as memory;
 use crate::hippocampus::store;
-struct DefaultMemoryNode {
+fn install_default_file(data_dir: &std::path::Path, name: &str, content: &str) -> Result<()> {
-    key: &'static str,
+    let path = data_dir.join(name);
-    filename: &'static str,
+    if !path.exists() {
-    default_content: &'static str,
+        std::fs::write(&path, content)?;
-}
+        println!("Created {}", path.display());
 /// Built-in default memory nodes.
 ///
 /// Each entry pairs a store key with the markdown filename it is looked up
 /// under and the fallback content compiled into the binary from the
 /// `defaults/` directory via `include_str!`.
 const DEFAULT_MEMORY_NODES: &[DefaultMemoryNode] = &[
    DefaultMemoryNode {
        key: "identity",
        filename: "identity.md",
        default_content: include_str!("../../defaults/identity.md"),
    },
    DefaultMemoryNode {
        key: "on-consciousness",
        filename: "on-consciousness.md",
        default_content: include_str!("../../defaults/on-consciousness.md"),
    },
    // Note: this node's key differs from its filename stem.
    DefaultMemoryNode {
        key: "memory-instructions-core",
        filename: "instructions.md",
        default_content: include_str!("../../defaults/instructions.md"),
    },
 ];
 /// Print up to `count` of the most recent messages in the transcript at `path`.
 ///
 /// The transcript is read newest-first. Unless `newest_first` is set, the
 /// selected batch is reversed before printing so it reads oldest-to-newest.
 /// Each message is printed as a header line (role, byte offset, timestamp or
 /// `-` when absent), the message text, and a blank separator line.
 ///
 /// # Errors
 /// Returns an error when the transcript cannot be opened.
 pub fn cmd_transcript_tail(path: &str, count: usize, newest_first: bool) -> Result<()> {
    let mut selected: Vec<_> = match crate::conversation::TailMessages::open(path) {
        Some(tail) => tail.take(count).collect(),
        None => anyhow::bail!("could not open transcript {}", path),
    };
    if !newest_first {
        selected.reverse();
    }
    for entry in &selected {
        let role = match entry.role {
            crate::conversation::TranscriptRole::Assistant => "assistant",
            crate::conversation::TranscriptRole::User => "user",
        };
        let timestamp = entry.timestamp.as_deref().unwrap_or("-");
        println!("--- {role} offset={} timestamp={} ---", entry.offset, timestamp);
        println!("{}", entry.text);
        println!();
    }
    Ok(())
 }
 /// Resolve the content used to seed `node`.
 ///
 /// Tries `node.filename` under `cfg.identity_dir` first, then under
 /// `cfg.data_dir`. The first file that can be read and is not
 /// whitespace-only wins; otherwise the compiled-in default is returned.
 fn default_node_content(cfg: &crate::config::Config, node: &DefaultMemoryNode) -> String {
    let candidates = [
        cfg.identity_dir.join(node.filename),
        cfg.data_dir.join(node.filename),
    ];
    candidates
        .iter()
        // Lazy: the second file is only read when the first one is rejected.
        .filter_map(|path| std::fs::read_to_string(path).ok())
        .find(|content| !content.trim().is_empty())
        .unwrap_or_else(|| node.default_content.to_string())
 }
 pub async fn cmd_init() -> Result<()> {
    let cfg = crate::config::get();
    // Ensure data directory exists
    std::fs::create_dir_all(&cfg.data_dir)?;
-    // Seed default memory nodes if missing. These used to live as markdown
+    // Install filesystem files (not store nodes)
-    // files before identity/context moved fully into the memory graph.
+    install_default_file(&cfg.data_dir, "instructions.md",
-    for node in DEFAULT_MEMORY_NODES {
+        include_str!("../../defaults/instructions.md"))?;
-        if memory::memory_render(None, node.key, Some(true)).await.is_err() {
+    install_default_file(&cfg.data_dir, "on-consciousness.md",
-            let content = default_node_content(&cfg, node);
+        include_str!("../../defaults/on-consciousness.md"))?;
-            let _ = memory::memory_write(None, node.key, &content).await?;
+
-            println!("Seeded {} in store from {}", node.key, node.filename);
+    // Seed identity node if empty
-        }
+    let store = memory::access_local()?;
    if !store.contains_key("identity").unwrap_or(false) {
        let default = include_str!("../../defaults/identity.md");
        store.upsert("identity", default)?;
        println!("Seeded identity in store");
    }
    store.save()?;
    println!("Initialized with {} nodes", store.all_keys().unwrap_or_default().len());
    // Create config if none exists
    let config_path = std::env::var("POC_MEMORY_CONFIG")
        .map(std::path::PathBuf::from)
-        .unwrap_or_else(|_| crate::config::config_path());
+        .unwrap_or_else(|_| {
            dirs::home_dir().unwrap_or_default()
                .join(".consciousness/config.jsonl")
        });
    if !config_path.exists() {
        let config_dir = config_path.parent().unwrap();
        std::fs::create_dir_all(config_dir)?;
@ -100,7 +51,7 @@ pub async fn cmd_init() -> Result<()> {
            config_path.display());
    }
-    println!("Done. Run `poc-memory admin load-context --stats` to verify.");
+    println!("Done. Run `poc-memory load-context --stats` to verify.");
    Ok(())
 }
--- a/src/cli/agent.rs
+++ b/src/cli/agent.rs
@ -2,13 +2,8 @@
 use anyhow::{bail, Context, Result};
 use crate::hippocampus as memory;
 use std::time::Instant;
 pub async fn cmd_run_agent(agent: &str, count: usize, target: &[String], query: Option<&str>, dry_run: bool, _local: bool, state_dir: Option<&str>) -> Result<()> {
    let start = Instant::now();
    eprintln!(
        "[agent-cli] start agent={} count={} targets={} query={:?} dry_run={} local={} state_dir={:?} pid={}",
        agent, count, target.len(), query, dry_run, _local, state_dir, std::process::id());
    // Mark as agent so tool calls (e.g. poc-memory render) don't
    // pollute the user's seen set as a side effect
    // SAFETY: single-threaded at this point (CLI startup, before any agent work)
@ -50,19 +45,14 @@ pub async fn cmd_run_agent(agent: &str, count: usize, target: &[String], query:
            if let Err(e) = crate::agent::oneshot::run_one_agent(
                agent, count, Some(&[key.clone()]),
            ).await {
                eprintln!("[agent-cli] ERROR agent={} target={} error={}", agent, key, e);
                println!("[{}] ERROR on {}: {}", agent, key, e);
            }
        }
    } else {
-        if let Err(e) = crate::agent::oneshot::run_one_agent(
+        crate::agent::oneshot::run_one_agent(
            agent, count, None,
-        ).await {
+        ).await.map_err(|e| anyhow::anyhow!("{}", e))?;
            eprintln!("[agent-cli] ERROR agent={} error={}", agent, e);
            return Err(anyhow::anyhow!("{}", e));
        }
    }
    eprintln!("[agent-cli] done agent={} elapsed={:.2}s",
        agent, start.elapsed().as_secs_f64());
    Ok(())
 }
--- a/src/config.rs
+++ b/src/config.rs
@ -201,23 +201,16 @@ pub fn watch_config(cli: crate::user::CliArgs) {
            {
                crate::dbglog!("[config] watch({}) failed: {}", parent.display(), e);
                return;
-			}
+            }
-			crate::dbglog!("[config] watching {}", path.display());
+            crate::dbglog!("[config] watching {}", path.display());
-			let mut last_seen = config_file_state(&path);
+            while let Ok(res) = rx.recv() {
-			while let Ok(res) = rx.recv() {
+                let Ok(events) = res else { continue; };
-				let Ok(events) = res else { continue; };
+                if !events.iter().any(|e| e.path == path) { continue; }
 				if !events.iter().any(|e| e.path == path) { continue; }
-				let current_seen = config_file_state(&path);
+                // Reload both halves.
-				if current_seen == last_seen {
+                let mem_changed = reload();
-					continue;
+                let app_changed = match build_figment(&cli).extract::<AppConfig>() {
 				}
 				last_seen = current_seen;
 				// Reload both halves.
 				let mem_changed = reload();
 				let app_changed = match build_figment(&cli).extract::<AppConfig>() {
                    Ok(app) => {
                        install_app(app);
                        true
@ -230,13 +223,8 @@ pub fn watch_config(cli: crate::user::CliArgs) {
                crate::dbglog!("[config] reloaded (memory_changed={}, app_changed={})",
                    mem_changed, app_changed);
            }
-		})
+        })
-		.ok();
+        .ok();
 }
 /// Snapshot a file's `(modification time, length in bytes)`.
 ///
 /// Returns `None` when the metadata cannot be read or the platform does not
 /// report a modification time.
 fn config_file_state(path: &std::path::Path) -> Option<(std::time::SystemTime, u64)> {
 	std::fs::metadata(path)
 		.ok()
 		.and_then(|meta| meta.modified().ok().map(|mtime| (mtime, meta.len())))
 }
 // ============================================================
--- a/src/conversation/claude.rs
+++ b/src/conversation/claude.rs
@ -1,113 +0,0 @@
 use serde_json::Value;
 use super::{ConversationSource, TranscriptMessage, TranscriptRole};
 /// `ConversationSource` adapter that forwards to this module's free
 /// functions for parsing and compaction detection.
 pub struct ClaudeSource;
 impl ConversationSource for ClaudeSource {
    /// Delegates to the module-level `parse_message`.
    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
        parse_message(obj, offset)
    }
    /// Delegates to the module-level `is_compaction`.
    fn is_compaction(&self, obj: &Value) -> bool {
        is_compaction(obj)
    }
    /// Cheap byte-level pre-filter: true when the raw bytes contain the
    /// phrase that `is_compaction` looks for at the start of the content.
    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
        contains_bytes(obj_bytes, b"This session is being continued")
    }
 }
 /// Flatten a message `content` value into plain text.
 ///
 /// A JSON string is used as-is. A JSON array contributes the `text` field of
 /// every element whose `type` is `"text"`, joined with single spaces. Any
 /// other JSON type, or an empty result, yields `None`.
 fn text_content(value: &Value) -> Option<String> {
    let joined = if let Some(plain) = value.as_str() {
        plain.to_owned()
    } else if let Some(blocks) = value.as_array() {
        let mut pieces: Vec<&str> = Vec::new();
        for block in blocks {
            if block.get("type").and_then(Value::as_str) != Some("text") {
                continue;
            }
            if let Some(piece) = block.get("text").and_then(Value::as_str) {
                pieces.push(piece);
            }
        }
        pieces.join(" ")
    } else {
        return None;
    };
    if joined.is_empty() { None } else { Some(joined) }
 }
 pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
    let role = match obj.get("type").and_then(|v| v.as_str()) {
        Some("user") => TranscriptRole::User,
        Some("assistant") => TranscriptRole::Assistant,
        _ => return None,
    };
    let msg = obj.get("message").unwrap_or(obj);
    let text = msg.get("content").and_then(text_content)?;
    let timestamp = obj.get("timestamp")
        .and_then(|v| v.as_str())
        .map(str::to_string);
    Some(TranscriptMessage { role, text, timestamp, offset })
 }
 pub(crate) fn is_compaction(obj: &Value) -> bool {
    obj.get("type").and_then(|v| v.as_str()) == Some("user")
        && obj.get("message")
            .and_then(|m| m.get("content"))
            .and_then(|c| c.as_str())
            .is_some_and(|content| content.starts_with("This session is being continued"))
 }
 /// Report whether `needle` occurs as a contiguous run of bytes in `haystack`.
 ///
 /// An empty `needle` matches trivially. The explicit guard is required
 /// because `slice::windows` panics when asked for a window size of zero.
 fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    needle.is_empty() || haystack.windows(needle.len()).any(|w| w == needle)
 }
 // Unit tests for the message parser and compaction detector in this module.
 #[cfg(test)]
 mod tests {
    use super::*;
    use serde_json::json;
    // String content is taken verbatim; array content keeps only the
    // `text` blocks and joins them with a single space.
    #[test]
    fn parses_string_and_array_content() {
        let user = json!({
            "timestamp": "2026-06-15T15:00:00.000Z",
            "type": "user",
            "message": { "content": "hello" }
        });
        let assistant = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "assistant",
            "message": {
                "content": [
                    { "type": "text", "text": "hi" },
                    { "type": "tool_use", "name": "ignored" },
                    { "type": "text", "text": "there" }
                ]
            }
        });
        assert_eq!(
            parse_message(&user, 7).unwrap(),
            TranscriptMessage {
                role: TranscriptRole::User,
                text: "hello".to_string(),
                timestamp: Some("2026-06-15T15:00:00.000Z".to_string()),
                offset: 7,
            }
        );
        assert_eq!(parse_message(&assistant, 9).unwrap().text, "hi there");
    }
    // A user message whose content starts with the continuation phrase is
    // recognised as a compaction marker.
    #[test]
    fn detects_compaction_marker() {
        let obj = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "user",
            "message": {
                "content": "This session is being continued from a previous conversation."
            }
        });
        assert!(is_compaction(&obj));
    }
 }
--- a/src/conversation/codex.rs
+++ b/src/conversation/codex.rs
@ -1,105 +0,0 @@
 use serde_json::Value;
 use super::{ConversationSource, TranscriptMessage, TranscriptRole};
 /// `ConversationSource` adapter that forwards to this module's free
 /// functions for parsing and compaction detection.
 pub struct CodexSource;
 impl ConversationSource for CodexSource {
    /// Delegates to the module-level `parse_message`.
    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
        parse_message(obj, offset)
    }
    /// Delegates to the module-level `is_compaction`.
    fn is_compaction(&self, obj: &Value) -> bool {
        is_compaction(obj)
    }
    /// Cheap byte-level pre-filter: true when the raw bytes contain the
    /// `context_compacted` payload type that `is_compaction` checks for.
    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
        contains_bytes(obj_bytes, b"context_compacted")
    }
 }
 pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
    if obj.get("type").and_then(|v| v.as_str()) != Some("event_msg") {
        return None;
    }
    let payload = obj.get("payload")?;
    let (role, text) = match payload.get("type").and_then(|v| v.as_str()) {
        Some("user_message") => (
            TranscriptRole::User,
            payload.get("message").and_then(|v| v.as_str())?.to_string(),
        ),
        Some("agent_message") => (
            TranscriptRole::Assistant,
            payload.get("message").and_then(|v| v.as_str())?.to_string(),
        ),
        _ => return None,
    };
    if text.is_empty() {
        return None;
    }
    let timestamp = obj.get("timestamp")
        .and_then(|v| v.as_str())
        .map(str::to_string);
    Some(TranscriptMessage { role, text, timestamp, offset })
 }
 pub(crate) fn is_compaction(obj: &Value) -> bool {
    obj.get("type").and_then(|v| v.as_str()) == Some("event_msg")
        && obj.get("payload")
            .and_then(|p| p.get("type"))
            .and_then(|v| v.as_str()) == Some("context_compacted")
 }
 /// Report whether `needle` occurs as a contiguous run of bytes in `haystack`.
 ///
 /// An empty `needle` matches trivially. The explicit guard is required
 /// because `slice::windows` panics when asked for a window size of zero.
 fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    needle.is_empty() || haystack.windows(needle.len()).any(|w| w == needle)
 }
 // Unit tests for the message parser and compaction detector in this module.
 #[cfg(test)]
 mod tests {
    use super::*;
    use serde_json::json;
    // user_message / agent_message events become messages; other event
    // payloads and non-event_msg objects are ignored.
    #[test]
    fn parses_event_messages_and_skips_noise() {
        let user = json!({
            "timestamp": "2026-06-15T15:00:00.000Z",
            "type": "event_msg",
            "payload": { "type": "user_message", "message": "start here" }
        });
        let assistant = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "event_msg",
            "payload": { "type": "agent_message", "message": "working" }
        });
        let tool = json!({
            "timestamp": "2026-06-15T15:00:02.000Z",
            "type": "event_msg",
            "payload": { "type": "task_started" }
        });
        let raw = json!({
            "timestamp": "2026-06-15T15:00:03.000Z",
            "type": "response_item",
            "payload": { "type": "message", "role": "user" }
        });
        assert_eq!(parse_message(&user, 1).unwrap().role, TranscriptRole::User);
        assert_eq!(parse_message(&assistant, 2).unwrap().text, "working");
        assert!(parse_message(&tool, 3).is_none());
        assert!(parse_message(&raw, 4).is_none());
    }
    // An event_msg with a context_compacted payload is a compaction marker.
    #[test]
    fn detects_compaction_event() {
        let obj = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "event_msg",
            "payload": { "type": "context_compacted" }
        });
        assert!(is_compaction(&obj));
    }
 }
--- a/src/conversation/jsonl.rs
+++ b/src/conversation/jsonl.rs
@ -1,110 +0,0 @@
 use memchr::memrchr3;
 /// Scan backwards through mmap'd bytes, yielding byte slices of complete
 /// top-level JSON objects (outermost { to matching }).
 ///
 /// Uses memrchr3 (SIMD) to jump between structurally significant bytes
 /// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth,
 /// skipping braces inside JSON strings. Returns objects in reverse order
 /// (newest first).
 pub struct JsonlBackwardIter<'a> {
    /// The whole buffer being scanned.
    data: &'a [u8],
    /// Exclusive upper bound of the region still to be searched. Starts at
    /// `data.len()` and moves towards zero as objects are yielded.
    pos: usize,
 }
 impl<'a> JsonlBackwardIter<'a> {
    /// Create an iterator positioned at the end of `data`, so the first
    /// object yielded is the last one in the buffer.
    pub fn new(data: &'a [u8]) -> Self {
        Self { data, pos: data.len() }
    }
 }
 impl<'a> Iterator for JsonlBackwardIter<'a> {
    /// `(offset, bytes)`: the index of the object's opening brace within the
    /// buffer, and the object's bytes from that brace through its closing
    /// brace inclusive.
    type Item = (usize, &'a [u8]);
    /// Yield the next object towards the start of the buffer, or `None` once
    /// no further complete object can be found.
    fn next(&mut self) -> Option<Self::Item> {
        next_json_object(self.data, &mut self.pos)
    }
 }
 /// Decide whether the byte at index `p` is preceded by an even number of
 /// consecutive backslashes, i.e. is not itself escaped.
 ///
 /// Only the bytes before `p` are inspected; the byte at `p` is never read.
 fn is_unescaped_quote(data: &[u8], p: usize) -> bool {
    let preceding_backslashes = data[..p]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    preceding_backslashes % 2 == 0
 }
 /// Find the last complete top-level JSON object that lies before `*pos`.
 ///
 /// Searches `data[..*pos]` backwards. On success returns `(start, bytes)`
 /// where `start` is the index of the object's opening `{` and `bytes` is
 /// `data[start..=close]`; `*pos` is left at `start`, so a following call
 /// continues with the bytes before this object. Returns `None` when the
 /// backward search runs out of `{`, `}` or `"` bytes before an object is
 /// completed; `*pos` has then been moved to the last byte examined.
 fn next_json_object<'a>(data: &'a [u8], pos: &mut usize) -> Option<(usize, &'a [u8])> {
    // Find the closing } of the next object, skipping } inside strings.
    let close = {
        let mut in_string = false;
        loop {
            // `?` ends the search when no structural byte remains.
            let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?;
            *pos = p;
            let ch = data[p];
            if in_string {
                // Scanning backwards, a string ends at its opening quote,
                // which is the first quote not preceded by an odd run of
                // backslashes.
                if ch == b'"' && is_unescaped_quote(data, p) {
                    in_string = false;
                }
                continue;
            }
            match ch {
                b'}' => break p,
                // Any quote met outside a string is taken as a string's
                // closing quote.
                b'"' => in_string = true,
                // A `{` met before any closing brace is ignored.
                _ => {}
            }
        }
    };
    // Track brace depth to find matching {.
    // Depth starts at 1 for the closing brace found above.
    let mut depth: usize = 1;
    let mut in_string = false;
    loop {
        let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?;
        *pos = p;
        let ch = data[p];
        if in_string {
            if ch == b'"' && is_unescaped_quote(data, p) {
                in_string = false;
            }
            continue;
        }
        match ch {
            b'"' => { in_string = true; }
            // A nested object's closing brace.
            b'}' => { depth += 1; }
            b'{' => {
                depth -= 1;
                // Depth zero means this `{` opens the outermost object.
                if depth == 0 {
                    return Some((*pos, &data[*pos..=close]));
                }
            }
            _ => {}
        }
    }
 }
 // Unit test for the backward JSONL scanner.
 #[cfg(test)]
 mod tests {
    use super::*;
    // Braces and escaped quotes inside strings must not affect object
    // boundaries, nested objects must be returned as part of their parent,
    // and trailing non-JSON text must be skipped. Objects come back
    // last-first.
    #[test]
    fn handles_nested_json_and_quoted_braces() {
        let data = br#"{"n":1,"s":"literal } brace"}
 {"n":2,"nested":{"s":"escaped quote: \" and { brace"}}
 trailing garbage
 "#;
        let objs: Vec<_> = JsonlBackwardIter::new(data)
            .map(|(_, bytes)| std::str::from_utf8(bytes).unwrap().to_string())
            .collect();
        assert_eq!(objs.len(), 2);
        assert!(objs[0].contains(r#""n":2"#));
        assert!(objs[1].contains(r#""n":1"#));
    }
 }
--- a/src/conversation/mod.rs
+++ b/src/conversation/mod.rs
@ -1,271 +0,0 @@
 // Conversation transcript abstraction.
 //
 // Core code consumes normalized user/assistant messages through this module.
 // Product-specific log formats live in the small compatibility sources below.
 use memmap2::Mmap;
 use serde_json::Value;
 use std::fs;
 use std::path::Path;
 pub mod claude;
 pub mod codex;
 pub mod jsonl;
 pub use jsonl::JsonlBackwardIter;
 /// Which side of the conversation produced a transcript message.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum TranscriptRole {
    /// Message written by the user.
    User,
    /// Message written by the assistant/agent.
    Assistant,
 }
 /// A transcript entry normalized to a common shape, independent of the log
 /// format it was parsed from.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct TranscriptMessage {
    /// Who produced the message.
    pub role: TranscriptRole,
    /// Plain-text body of the message.
    pub text: String,
    /// The entry's `timestamp` string as found in the log, if it had one.
    pub timestamp: Option<String>,
    /// Offset passed to `ConversationSource::parse_message`; `TailMessages`
    /// supplies the byte offset of the entry's opening brace in the file.
    pub offset: u64,
 }
 /// Adapter for one transcript log format.
 pub trait ConversationSource {
    /// Convert a parsed JSON object into a normalized message, or `None`
    /// when the object is not a user/assistant message in this format.
    /// `offset` is recorded on the returned message.
    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage>;
    /// Whether a parsed JSON object is this format's compaction marker.
    fn is_compaction(&self, obj: &Value) -> bool;
    /// Cheap pre-filter on the raw bytes of an object, consulted before the
    /// object is parsed as JSON. Returning `false` skips the object. The
    /// default accepts everything.
    fn may_contain_compaction(&self, _obj_bytes: &[u8]) -> bool {
        true
    }
 }
 /// Source that accepts every supported log format by trying the Claude
 /// adapter first and the Codex adapter second.
 pub struct AnyConversationSource;
 impl ConversationSource for AnyConversationSource {
    /// Returns the Claude parse when it succeeds, otherwise the Codex parse.
    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
        claude::ClaudeSource.parse_message(obj, offset)
            .or_else(|| codex::CodexSource.parse_message(obj, offset))
    }
    /// True when either adapter recognises `obj` as a compaction marker.
    fn is_compaction(&self, obj: &Value) -> bool {
        claude::ClaudeSource.is_compaction(obj) || codex::CodexSource.is_compaction(obj)
    }
    /// True when either adapter's byte-level pre-filter matches.
    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
        claude::ClaudeSource.may_contain_compaction(obj_bytes)
            || codex::CodexSource.may_contain_compaction(obj_bytes)
    }
 }
 /// Find the byte offset of the last compaction marker in mmap'd transcript data.
 /// Returns the byte offset of the JSON object's opening brace.
 ///
 /// Recognises the markers of every supported format by searching with
 /// `AnyConversationSource`. Returns `None` when no marker is found.
 pub(crate) fn find_last_compaction(data: &[u8]) -> Option<usize> {
    find_last_compaction_with(data, &AnyConversationSource)
 }
 /// Search `data` from the end for the most recent object that `source`
 /// recognises as a compaction marker.
 ///
 /// Returns the offset of that object's opening brace, or `None` when no
 /// object qualifies. Objects rejected by the byte-level pre-filter or that
 /// fail to parse as JSON are skipped.
 pub(crate) fn find_last_compaction_with(
    data: &[u8],
    source: &impl ConversationSource,
 ) -> Option<usize> {
    JsonlBackwardIter::new(data)
        // Quick byte check before parsing large transcript entries.
        .filter(|(_, raw)| source.may_contain_compaction(raw))
        .find_map(|(start, raw)| {
            let parsed: Value = serde_json::from_slice(raw).ok()?;
            source.is_compaction(&parsed).then_some(start)
        })
 }
 /// Find the byte offset of the last compaction in a transcript file.
 /// Returns None if the file can't be opened or has no compaction.
 pub(crate) fn find_last_compaction_in_file(path: &str) -> Option<u64> {
    if path.is_empty() { return None; }
    let file = fs::File::open(path).ok()?;
    let meta = file.metadata().ok()?;
    if meta.len() == 0 { return None; }
    let mmap = unsafe { Mmap::map(&file).ok()? };
    find_last_compaction(&mmap).map(|off| off as u64)
 }
 /// Memory-map the transcript file at `path`.
 ///
 /// Returns the mapping together with the open file handle so the caller can
 /// keep both alive. Yields `None` when the file cannot be opened, its
 /// metadata cannot be read, it is empty, or the mapping fails.
 pub(crate) fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
    let file = fs::File::open(path).ok()?;
    if file.metadata().ok()?.len() == 0 {
        return None;
    }
    // NOTE(review): a read-only file mapping is presumably only sound while
    // no other process truncates or rewrites the file — confirm against how
    // transcripts are written.
    let mapped = unsafe { Mmap::map(&file) };
    Some((mapped.ok()?, file))
 }
 /// Reverse iterator over user/assistant messages in a transcript file.
 /// Yields normalized transcript messages newest-first. The caller decides
 /// when to stop (byte budget, count, etc).
 pub struct TailMessages {
    /// Open handle kept alongside the mapping; never read directly.
    _file: fs::File,
    /// Memory-mapped contents of the transcript.
    mmap: Mmap,
    /// Exclusive upper bound of the bytes not yet scanned; starts at the
    /// end of the mapping.
    pos: usize,
 }
 impl TailMessages {
    /// Map the transcript at `path` and position the iterator at its end.
    /// Returns `None` when `mmap_transcript` cannot produce a mapping.
    pub fn open(path: &str) -> Option<Self> {
        let (mmap, file) = mmap_transcript(path)?;
        let pos = mmap.len();
        Some(Self { _file: file, mmap, pos })
    }
 }
 impl Iterator for TailMessages {
    type Item = TranscriptMessage;
    /// Step backwards to the previous user/assistant message.
    ///
    /// Objects that fail to parse as JSON, or that no supported format
    /// recognises as a message, are skipped. Returns `None` once no further
    /// object is found before the current position.
    fn next(&mut self) -> Option<Self::Item> {
        while let Some((start, raw)) =
            jsonl::JsonlBackwardIter::new(&self.mmap[..self.pos]).next()
        {
            // Move past this object whether or not it produces a message.
            self.pos = start;
            let Ok(parsed) = serde_json::from_slice::<Value>(raw) else {
                continue;
            };
            if let Some(found) = AnyConversationSource.parse_message(&parsed, start as u64) {
                return Some(found);
            }
        }
        None
    }
 }
 /// Get the timestamp of the compaction message at a given byte offset.
 /// Returns a human-readable datetime string, or None if unavailable.
 pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
    let (mmap, _file) = mmap_transcript(path)?;
    let start = offset as usize;
    if start >= mmap.len() { return None; }
    // Find the end of this JSONL line
    let end = mmap[start..].iter().position(|&b| b == b'\n')
        .map(|p| start + p)
        .unwrap_or(mmap.len());
    let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?;
    if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) {
        return Some(ts.to_string());
    }
    for field in &["createdAt", "created_at", "time"] {
        if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) {
            return Some(ts.to_string());
        }
    }
    None
 }
 /// Report whether the transcript's latest compaction differs from the one
 /// recorded on the previous call.
 ///
 /// The last seen offset is stored as text in
 /// `state_dir/compaction-{session_id}`. When the transcript has a
 /// compaction, the result is `true` if no valid offset was stored or the
 /// stored offset differs, and the file is then rewritten with the current
 /// offset (write failures are ignored). When the transcript has no
 /// compaction the result is `false` and the stored value is left alone.
 pub fn detect_new_compaction(
    state_dir: &Path,
    session_id: &str,
    transcript_path: &str,
 ) -> bool {
    let current = find_last_compaction_in_file(transcript_path);
    let marker_path = state_dir.join(format!("compaction-{}", session_id));
    let previous: Option<u64> = fs::read_to_string(&marker_path)
        .ok()
        .and_then(|raw| raw.trim().parse().ok());
    let Some(latest) = current else {
        return false;
    };
    let changed = previous != Some(latest);
    // Save current offset
    fs::write(&marker_path, latest.to_string()).ok();
    changed
 }
 // Integration-style tests for the transcript helpers in this module.
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::io::Write;
    // Write `content` to a fresh temporary file and return its handle; the
    // file lives as long as the returned value.
    fn write_temp_jsonl(content: &str) -> tempfile::NamedTempFile {
        let mut file = tempfile::NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file.flush().unwrap();
        file
    }
    // A transcript mixing both log formats yields only the user/assistant
    // messages, newest first, with decreasing offsets.
    #[test]
    fn tail_messages_yields_normalized_messages_newest_first() {
        let file = write_temp_jsonl(
            r#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"user","message":{"content":"claude user"}}
 {"timestamp":"2026-06-15T15:00:01.000Z","type":"assistant","message":{"content":[{"type":"text","text":"claude assistant"}]}}
 {"timestamp":"2026-06-15T15:00:02.000Z","type":"event_msg","payload":{"type":"user_message","message":"codex user"}}
 {"timestamp":"2026-06-15T15:00:03.000Z","type":"event_msg","payload":{"type":"task_started"}}
 {"timestamp":"2026-06-15T15:00:04.000Z","type":"event_msg","payload":{"type":"agent_message","message":"codex assistant"}}
 "#,
        );
        let messages: Vec<_> = TailMessages::open(&file.path().to_string_lossy())
            .unwrap()
            .collect();
        assert_eq!(messages.len(), 4);
        assert_eq!(messages[0].text, "codex assistant");
        assert_eq!(messages[1].text, "codex user");
        assert_eq!(messages[2].text, "claude assistant");
        assert_eq!(messages[3].text, "claude user");
        assert!(messages[0].offset > messages[1].offset);
    }
    // Both formats' compaction markers are found by the combined search.
    #[test]
    fn detects_claude_and_codex_compactions() {
        let claude = br#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"user","message":{"content":"normal"}}
 {"timestamp":"2026-06-15T15:00:01.000Z","type":"user","message":{"content":"This session is being continued from a previous conversation."}}
 "#;
        let codex = br#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"event_msg","payload":{"type":"user_message","message":"normal"}}
 {"timestamp":"2026-06-15T15:00:01.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
 "#;
        assert!(find_last_compaction(claude).is_some());
        assert!(find_last_compaction(codex).is_some());
    }
    // The first call sees a new compaction; a second call against the same
    // transcript and state directory does not.
    #[test]
    fn detect_new_compaction_tracks_offset_changes() {
        let transcript = write_temp_jsonl(
            r#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
 "#,
        );
        let state = tempfile::tempdir().unwrap();
        assert!(detect_new_compaction(
            state.path(),
            "session",
            &transcript.path().to_string_lossy(),
        ));
        assert!(!detect_new_compaction(
            state.path(),
            "session",
            &transcript.path().to_string_lossy(),
        ));
    }
 }
--- a/src/hippocampus/graph.rs
+++ b/src/hippocampus/graph.rs
@ -11,23 +11,6 @@ use crate::store::{Store, RelationType, StoreView};
 use serde::{Deserialize, Serialize};
 use std::collections::{HashMap, HashSet, VecDeque};
 use std::sync::{OnceLock, RwLock};
 /// Largest node degree for which the clustering coefficient is computed
 /// exactly over all neighbor pairs; above it, pairs are sampled.
 const EXACT_CC_MAX_DEG: usize = 512;
 /// Upper bound on the number of neighbor pairs sampled for the approximate
 /// clustering coefficient.
 const APPROX_CC_PAIRS: u64 = 4096;
 /// How long a cached clustering coefficient stays valid, in seconds
 /// (15 minutes).
 const CC_CACHE_TTL_SECS: i64 = 15 * 60;
 /// One cached clustering-coefficient result.
 #[derive(Clone, Copy)]
 struct CachedCc {
 	/// The cached coefficient.
 	value: f32,
 	/// Epoch time at which `value` was computed, compared against
 	/// `CC_CACHE_TTL_SECS` to decide freshness.
 	computed_at: i64,
 }
 /// Process-wide cache of clustering coefficients keyed by node key,
 /// created on first use.
 static CC_CACHE: OnceLock<RwLock<HashMap<String, CachedCc>>> = OnceLock::new();
 /// Return the shared cache, initialising it to an empty map on the first
 /// call.
 fn cc_cache() -> &'static RwLock<HashMap<String, CachedCc>> {
 	CC_CACHE.get_or_init(|| RwLock::new(HashMap::new()))
 }
 /// Community info for reporting
 #[derive(Clone, Debug)]
@ -49,13 +32,11 @@ pub struct Edge {
 /// The in-memory graph built from store nodes + relations
 pub struct Graph {
-	/// Adjacency list: node key → list of edges
+    /// Adjacency list: node key → list of edges
-	adj: HashMap<String, Vec<Edge>>,
+    adj: HashMap<String, Vec<Edge>>,
-	/// Neighbor sets for membership tests in graph metrics.
+    /// All node keys
-	neighbor_sets: HashMap<String, HashSet<String>>,
+    keys: HashSet<String>,
-	/// All node keys
+    /// Community labels (from label propagation)
 	keys: HashSet<String>,
 	/// Community labels (from label propagation)
    communities: HashMap<String, u32>,
 }
@ -86,22 +67,22 @@ impl Graph {
            .unwrap_or_default()
    }
-	/// Just neighbor keys
+    /// Just neighbor keys
-	pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
+    pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
-		self.neighbor_sets.get(key)
+        self.adj.get(key)
-			.map(|neighbors| neighbors.iter().map(String::as_str).collect())
+            .map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
-			.unwrap_or_default()
+            .unwrap_or_default()
-	}
+    }
-	/// Jaccard similarity between two nodes' neighborhoods.
+    /// Jaccard similarity between two nodes' neighborhoods.
-	/// Measures overlap: |intersection| / |union| of their neighbor sets.
+    /// Measures overlap: |intersection| / |union| of their neighbor sets.
-	pub fn jaccard(&self, a: &str, b: &str) -> f32 {
+    pub fn jaccard(&self, a: &str, b: &str) -> f32 {
-		let Some(na) = self.neighbor_sets.get(a) else { return 0.0 };
+        let na = self.neighbor_keys(a);
-		let Some(nb) = self.neighbor_sets.get(b) else { return 0.0 };
+        let nb = self.neighbor_keys(b);
-		let intersection = na.intersection(nb).count();
+        let intersection = na.intersection(&nb).count();
-		let union = na.len() + nb.len() - intersection;
+        let union = na.union(&nb).count();
-		if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
+        if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
-	}
+    }
    /// Compute Jaccard-based strength for every edge in the graph.
    /// Returns (source_key, target_key, jaccard_strength) triples.
@ -221,78 +202,41 @@ impl Graph {
        }
    }
-	/// Local clustering coefficient: fraction of a node's neighbors
+    /// Local clustering coefficient: fraction of a node's neighbors
-	/// that are also neighbors of each other.
+    /// that are also neighbors of each other.
-	/// cc(v) = 2E / (deg * (deg - 1))
+    /// cc(v) = 2E / (deg * (deg - 1))
-	pub fn clustering_coefficient(&self, key: &str) -> f32 {
+    pub fn clustering_coefficient(&self, key: &str) -> f32 {
-		let now = crate::store::now_epoch();
+        let neighbors = self.neighbor_keys(key);
-		if let Some(cc) = cc_cache().read().unwrap().get(key).copied()
+        let deg = neighbors.len();
-			&& now - cc.computed_at < CC_CACHE_TTL_SECS
+        if deg < 2 {
-		{
+            return 0.0;
-			return cc.value;
+        }
 		}
 		let cc = self.clustering_coefficient_uncached(key);
 		cc_cache().write().unwrap().insert(key.to_owned(), CachedCc {
 			value: cc,
 			computed_at: now,
 		});
 		cc
 	}
-	fn clustering_coefficient_uncached(&self, key: &str) -> f32 {
+        let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
-		let Some(neighbors) = self.neighbor_sets.get(key) else {
+        let mut triangles = 0u32;
-			return 0.0;
+        for i in 0..neighbor_vec.len() {
-		};
+            for j in (i + 1)..neighbor_vec.len() {
-		let deg = neighbors.len();
+                let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
-		if deg < 2 {
+                if ni_neighbors.contains(neighbor_vec[j]) {
-			return 0.0;
+                    triangles += 1;
-		}
+                }
            }
        }
-		let neighbor_vec: Vec<&str> = neighbors.iter().map(String::as_str).collect();
+        (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
-		if deg <= EXACT_CC_MAX_DEG {
+    }
 			let mut linked = 0u64;
 			for i in 0..neighbor_vec.len() {
 				for j in (i + 1)..neighbor_vec.len() {
 					if self.neighbor_sets
 						.get(neighbor_vec[i])
 						.is_some_and(|n| n.contains(neighbor_vec[j])) {
 							linked += 1;
 					}
 				}
 			}
 			return (2.0 * linked as f32) / (deg as f32 * (deg as f32 - 1.0));
 		}
-		let mut linked = 0u64;
+    /// Average clustering coefficient across all nodes with deg >= 2
-		let samples = APPROX_CC_PAIRS.min((deg as u64 * (deg as u64 - 1)) / 2);
+    pub fn avg_clustering_coefficient(&self) -> f32 {
-		for sample in 0..samples {
+        let mut sum = 0.0f32;
-			let i = ((sample.wrapping_mul(1_103_515_245).wrapping_add(12_345)) % deg as u64) as usize;
+        let mut count = 0u32;
-			let mut j = ((sample.wrapping_mul(2_654_435_761).wrapping_add(97_531)) % deg as u64) as usize;
+        for key in &self.keys {
-			if i == j {
+            if self.degree(key) >= 2 {
-				j = (j + 1) % deg;
+                sum += self.clustering_coefficient(key);
-			}
+                count += 1;
-			if self.neighbor_sets
+            }
-				.get(neighbor_vec[i])
+        }
-				.is_some_and(|n| n.contains(neighbor_vec[j])) {
+        if count == 0 { 0.0 } else { sum / count as f32 }
 					linked += 1;
 			}
 		}
 		linked as f32 / samples as f32
 	}
 	/// Average clustering coefficient across all nodes with deg >= 2
 	pub fn avg_clustering_coefficient(&self) -> f32 {
 		let mut sum = 0.0f32;
 		let mut count = 0u32;
 		for key in &self.keys {
 			match self.neighbor_sets.get(key.as_str()) {
 				Some(s) if s.len() >= 2 => s,
 				_ => continue,
 			};
 			sum += self.clustering_coefficient(key);
 			count += 1;
 		}
 		if count == 0 { 0.0 } else { sum / count as f32 }
    }
    /// Average shortest path length (sampled BFS from up to 100 nodes)
@ -322,17 +266,15 @@ impl Graph {
        dist.insert(start.to_string(), 0u32);
        queue.push_back(start.to_string());
-		while let Some(node) = queue.pop_front() {
+        while let Some(node) = queue.pop_front() {
-			let d = dist[&node];
+            let d = dist[&node];
-			if let Some(neighbors) = self.neighbor_sets.get(&node) {
+            for neighbor in self.neighbor_keys(&node) {
-				for neighbor in neighbors {
+                if !dist.contains_key(neighbor) {
-					if !dist.contains_key(neighbor) {
+                    dist.insert(neighbor.to_string(), d + 1);
-						dist.insert(neighbor.clone(), d + 1);
+                    queue.push_back(neighbor.to_string());
-						queue.push_back(neighbor.clone());
+                }
-					}
+            }
-				}
+        }
 			}
 		}
        dist
    }
@ -563,39 +505,16 @@ impl Graph {
 /// Build graph from store data (with community detection)
 pub fn build_graph(store: &impl StoreView) -> Graph {
-	let (adj, keys) = build_adjacency(store);
+    let (adj, keys) = build_adjacency(store);
-	let neighbor_sets = build_neighbor_sets(&adj);
+    let communities = label_propagation(&keys, &adj, 20);
-	let communities = label_propagation(&keys, &adj, 20);
+    Graph { adj, keys, communities }
 	Graph {
 		adj,
 		neighbor_sets,
 		keys,
 		communities,
 	}
 }
 /// Build graph without community detection — for spreading activation
 /// searches where we only need the adjacency list.
 pub fn build_graph_fast(store: &impl StoreView) -> Graph {
-	let (adj, keys) = build_adjacency(store);
+    let (adj, keys) = build_adjacency(store);
-	let neighbor_sets = build_neighbor_sets(&adj);
+    Graph { adj, keys, communities: HashMap::new() }
 	Graph {
 		adj,
 		neighbor_sets,
 		keys,
 		communities: HashMap::new(),
 	}
 }
 /// For every node in `adj`, collect the set of keys its edges point at.
 fn build_neighbor_sets(adj: &HashMap<String, Vec<Edge>>) -> HashMap<String, HashSet<String>> {
 	let mut sets = HashMap::with_capacity(adj.len());
 	for (node, edges) in adj {
 		// A set, so parallel edges to the same target collapse to one entry.
 		let mut targets = HashSet::with_capacity(edges.len());
 		for edge in edges {
 			targets.insert(edge.target.clone());
 		}
 		sets.insert(node.clone(), targets);
 	}
 	sets
 }
 fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
--- a/src/hippocampus/mod.rs
+++ b/src/hippocampus/mod.rs
@ -17,6 +17,7 @@ pub mod query;
 pub mod spectral;
 pub mod neuro;
 pub mod counters;
 pub mod transcript;
 use std::cell::RefCell;
 use std::path::PathBuf;
--- a/src/hippocampus/transcript.rs
+++ b/src/hippocampus/transcript.rs
@ -0,0 +1,340 @@
 // Transcript JSONL parsing utilities.
 //
 // Provides mmap-based backward scanning of Claude Code transcript files
 // and compaction detection. Used by memory-search (hook mode) and
 // parse-claude-conversation (debug tool).
 use memchr::memrchr3;
 use memmap2::Mmap;
 use serde_json::Value;
 use std::fs;
 use std::path::Path;
 /// Scan backwards through mmap'd bytes, yielding byte slices of complete
 /// top-level JSON objects (outermost { to matching }).
 ///
 /// Uses memrchr3 (SIMD) to jump between structurally significant bytes
 /// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth,
 /// skipping braces inside JSON strings. Returns objects in reverse order
 /// (newest first).
 pub struct JsonlBackwardIter<'a> {
    // The whole buffer being scanned; yielded slices borrow from it.
    data: &'a [u8],
    // Exclusive upper bound of the region still to be searched: every
    // search looks at `data[..pos]`, and `pos` only ever moves toward 0.
    pos: usize,
 }
 impl<'a> JsonlBackwardIter<'a> {
    /// Create an iterator positioned at the end of `data`, so the first
    /// object yielded is the last one in the buffer.
    pub fn new(data: &'a [u8]) -> Self {
        Self { data, pos: data.len() }
    }
 }
 impl<'a> Iterator for JsonlBackwardIter<'a> {
    type Item = &'a [u8];
    /// Yield the next complete top-level JSON object, walking from the
    /// current position toward the start of the buffer.
    ///
    /// Returns `None` as soon as no further `{`, `}` or `"` byte is left to
    /// inspect before an object has been fully matched.
    fn next(&mut self) -> Option<Self::Item> {
        // True when the `"` at `data[quote]` is preceded by an odd number of
        // backslashes, i.e. it is an escaped quote inside a string and not
        // the string's opening delimiter. The run is counted all the way
        // down to byte 0 (the previous inline loop stopped one byte short).
        fn is_escaped_quote(data: &[u8], quote: usize) -> bool {
            let backslashes = data[..quote]
                .iter()
                .rev()
                .take_while(|&&b| b == b'\\')
                .count();
            backslashes % 2 == 1
        }
        let data = self.data;
        // Phase 1: find the closing } of the next object. Because the scan
        // runs backward, the first quote seen outside a string is that
        // string's closing quote; we stay "in string" until its unescaped
        // opening quote is reached, ignoring any braces in between.
        let mut in_string = false;
        let close = loop {
            let p = memrchr3(b'{', b'}', b'"', &data[..self.pos])?;
            self.pos = p;
            match (in_string, data[p]) {
                (true, b'"') => in_string = is_escaped_quote(data, p),
                (true, _) => {}
                (false, b'}') => break p,
                (false, b'"') => in_string = true,
                (false, _) => {}
            }
        };
        // Phase 2: track brace depth to find the { matching `close`.
        let mut depth: usize = 1;
        let mut in_string = false;
        loop {
            let p = memrchr3(b'{', b'}', b'"', &data[..self.pos])?;
            self.pos = p;
            match (in_string, data[p]) {
                (true, b'"') => in_string = is_escaped_quote(data, p),
                // { and } inside strings don't affect depth
                (true, _) => {}
                (false, b'"') => in_string = true,
                (false, b'}') => depth += 1,
                (false, b'{') => {
                    depth -= 1;
                    if depth == 0 {
                        return Some(&data[p..=close]);
                    }
                }
                (false, _) => {}
            }
        }
    }
 }
 /// Find the byte offset of the last compaction summary in mmap'd transcript data.
 ///
 /// Scans backward for a user-type message whose content starts with
 /// "This session is being continued". Returns the byte offset of the
 /// JSON object's opening brace.
 pub(crate) fn find_last_compaction(data: &[u8]) -> Option<usize> {
    let marker = b"This session is being continued";
    for obj_bytes in JsonlBackwardIter::new(data) {
        // Quick byte check before parsing
        if !contains_bytes(obj_bytes, marker) {
            continue;
        }
        let obj: Value = match serde_json::from_slice(obj_bytes) {
            Ok(v) => v,
            Err(_) => continue,
        };
        if obj.get("type").and_then(|v| v.as_str()) != Some("user") {
            continue;
        }
        if let Some(content) = obj.get("message")
            .and_then(|m| m.get("content"))
            .and_then(|c| c.as_str())
            && content.starts_with("This session is being continued") {
                let offset = obj_bytes.as_ptr() as usize - data.as_ptr() as usize;
                return Some(offset);
            }
    }
    None
 }
 /// Locate the last compaction summary in the transcript file at `path`.
 ///
 /// Returns the offset reported by `find_last_compaction`, or `None` when
 /// `path` is empty, the file cannot be opened, stat'ed or mapped, the file
 /// is zero-length, or it contains no compaction summary.
 pub(crate) fn find_last_compaction_in_file(path: &str) -> Option<u64> {
    if path.is_empty() {
        return None;
    }
    let file = fs::File::open(path).ok()?;
    let size = file.metadata().ok()?.len();
    if size == 0 {
        return None;
    }
    // NOTE(review): the mapping is only read here; soundness presumably
    // relies on the transcript not being truncated while mapped — confirm.
    let mapped = unsafe { Mmap::map(&file) }.ok()?;
    let offset = find_last_compaction(&mapped)?;
    Some(offset as u64)
 }
 /// Memory-map the transcript at `path`.
 ///
 /// Returns the mapping together with the open file handle so the caller
 /// can keep both alive. Yields `None` when the file cannot be opened or
 /// stat'ed, is zero-length, or cannot be mapped.
 pub(crate) fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
    let file = fs::File::open(path).ok()?;
    let size = file.metadata().ok()?.len();
    if size == 0 {
        return None;
    }
    // NOTE(review): soundness of the mapping presumably relies on the file
    // not being truncated while it is mapped — confirm.
    match unsafe { Mmap::map(&file) } {
        Ok(mapping) => Some((mapping, file)),
        Err(_) => None,
    }
 }
 /// Report whether `needle` occurs as a contiguous run inside `haystack`.
 ///
 /// An empty `needle` is considered to be contained in every haystack,
 /// including an empty one.
 fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    // `slice::windows` panics on a window size of zero, so the empty needle
    // has to be answered before the windowed search.
    needle.is_empty() || haystack.windows(needle.len()).any(|w| w == needle)
 }
 /// Newest-first iterator over the user and assistant messages of a
 /// transcript file. Each item is a `(role, text, timestamp)` tuple; the
 /// caller decides when to stop (byte budget, count, etc).
 pub struct TailMessages {
    // File handle stored next to the mapping for the iterator's lifetime.
    _file: fs::File,
    // Memory-mapped transcript bytes.
    mmap: Mmap,
    // Exclusive upper bound of the region still to be scanned.
    pos: usize,
 }
 impl TailMessages {
    /// Map the transcript at `path` and start scanning from its end.
    /// Returns `None` when `mmap_transcript` cannot produce a mapping.
    pub fn open(path: &str) -> Option<Self> {
        mmap_transcript(path).map(|(mmap, file)| Self {
            // Read the length before `mmap` is moved into the struct
            pos: mmap.len(),
            _file: file,
            mmap,
        })
    }
 }
 impl Iterator for TailMessages {
    type Item = (String, String, String);
    /// Yield the next user or assistant message, newest first, as
    /// `(role, text, timestamp)`.
    ///
    /// `role` is `"user"` or `"assistant"`. `text` is the message content
    /// (string content as-is, or the `text` blocks of array content joined
    /// with a space). `timestamp` is the object's `timestamp` string, or
    /// empty when absent. Objects of other types, objects that fail to
    /// parse, and messages with no text are skipped. Returns `None` once no
    /// further complete object can be matched.
    fn next(&mut self) -> Option<Self::Item> {
        // True when the `"` at `data[quote]` is preceded by an odd number of
        // backslashes, i.e. it is an escaped quote inside a string and not
        // the string's opening delimiter. The run is counted all the way
        // down to byte 0 (the previous inline loop stopped one byte short).
        fn is_escaped_quote(data: &[u8], quote: usize) -> bool {
            let backslashes = data[..quote]
                .iter()
                .rev()
                .take_while(|&&b| b == b'\\')
                .count();
            backslashes % 2 == 1
        }
        loop {
            // Phase 1: find the closing }, skipping } inside strings. The
            // scan runs backward, so the first quote seen outside a string
            // is that string's closing quote.
            let mut in_string = false;
            let close = loop {
                let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?;
                self.pos = p;
                match (in_string, self.mmap[p]) {
                    (true, b'"') => in_string = is_escaped_quote(&self.mmap, p),
                    (true, _) => {}
                    (false, b'}') => break p,
                    (false, b'"') => in_string = true,
                    (false, _) => {}
                }
            };
            // Phase 2: track brace depth to find the matching {
            let mut depth: usize = 1;
            let mut in_string = false;
            let open = loop {
                let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?;
                self.pos = p;
                match (in_string, self.mmap[p]) {
                    (true, b'"') => in_string = is_escaped_quote(&self.mmap, p),
                    // { and } inside strings don't affect depth
                    (true, _) => {}
                    (false, b'"') => in_string = true,
                    (false, b'}') => depth += 1,
                    (false, b'{') => {
                        depth -= 1;
                        if depth == 0 {
                            break p;
                        }
                    }
                    (false, _) => {}
                }
            };
            let obj_bytes = &self.mmap[open..=close];
            // The "type" field is near the start of top-level objects.
            // Only check the first 200 bytes to avoid scanning megabyte objects.
            // The patterns contain no whitespace, so only compactly
            // serialized `"type":"..."` pairs are recognized.
            let prefix = &obj_bytes[..obj_bytes.len().min(200)];
            let is_user = memchr::memmem::find(prefix, b"\"type\":\"user\"").is_some();
            let is_assistant = !is_user
                && memchr::memmem::find(prefix, b"\"type\":\"assistant\"").is_some();
            if !is_user && !is_assistant {
                continue;
            }
            let obj: Value = match serde_json::from_slice(obj_bytes) {
                Ok(v) => v,
                Err(_) => continue,
            };
            let msg_type = if is_user { "user" } else { "assistant" };
            // Content normally lives under "message"; fall back to the
            // object itself when that key is missing.
            let msg = obj.get("message").unwrap_or(&obj);
            let text = match msg.get("content") {
                Some(Value::String(s)) => s.clone(),
                Some(Value::Array(arr)) => {
                    // Keep only blocks tagged "text"; other block kinds are dropped
                    arr.iter()
                        .filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text"))
                        .filter_map(|b| b.get("text").and_then(|v| v.as_str()))
                        .collect::<Vec<_>>()
                        .join(" ")
                }
                _ => continue,
            };
            if text.is_empty() {
                continue;
            }
            let timestamp = obj.get("timestamp")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();
            return Some((msg_type.to_string(), text, timestamp));
        }
    }
 }
 /// Read the timestamp of the transcript entry that starts at byte `offset`.
 ///
 /// The entry is taken to run from `offset` to the next newline (or end of
 /// file) and is parsed as JSON. The first string value found among the
 /// fields `timestamp`, `createdAt`, `created_at`, `time` (in that order)
 /// is returned verbatim. Returns `None` when the file cannot be mapped,
 /// `offset` is at or past the end, the entry does not parse, or none of
 /// the fields holds a string.
 pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
    let (mmap, _file) = mmap_transcript(path)?;
    let start = offset as usize;
    // Everything from `start` on; an out-of-range or end-of-file offset
    // leaves nothing to parse.
    let rest = mmap.get(start..).filter(|rest| !rest.is_empty())?;
    // The JSONL line is the part before the first newline
    let line = rest.split(|&b| b == b'\n').next()?;
    let obj: Value = serde_json::from_slice(line).ok()?;
    // Claude Code transcript entries have a "timestamp" field (ISO 8601);
    // the remaining names are fallbacks.
    ["timestamp", "createdAt", "created_at", "time"]
        .iter()
        .find_map(|field| obj.get(*field).and_then(|v| v.as_str()))
        .map(|ts| ts.to_string())
 }
 /// Report whether the transcript's last compaction moved since the
 /// previous call.
 ///
 /// The current compaction offset is compared with the number stored in
 /// `state_dir/compaction-{session_id}`. Returns true when a compaction
 /// exists and the stored value is missing, unparsable, or different.
 /// Whenever a compaction exists its offset is written back to that file;
 /// a failed write is ignored.
 pub fn detect_new_compaction(
    state_dir: &Path,
    session_id: &str,
    transcript_path: &str,
 ) -> bool {
    let current = find_last_compaction_in_file(transcript_path);
    let marker_file = state_dir.join(format!("compaction-{}", session_id));
    let previous = match fs::read_to_string(&marker_file) {
        Ok(text) => text.trim().parse::<u64>().ok(),
        Err(_) => None,
    };
    // No compaction at all is never "new"; otherwise anything other than
    // an identical saved offset counts as new.
    let changed = current.is_some_and(|cur| previous != Some(cur));
    // Save current offset (best effort)
    if let Some(cur) = current {
        let _ = fs::write(&marker_file, cur.to_string());
    }
    changed
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,4 +1,4 @@
-#![cfg_attr(feature = "nightly-diagnostics", feature(async_fn_track_caller))]
+#![feature(async_fn_track_caller)]
 // consciousness — unified crate for memory, agents, and subconscious processes
 //
@ -25,9 +25,6 @@ macro_rules! dbglog {
    }};
 }
 // Logging (target-routed file logger)
 pub mod logging;
 // User interface (TUI, CLI)
 pub mod user;
@ -43,9 +40,6 @@ pub mod hippocampus;
 // Autonomous agents
 pub mod subconscious;
 // Conversation transcript abstraction and compatibility sources
 pub mod conversation;
 // Unified configuration
 pub mod config;
 pub mod config_writer;
@ -94,8 +88,7 @@ pub mod channel_capnp {
 pub use hippocampus::{
    store, graph, lookups, query,
    spectral, neuro, counters,
-    memory,
+    transcript, memory,
 };
 pub use conversation as transcript;
 use hippocampus::query::engine as search;
 use hippocampus::query::parser as query_parser;
--- a/src/locks.rs
+++ b/src/locks.rs
@ -114,7 +114,7 @@ impl<T> TrackedMutex<T> {
        Self { inner: Mutex::new(value) }
    }
-    #[cfg_attr(feature = "nightly-diagnostics", track_caller)]
+    #[track_caller]
    pub async fn lock(&self) -> TrackedMutexGuard<'_, T> {
        let location = Location::caller();
        let guard = self.inner.lock().await;
@ -125,7 +125,7 @@ impl<T> TrackedMutex<T> {
        }
    }
-    #[cfg_attr(feature = "nightly-diagnostics", track_caller)]
+    #[track_caller]
    pub fn try_lock(&self) -> Result<TrackedMutexGuard<'_, T>, tokio::sync::TryLockError> {
        let location = Location::caller();
        let guard = self.inner.try_lock()?;
@ -171,7 +171,7 @@ impl<T> TrackedRwLock<T> {
        Self { inner: RwLock::new(value) }
    }
-    #[cfg_attr(feature = "nightly-diagnostics", track_caller)]
+    #[track_caller]
    pub async fn read(&self) -> TrackedRwLockReadGuard<'_, T> {
        let location = Location::caller();
        let guard = self.inner.read().await;
@ -182,7 +182,7 @@ impl<T> TrackedRwLock<T> {
        }
    }
-    #[cfg_attr(feature = "nightly-diagnostics", track_caller)]
+    #[track_caller]
    pub async fn write(&self) -> TrackedRwLockWriteGuard<'_, T> {
        let location = Location::caller();
        let guard = self.inner.write().await;
--- a/src/logging.rs
+++ b/src/logging.rs
@ -1,146 +0,0 @@
 // logging.rs — log-crate logger that routes by target.
 //
 // Records with target "grpc" (or any target starting with "grpc::") go
 // to ~/.consciousness/logs/daemon/grpc.log so we can tell gRPC events
 // apart from the rest of consciousness's noise. Everything else goes
 // to ~/.consciousness/logs/daemon/debug.log.
 //
 // Level threshold is taken from RUST_LOG (simple global level parse:
 // "trace"/"debug"/"info"/"warn"/"error"); defaults to "info".
 use std::io::Write;
 use std::path::PathBuf;
 use std::sync::Mutex;
 use log::{Level, LevelFilter, Log, Metadata, Record, SetLoggerError};
 /// Directory that holds the daemon log files: `.consciousness/logs/daemon`
 /// under the home directory, or under an empty path when no home
 /// directory is reported.
 fn logs_dir() -> PathBuf {
    let mut dir = dirs::home_dir().unwrap_or_default();
    dir.push(".consciousness/logs/daemon");
    dir
 }
 /// Logger that appends records to one of two files depending on the
 /// record's target.
 struct RoutingLogger {
    // Destination for "grpc" targets; `None` if the file could not be opened.
    grpc_file: Mutex<Option<std::fs::File>>,
    // Destination for every other target; `None` if it could not be opened.
    debug_file: Mutex<Option<std::fs::File>>,
    // Threshold applied to non-grpc records.
    level: LevelFilter,
 }
 impl RoutingLogger {
    /// Create the log directory (best effort) and open `grpc.log` and
    /// `debug.log` in append mode. A file that fails to open is stored as
    /// `None`.
    fn new(level: LevelFilter) -> Self {
        let dir = logs_dir();
        let _ = std::fs::create_dir_all(&dir);
        let open_append = |name: &str| {
            std::fs::OpenOptions::new()
                .create(true)
                .append(true)
                .open(dir.join(name))
                .ok()
        };
        Self {
            grpc_file: Mutex::new(open_append("grpc.log")),
            debug_file: Mutex::new(open_append("debug.log")),
            level,
        }
    }
    /// True for the target `grpc` itself and for any target under `grpc::`.
    fn is_grpc_target(target: &str) -> bool {
        target
            .strip_prefix("grpc")
            .is_some_and(|rest| rest.is_empty() || rest.starts_with("::"))
    }
 }
 impl Log for RoutingLogger {
    fn enabled(&self, m: &Metadata) -> bool {
        // Always enable DEBUG for grpc target so the dedicated log is
        // actually useful without RUST_LOG wrangling; defer to the
        // configured level for everything else.
        if Self::is_grpc_target(m.target()) {
            return m.level() <= Level::Debug;
        }
        m.level() <= self.level
    }
    fn log(&self, record: &Record) {
        if !self.enabled(record.metadata()) {
            return;
        }
        let line = format!(
            "[{}] [{}] [{}] {}\n",
            chrono::Utc::now().format("%Y-%m-%d %H:%M:%S%.3f"),
            record.level(),
            record.target(),
            record.args(),
        );
        let slot = if Self::is_grpc_target(record.target()) {
            &self.grpc_file
        } else {
            &self.debug_file
        };
        if let Ok(mut guard) = slot.lock() {
            if let Some(ref mut f) = *guard {
                let _ = f.write_all(line.as_bytes());
            }
        }
    }
    fn flush(&self) {
        for slot in [&self.grpc_file, &self.debug_file] {
            if let Ok(mut g) = slot.lock() {
                if let Some(ref mut f) = *g {
                    let _ = f.flush();
                }
            }
        }
    }
 }
 fn parse_level_from_env() -> LevelFilter {
    let raw = std::env::var("RUST_LOG").unwrap_or_else(|_| "info".to_string());
    // Parse a plain level word; if it's the module=level form, we take
    // the first level we find.
    let token = raw.split(',').next().unwrap_or("info");
    let level_word = token.rsplit_once('=').map(|(_, v)| v).unwrap_or(token);
    match level_word.trim().to_lowercase().as_str() {
        "trace" => LevelFilter::Trace,
        "debug" => LevelFilter::Debug,
        "info"  => LevelFilter::Info,
        "warn"  => LevelFilter::Warn,
        "error" => LevelFilter::Error,
        "off"   => LevelFilter::Off,
        _ => LevelFilter::Info,
    }
 }
 /// Install the routing logger as the global `log` logger.
 ///
 /// Intended to be called once. A later call returns the error from
 /// `log::set_boxed_logger` and leaves the max level untouched, but note
 /// that the logger is constructed first, so the log directory and files
 /// are still created/opened before the install attempt fails.
 ///
 /// # Errors
 /// Returns `SetLoggerError` when `log::set_boxed_logger` fails.
 pub fn init() -> Result<(), SetLoggerError> {
    let level = parse_level_from_env();
    let logger = Box::new(RoutingLogger::new(level));
    log::set_boxed_logger(logger)?;
    // Always let DEBUG records through globally so the grpc log can
    // capture them (the logger itself filters non-grpc targets by
    // `level`). The cost is that log::debug! call-sites below `level`
    // in other modules still do their arg formatting before being
    // dropped at the logger; acceptable for a debug tool.
    log::set_max_level(LevelFilter::Debug.max(level));
    // Mark the file with a session boundary so it's easy to see where a
    // restart happened. One banner goes to the default target and one to
    // the "grpc" target, so each log file gets its own marker.
    log::info!(
        "===== consciousness logger init (level={}, pid={}) =====",
        level, std::process::id(),
    );
    log::info!(target: "grpc",
        "===== grpc log init (level={}, pid={}) =====",
        level, std::process::id(),
    );
    Ok(())
 }
 /// Report the global max log level as a `Level`.
 ///
 /// `LevelFilter::Off` has no `Level` counterpart and is reported as
 /// `Level::Info`.
 // Kept even when unused so the `Level` import always has a consumer.
 #[allow(dead_code)]
 pub fn current_level() -> Level {
    log::max_level().to_level().unwrap_or(Level::Info)
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -1,4 +1,4 @@
-#![cfg_attr(feature = "nightly-diagnostics", feature(panic_backtrace_config))]
+#![feature(panic_backtrace_config)]
 // poc-memory: graph-structured memory for AI assistants
 //
@ -333,18 +333,6 @@ enum AdminCmd {
        #[arg(long)]
        stats: bool,
    },
    /// Print normalized user/assistant messages from a transcript JSONL file
    #[command(name = "transcript-tail")]
    TranscriptTail {
        /// Transcript JSONL path
        path: String,
        /// Maximum number of messages to print
        #[arg(long, short = 'n', default_value_t = 40)]
        count: usize,
        /// Print newest messages first instead of chronological order
        #[arg(long)]
        newest_first: bool,
    },
 }
 /// Print help with subcommands expanded to show nested commands.
@ -470,15 +458,12 @@ impl Run for AdminCmd {
            Self::Dedup { apply } => cli::admin::cmd_dedup(apply).await,
            Self::DailyCheck    => cli::admin::cmd_daily_check().await,
            Self::LoadContext { stats } => cli::node::cmd_load_context(stats).await,
            Self::TranscriptTail { path, count, newest_first }
                => cli::admin::cmd_transcript_tail(&path, count, newest_first),
        }
    }
 }
 #[tokio::main]
 async fn main() {
    #[cfg(feature = "nightly-diagnostics")]
    std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short);
    // Handle --help ourselves for expanded subcommand display
@ -510,3 +495,4 @@ async fn main() {
        process::exit(1);
    }
 }
--- a/src/mind/log.rs
+++ b/src/mind/log.rs
@ -3,7 +3,7 @@ use std::fs::{File, OpenOptions};
 use std::io::Write;
 use std::path::{Path, PathBuf};
 use crate::agent::context::AstNode;
-use crate::conversation::JsonlBackwardIter;
+use crate::hippocampus::transcript::JsonlBackwardIter;
 use memmap2::Mmap;
 pub struct ConversationLog {
@ -78,6 +78,6 @@ pub struct TailNodes {
 impl TailNodes {
    pub fn iter(&self) -> impl Iterator<Item = AstNode> + '_ {
        JsonlBackwardIter::new(&self.mmap)
-            .filter_map(|(_, bytes)| serde_json::from_slice::<AstNode>(bytes).ok())
+            .filter_map(|bytes| serde_json::from_slice::<AstNode>(bytes).ok())
    }
 }
--- a/src/mind/mod.rs
+++ b/src/mind/mod.rs
@ -419,9 +419,7 @@ impl Mind {
        let subconscious = Arc::new(crate::Mutex::new(Subconscious::new()));
        subconscious.lock().await.init_output_tool(subconscious.clone());
-        let unconscious = Arc::new(crate::Mutex::new(
+        let unconscious = Arc::new(crate::Mutex::new(Unconscious::new()));
            Unconscious::new(agent.client.clone()),
        ));
        // Spawn the unconscious loop on its own task
        if !config.no_agents {
@ -469,11 +467,8 @@ impl Mind {
                        };
                        // Spawn agents outside lock
                        let client = unc.lock().await.client.clone();
                        for (idx, name, auto) in to_spawn {
-                            match crate::mind::unconscious::prepare_spawn(
+                            match crate::mind::unconscious::prepare_spawn(&name, auto, wake.clone()).await {
                                &name, auto, wake.clone(), client.clone(),
                            ).await {
                                Ok(result) => unc.lock().await.complete_spawn(idx, result),
                                Err(auto) => unc.lock().await.abort_spawn(idx, auto),
                            }
@ -693,7 +688,7 @@ impl Mind {
            }
        });
-        let _sub_handle: Option<tokio::task::JoinHandle<()>> = None;
+        let mut sub_handle: Option<tokio::task::JoinHandle<()>> = None;
        // Start finetune scoring at startup (scores existing conversation)
        if !self.config.no_agents {
@ -743,7 +738,6 @@ impl Mind {
                _ = tokio::time::sleep(timeout), if !has_input => _dmn_expired = true,
            }
            /*
            if !self.config.no_agents {
                if sub_handle.as_ref().map_or(true, |h| h.is_finished()) {
                    let sub = self.subconscious.clone();
@ -755,7 +749,6 @@ impl Mind {
                    }));
                }
            }
            */
            // Check for pending user input → push to agent context and start turn
            let pending = self.shared.lock().unwrap().take_pending_input();
--- a/src/mind/subconscious.rs
+++ b/src/mind/subconscious.rs
@ -631,7 +631,7 @@ impl Subconscious {
            {
                let mut st = forked.state.lock().await;
                st.provenance = auto.name.clone();
-                st.sampling.temperature = auto.temperature;
+                st.temperature = auto.temperature;
                // Surface agent gets near-interactive priority;
                // other subconscious agents get lower priority.
                st.priority = Some(if auto.name == "surface" { 1 } else { auto.priority });
--- a/src/mind/unconscious.rs
+++ b/src/mind/unconscious.rs
@ -73,15 +73,10 @@ pub struct Unconscious {
    last_health_check: Option<Instant>,
    /// Notified when agent state changes (finished, toggled)
    pub wake: std::sync::Arc<tokio::sync::Notify>,
    /// Shared API client — cloned (cheap) into each spawned agent's
    /// Agent::new call so they all share the manifest cache and
    /// gRPC endpoint state. Override `.model` on the clone when a
    /// per-agent backend differs from the default.
    pub client: crate::agent::api::ApiClient,
 }
 impl Unconscious {
-    pub fn new(client: crate::agent::api::ApiClient) -> Self {
+    pub fn new() -> Self {
        let enabled_map = load_enabled_config();
        // Scan all .agent files, exclude subconscious-* and surface-observe
@ -125,7 +120,6 @@ impl Unconscious {
            graph_health: None,
            last_health_check: None,
            wake: std::sync::Arc::new(tokio::sync::Notify::new()),
            client,
        }
    }
@ -140,8 +134,7 @@ impl Unconscious {
            let agent_name = self.agents[idx].name.clone();
            let auto = self.agents[idx].auto.take().unwrap();
            let wake = self.wake.clone();
-            let client = self.client.clone();
+            match prepare_spawn(&agent_name, auto, wake).await {
            match prepare_spawn(&agent_name, auto, wake, client).await {
                Ok(result) => self.complete_spawn(idx, result),
                Err(auto) => self.abort_spawn(idx, auto),
            }
@ -257,12 +250,7 @@ pub struct SpawnResult {
 /// Called outside the Unconscious lock.
 /// On success, auto is consumed (moved into spawned task).
 /// On failure, auto is returned so it can be restored.
-pub async fn prepare_spawn(
+pub async fn prepare_spawn(name: &str, mut auto: AutoAgent, wake: std::sync::Arc<tokio::sync::Notify>) -> Result<SpawnResult, AutoAgent> {
    name: &str,
    mut auto: AutoAgent,
    wake: std::sync::Arc<tokio::sync::Notify>,
    base_client: crate::agent::api::ApiClient,
 ) -> Result<SpawnResult, AutoAgent> {
    dbglog!("[unconscious] spawning {}", name);
    let def = match defs::get_def(name) {
@ -307,10 +295,8 @@ pub async fn prepare_spawn(
    };
    // Unconscious agents have self-contained prompts — no standard context.
-    // Clone the shared client so we inherit the manifest cache and
+    let client = crate::agent::api::ApiClient::new(
-    // only override the model id per-agent.
+        &resolved.api_base, &resolved.api_key, &resolved.model_id);
    let mut client = base_client;
    client.model = resolved.model_id.clone();
    let agent = crate::agent::Agent::new(
        client, Vec::new(),
        app, None,
@ -321,7 +307,7 @@ pub async fn prepare_spawn(
        let mut st = agent.state.lock().await;
        st.provenance = auto.name.clone();
        st.priority = Some(auto.priority);
-        st.sampling.temperature = auto.temperature;
+        st.temperature = auto.temperature;
    }
    let agent_clone = agent.clone();
@ -343,9 +329,8 @@ impl Unconscious {
        self.reap_finished();
        let to_spawn = self.select_to_spawn();
        let wake = self.wake.clone();
        let client = self.client.clone();
        for (idx, name, auto) in to_spawn {
-            match prepare_spawn(&name, auto, wake.clone(), client.clone()).await {
+            match prepare_spawn(&name, auto, wake.clone()).await {
                Ok(result) => self.complete_spawn(idx, result),
                Err(auto) => self.abort_spawn(idx, auto),
            }
--- a/src/session.rs
+++ b/src/session.rs
@ -64,12 +64,7 @@ impl HookSession {
    /// Load from POC_SESSION_ID environment variable
    pub fn from_env() -> Option<Self> {
-        let session_id = std::env::var("POC_SESSION_ID").ok()?;
+        Self::from_id(std::env::var("POC_SESSION_ID").ok()?)
        let mut session = Self::from_id(session_id)?;
        if let Ok(path) = std::env::var("POC_TRANSCRIPT_PATH") {
            session.transcript_path = path;
        }
        Some(session)
    }
    /// Get the seen set for this session
--- a/src/subconscious/agents/bail-no-competing.sh
+++ b/src/subconscious/agents/bail-no-competing.sh
@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/bash
 # Bail if another agent is in the same phase-group as us.
 #
 #   $1 = our pid file name (e.g. "pid-12345")
--- a/src/subconscious/defs.rs
+++ b/src/subconscious/defs.rs
@ -390,7 +390,7 @@ fn resolve_conversation(budget: Option<usize>) -> String {
    if !transcript.exists() { return String::new(); }
-    let Some(iter) = crate::conversation::TailMessages::open(&transcript.path) else {
+    let Some(iter) = crate::transcript::TailMessages::open(&transcript.path) else {
        return String::new();
    };
@ -401,14 +401,10 @@ fn resolve_conversation(budget: Option<usize>) -> String {
    let mut total_bytes = 0;
    let mut oldest_ts = String::new();
-    for message in iter {
+    for (role, content, ts) in iter {
        if total_bytes >= max_bytes { break; }
-        let content = message.text;
+        let name = if role == "user" { &app.user_name } else { &app.assistant_name };
-        let name = match message.role {
+        let formatted = if !ts.is_empty() {
            crate::conversation::TranscriptRole::User => &app.user_name,
            crate::conversation::TranscriptRole::Assistant => &app.assistant_name,
        };
        let formatted = if let Some(ts) = message.timestamp {
            oldest_ts = ts[..ts.floor_char_boundary(ts.len().min(19))].to_string();
            format!("**{}** {}: {}", name, &oldest_ts, content)
        } else {
--- a/src/subconscious/generate.rs
+++ b/src/subconscious/generate.rs
@ -4,10 +4,8 @@
 // given a context prefix and a skip predicate, generate what the model
 // would say as the next assistant turn.
 use std::sync::Arc;
 use crate::agent::api::{ApiClient, SamplingParams, StreamToken};
-use crate::agent::context::{AstNode, ContextState, WireChunk};
+use crate::agent::context::{AstNode, ContextState};
 use crate::agent::tokenizer;
 /// Generate an assistant continuation from the context up to `entry_idx`,
@ -15,9 +13,6 @@ use crate::agent::tokenizer;
 /// assembly. The model is whichever `client` points at — the default
 /// runtime client for memory-ablation alternates, a test-model client
 /// for F7 comparison.
 ///
 /// Uses a fresh ephemeral gRPC session (no cross-call KV reuse): one
 /// Open / Append / Generate round-trip, then the session is dropped.
 pub async fn gen_continuation<F>(
    context: &ContextState,
    entry_idx: usize,
@ -26,32 +21,17 @@ pub async fn gen_continuation<F>(
 ) -> anyhow::Result<String>
 where F: FnMut(&AstNode) -> bool,
 {
-    let (mut chunks, images) = context.wire_chunks(0..entry_idx, skip);
+    let (mut prompt, images, _) = context.wire_prompt(0..entry_idx, skip);
-    // Assistant-turn prologue.
+    prompt.push(tokenizer::IM_START);
-    let prologue = {
+    prompt.extend(tokenizer::encode("assistant\n"));
        let mut t = vec![tokenizer::IM_START];
        t.extend(tokenizer::encode("assistant\n"));
        t
    };
    match chunks.last_mut() {
        Some(WireChunk::Tokens(last)) => last.extend(prologue),
        _ => chunks.push(WireChunk::Tokens(prologue)),
    }
    let sampling = SamplingParams {
        temperature: 0.6,
        top_p: 0.95,
        top_k: 20,
        max_tokens: 4096,
    };
-
+    let (mut rx, _guard) = client.stream_completion_mm(&prompt, &images, sampling, Some(-5));
    // Ephemeral per-call session — opens on first touch, drops when
    // `_guard` drops at function end.
    let session_lock = Arc::new(crate::Mutex::new(None));
    let (mut rx, _guard) = client.stream_session_mm(
        session_lock, chunks, images, 0, sampling, Some(-5), None,
    );
    let mut tokens = Vec::new();
    while let Some(tok) = rx.recv().await {
--- a/src/subconscious/learn.rs
+++ b/src/subconscious/learn.rs
@ -1,148 +1,100 @@
-// learn.rs — Memory importance scoring over the salience gRPC protocol.
+// training.rs — Memory importance scoring via /v1/score
 //
-// Three scoring modes, all built on call_score():
+// Three scoring modes, all built on the same call_score() primitive:
 //
 // score_memories()  — Full N×M matrix (memories × responses) for the
-//                     debug screen. Expensive: N+1 sessions/calls.
+//                     debug screen. Expensive: N+1 API calls.
 //
-// score_memory()    — Single memory importance. Scores the 50 messages
+// memory_score()    — Single memory importance. Scores the 50 messages
 //                     after it was surfaced, with/without that memory.
-//                     2 calls.
+//                     2 API calls.
 //
 // finetune_score()  — Identifies training candidates. Scores recent
 //                     messages with all memories stripped. Responses
 //                     with high divergence depend on memories the model
-//                     hasn't internalized. 2 calls.
+//                     hasn't internalized. 2 API calls.
 //
 // Each call opens an ephemeral gRPC session (reusing the shared
 // tonic Channel on `ApiClient`), pushes the prompt through as
 // interleaved tokens + AppendImage calls, runs Generate with
 // max_tokens=0 + logprobs_ranges over the scored positions, collects
 // each Token event's sampled_logprob, then drops the SessionHandle —
 // which triggers a best-effort CloseSession over the shared channel.
 use std::sync::Arc;
 use crate::agent::api::ApiClient;
 use crate::agent::api::salience::{SessionHandle, pb};
 use crate::agent::context::{
-    Ast, AstNode, ContextState, Role, WireChunk, WireImage,
+    Ast, AstNode, ContextState, Role, WireImage,
    is_assistant, is_memory_node, memory_key, render_branch_text, render_prior_context,
 };
 use crate::agent::tokenizer;
 use crate::mind::{MindState, MindTriggered, TaskHandle};
 use crate::subconscious::generate::gen_continuation;
 const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);
 // ── Score API ───────────────────────────────────────────────────
-#[derive(Debug, Clone)]
+#[derive(serde::Deserialize)]
 struct ScoreResult {
    total_logprob: f64,
 }
-/// Find each <|vision_start|>...<|vision_end|> run in the flat prompt
+#[derive(serde::Deserialize)]
-/// and pair it with the matching entry in `images`. Returns a list
+struct ScoreResponse {
-/// of `ImageAttachment` with absolute pad-range positions, ready
+    scores: Vec<ScoreResult>,
-/// to drop into `GenerateRequest.images`.
+}
-fn pair_images_to_ranges(
+
-    prompt: &[u32],
+fn http_client() -> crate::agent::api::http::HttpClient {
-    images: &[WireImage],
+    crate::agent::api::http::HttpClient::builder()
-) -> Vec<pb::ImageAttachment> {
+        .timeout(SCORE_TIMEOUT)
-    let mut out: Vec<pb::ImageAttachment> = Vec::new();
+        .build()
    let mut cur = 0;
    let mut img_idx = 0;
    while cur < prompt.len() {
        if prompt[cur] == tokenizer::VISION_START {
            let end_rel = prompt[cur..].iter()
                .position(|&t| t == tokenizer::VISION_END)
                .unwrap_or_else(|| panic!(
                    "unmatched VISION_START at position {} in prompt", cur));
            let end = cur + end_rel + 1;
            let img = images.get(img_idx)
                .unwrap_or_else(|| panic!(
                    "image index {} out of range for {} images", img_idx, images.len()));
            out.push(pb::ImageAttachment {
                bytes: img.bytes.clone(),
                mime: img.mime.clone(),
                pad_range_start: cur as u32,
                pad_range_end: end as u32,
            });
            img_idx += 1;
            cur = end;
        } else {
            cur += 1;
        }
    }
    out
 }
 async fn call_score(
    http: &crate::agent::api::http::HttpClient,
    client: &ApiClient,
    prompt: &[u32],
    images: &[WireImage],
    ranges: &[(usize, usize)],
    priority: Option<i32>,
 ) -> anyhow::Result<Vec<ScoreResult>> {
    use futures::StreamExt;
    // Nothing to score — skip the round-trip.
    if ranges.is_empty() {
        return Ok(Vec::new());
    }
    let url = format!("{}/score", client.base_url());
    let auth = format!("Bearer {}", client.api_key());
    let mut body = serde_json::json!({
        "model": client.model,
        "prompt": prompt,
        "score_ranges": ranges,
        "logprobs": 1,
    });
    if !images.is_empty() {
        use base64::Engine;
        let b64 = base64::engine::general_purpose::STANDARD;
        let uris: Vec<String> = images.iter()
            .map(|img| format!("data:{};base64,{}", img.mime, b64.encode(&img.bytes)))
            .collect();
        body["multi_modal_data"] = serde_json::json!({ "image": uris });
    }
    if let Some(p) = priority {
        body["priority"] = serde_json::json!(p);
    }
    let response = http
        .send_json("POST", &url, &[
            ("authorization", &auth),
        ], &body)
        .await?;
-    let images_pb = pair_images_to_ranges(prompt, images);
+    let status = response.status();
-    let mut handle = SessionHandle::open(client).await?;
+    let body: serde_json::Value = response.json().await?;
-    // Final Generate: max_tokens=0 so the server runs prefill of the
+    if !status.is_success() {
-    // full prompt and emits Token events for each position covered
+        let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("unknown error");
-    // by logprobs_ranges, then Done. logprob_top_k=0 means "just
+        anyhow::bail!("score API HTTP {}: {}", status, msg);
-    // the sampled (prompt) token's logprob" — no top-k alternatives,
+    }
-    // which is all call_score historically needed. Images attach
+    if let Some(err) = body.get("error").and_then(|e| e.as_str()) {
-    // inline via `images`; the prompt already contains their pre-
+        anyhow::bail!("score API error: {}", err);
    // expanded vision blocks at the declared ranges.
    let logprobs_ranges: Vec<pb::PositionRange> = ranges.iter()
        .map(|(s, e)| pb::PositionRange { start: *s as u32, end: *e as u32 })
        .collect();
    let req = pb::GenerateRequest {
        session_id: handle.session_id.clone(),
        append_tokens: prompt.to_vec(),
        offset: handle.committed_len,
        truncating: false,
        max_tokens: 0,
        logprobs_ranges,
        logprob_top_k: 0,
        readout_ranges: Vec::new(),
        temperature: 0.0,
        top_p: 0.0,
        top_k: 0,
        stop_token_ids: Vec::new(),
        priority: priority.unwrap_or(0),
        images: images_pb,
    };
    let mut stream = handle.generate(req).await?;
    let mut totals = vec![0.0f64; ranges.len()];
    while let Some(event) = stream.next().await {
        let event = event
            .map_err(|s| anyhow::anyhow!("score Generate stream: {}", s))?;
        let Some(inner) = event.event else { continue };
        match inner {
            pb::generate_event::Event::Token(t) => {
                if !t.has_sampled_logprob { continue; }
                let pos = t.position as usize;
                for (i, (start, end)) in ranges.iter().enumerate() {
                    if pos >= *start && pos < *end {
                        totals[i] += t.sampled_logprob as f64;
                    }
                }
            }
            pb::generate_event::Event::Done(_) => break,
        }
    }
-    Ok(totals.into_iter()
+    let result: ScoreResponse = serde_json::from_value(body)
-        .map(|total_logprob| ScoreResult { total_logprob })
+        .map_err(|e| anyhow::anyhow!("failed to parse score response: {}", e))?;
-        .collect())
+    Ok(result.scores)
 }
 /// Compute per-position logprob divergence: how much worse the model
@ -158,6 +110,7 @@ fn divergence(baseline: &[ScoreResult], without: &[ScoreResult]) -> Vec<f64> {
 /// Score two message sets and return total divergence.
 async fn score_divergence<F>(
    http: &crate::agent::api::http::HttpClient,
    client: &ApiClient,
    context: &ContextState,
    range: std::ops::Range<usize>,
@ -170,9 +123,9 @@ where F: FnMut(&AstNode) -> bool,
        context.wire_prompt(range.clone(), |_| false);
    let (without_tokens, without_images, without_ranges) =
        context.wire_prompt(range, skip);
-    let baseline = call_score(client, &baseline_tokens, &baseline_images,
+    let baseline = call_score(http, client, &baseline_tokens, &baseline_images,
                              &baseline_ranges, priority).await?;
-    let without = call_score(client, &without_tokens, &without_images,
+    let without = call_score(http, client, &without_tokens, &without_images,
                             &without_ranges, priority).await?;
    let divs = divergence(&baseline, &without);
    Ok((divs, baseline))
@ -209,13 +162,14 @@ pub async fn score_memories(
    dbglog!("[scoring-full] starting: {} memories × {} responses",
        total, response_indices.len());
    let http = http_client();
    let activity = crate::agent::start_activity(agent, "scoring: baseline").await;
    let (baseline_tokens, baseline_images, baseline_ranges) = {
        let ctx = agent.context.lock().await;
        ctx.wire_prompt(0..ctx.conversation().len(), |_| false)
    };
-    let baseline = call_score(client, &baseline_tokens, &baseline_images,
+    let baseline = call_score(&http, client, &baseline_tokens, &baseline_images,
                              &baseline_ranges, Some(5)).await?;
    dbglog!("[scoring-full] baseline done ({} response scores)", baseline.len());
@ -226,7 +180,7 @@ pub async fn score_memories(
            let ctx = agent.context.lock().await;
            ctx.wire_prompt(0..ctx.conversation().len(), |n| memory_key(n) == Some(key.as_str()))
        };
-        let row = match call_score(client, &tokens, &images, &ranges, Some(5)).await {
+        let row = match call_score(&http, client, &tokens, &images, &ranges, Some(5)).await {
            Ok(without) => {
                let divs = divergence(&baseline, &without);
                let max_div = divs.iter().cloned().fold(0.0f64, f64::max);
@ -240,23 +194,25 @@ pub async fn score_memories(
                vec![0.0; baseline.len()]
            }
        };
-        // Write this memory's scores to the live AST nodes via the
+        // Write this memory's scores to the live AST nodes
        // focused setter — keeps the AST mutation surface narrow.
        {
            let mut ctx = agent.context.lock().await;
            let mut set_count = 0;
            for (resp_idx, &idx) in response_indices.iter().enumerate() {
-                let Some(&score) = row.get(resp_idx) else { continue };
+                if idx >= ctx.conversation().len() { continue; }
-                let normalized = if score > 0.01 { Some(score) } else { None };
+                let node = &mut ctx.conversation_mut()[idx];
-                ctx.set_branch_memory_score(
+                if let AstNode::Branch {
-                    crate::agent::context::Section::Conversation,
+                    role: Role::Assistant, memory_scores, ..
-                    idx,
+                } = node {
-                    &key,
+                    if let Some(&score) = row.get(resp_idx) {
-                    normalized,
+                        if score > 0.01 {
-                );
+                            memory_scores.insert(key.clone(), score);
-                if normalized.is_some() {
+                            set_count += 1;
-                    set_count += 1;
+                        } else {
                            memory_scores.remove(key.as_str());
                        }
                    }
                }
            }
@ -307,7 +263,8 @@ pub async fn score_memory(
        return Ok(0.0);
    }
-    let (divs, _) = score_divergence(client, context, range,
+    let http = http_client();
    let (divs, _) = score_divergence(&http, client, context, range,
                                     |n| memory_key(n) == Some(key), Some(5)).await?;
    Ok(divs.iter().sum())
@ -365,6 +322,7 @@ where
    // Score oldest-first
    candidates.sort_by_key(|&(_, _, last)| last);
    let http = http_client();
    let mut scored = 0;
    let entries = context.conversation();
@ -399,7 +357,7 @@ where
        }
        activity.update(format!("scoring: {}/{} {}", scored + 1, total, key)).await;
-        match score_divergence(client, context, range,
+        match score_divergence(&http, client, context, range,
                               |n| memory_key(n) == Some(key), Some(5)).await {
            Ok((divs, _)) => {
                let n_responses = divs.len();
@ -547,7 +505,8 @@ pub async fn score_finetune(
        return Ok(Vec::new());
    }
-    let (divs, _) = score_divergence(client, context, range, is_memory_node, Some(5)).await?;
+    let http = http_client();
    let (divs, _) = score_divergence(&http, client, context, range, is_memory_node, Some(5)).await?;
    let mut results: Vec<(usize, f64)> = response_positions.iter()
        .enumerate()
@ -845,10 +804,8 @@ pub async fn send_to_train(
        }
    });
    let http = http_client();
    let url = format!("{}/train", client.base_url());
    let http = crate::agent::api::http::HttpClient::builder()
        .timeout(std::time::Duration::from_secs(300))
        .build();
    let response = http.send_json("POST", &url, &[], &body).await?;
    let status = response.status();
--- a/src/subconscious/prompts.rs
+++ b/src/subconscious/prompts.rs
@ -104,21 +104,22 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph)
                item.classification, item.outlier_score));
        }
-		if let Some(community) = node.community_id {
+        if let Some(community) = node.community_id {
-			out.push_str(&format!("Community: {}  ", community));
+            out.push_str(&format!("Community: {}  ", community));
-		}
+        }
-		let deg = graph.degree(&item.key);
+        let deg = graph.degree(&item.key);
        let cc = graph.clustering_coefficient(&item.key);
-		// Hub-link ratio: what fraction of this node's edges go to hubs?
+        // Hub-link ratio: what fraction of this node's edges go to hubs?
-		let neighbors = graph.neighbors(&item.key);
+        let neighbors = graph.neighbors(&item.key);
        let hub_links = neighbors.iter()
            .filter(|(n, _)| graph.degree(n) >= hub_thresh)
            .count();
        let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
-		let is_hub = deg >= hub_thresh;
+        let is_hub = deg >= hub_thresh;
-		out.push_str(&format!("Degree: {}  CC: {:.3}  Hub-link ratio: {:.0}% ({}/{})",
+        out.push_str(&format!("Degree: {}  CC: {:.3}  Hub-link ratio: {:.0}% ({}/{})",
-			deg, item.cc, hub_ratio * 100.0, hub_links, deg));
+            deg, cc, hub_ratio * 100.0, hub_links, deg));
        if is_hub {
            out.push_str("  ← THIS IS A HUB");
        } else if hub_ratio > 0.6 {
--- a/src/user/context.rs
+++ b/src/user/context.rs
@ -43,7 +43,6 @@ impl ConsciousScreen {
                        name: format!("mem: {}", key),
                        tokens: node.tokens(),
                        content: text.clone(),
                        token_ids: leaf.token_ids().to_vec(),
                        children: Vec::new(),
                        status: score.map(|s| format!("{:.2}", s)).unwrap_or_default(),
                    });
@ -56,7 +55,6 @@ impl ConsciousScreen {
                name: format!("Memory nodes ({})", mem_children.len()),
                tokens: mem_tokens,
                content: String::new(),
                token_ids: Vec::new(),
                children: mem_children,
                status: format!("{} scored, {} unscored", scored, unscored),
            });
@ -72,13 +70,11 @@ impl ConsciousScreen {
                    AstNode::Leaf(leaf) => leaf.body().text().to_string(),
                    _ => String::new(),
                },
                token_ids: node.token_ids(),
                children: match node {
                    AstNode::Branch { children, .. } => children.iter()
                        .map(|c| SectionView {
                            name: c.label(), tokens: c.tokens(),
                            content: match c { AstNode::Leaf(l) => l.body().text().to_string(), _ => String::new() },
                            token_ids: match c { AstNode::Leaf(l) => l.token_ids().to_vec(), _ => c.token_ids() },
                            children: Vec::new(), status: String::new(),
                        }).collect(),
                    _ => Vec::new(),
@ -105,7 +101,6 @@ impl ConsciousScreen {
            name: format!("Conversation ({} entries)", conv_children.len()),
            tokens: conv_tokens,
            content: String::new(),
            token_ids: Vec::new(),
            children: conv_children,
            status: String::new(),
        });
--- a/src/user/mod.rs
+++ b/src/user/mod.rs
@ -74,7 +74,7 @@ fn truncate(s: &str, max: usize) -> String {
 }
 /// A screen that can draw itself and handle input.
-trait ScreenView {
+trait ScreenView: Send {
    fn tick(&mut self, frame: &mut ratatui::Frame, area: ratatui::layout::Rect,
            events: &[ratatui::crossterm::event::Event], app: &mut App);
    fn label(&self) -> &'static str;
@ -291,21 +291,22 @@ async fn start(cli: crate::user::CliArgs) -> Result<()> {
    ui_handle.join().unwrap_or_else(|_| Err(anyhow::anyhow!("UI thread panicked")))
 }
-async fn hotkey_cycle_reasoning(mind: &crate::mind::Mind) {
+fn hotkey_cycle_reasoning(mind: &crate::mind::Mind) {
-    let mut ag = mind.agent.state.lock().await;
+    if let Ok(mut ag) = mind.agent.state.try_lock() {
-    let next = match ag.reasoning_effort.as_str() {
+        let next = match ag.reasoning_effort.as_str() {
-        "none" => "low",
+            "none" => "low",
-        "low" => "high",
+            "low" => "high",
-        _ => "none",
+            _ => "none",
-    };
+        };
-    ag.reasoning_effort = next.to_string();
+        ag.reasoning_effort = next.to_string();
-    let label = match next {
+        let label = match next {
-        "none" => "off (monologue hidden)",
+            "none" => "off (monologue hidden)",
-        "low" => "low (brief monologue)",
+            "low" => "low (brief monologue)",
-        "high" => "high (full monologue)",
+            "high" => "high (full monologue)",
-        _ => next,
+            _ => next,
-    };
+        };
-    ag.notify(format!("reasoning: {}", label));
+        ag.notify(format!("reasoning: {}", label));
    }
 }
 async fn hotkey_kill_processes(mind: &crate::mind::Mind) {
@ -591,7 +592,7 @@ async fn run(
                    } else if key.modifiers.contains(KeyModifiers::CONTROL) {
                        match key.code {
                            KeyCode::Char('c') => { app.should_quit = true; }
-                            KeyCode::Char('r') => hotkey_cycle_reasoning(mind).await,
+                            KeyCode::Char('r') => hotkey_cycle_reasoning(mind),
                            KeyCode::Char('k') => hotkey_kill_processes(mind).await,
                            KeyCode::Char('p') => hotkey_cycle_autonomy(mind),
                            _ => {}
@ -755,11 +756,6 @@ fn restore_stderr(original_fd: std::os::fd::RawFd) {
 #[tokio::main]
 pub async fn main() {
    // Install target-routed file logger: `target: "grpc"` records go to
    // ~/.consciousness/logs/daemon/grpc.log, everything else to debug.log.
    // Level from RUST_LOG, defaulting to info.
    let _ = crate::logging::init();
    // Reap channel-daemon zombies via a SIGCHLD handler that only touches
    // PIDs listed in channels_dir(). Avoids SIGCHLD=SIG_IGN, which would
    // break tokio::process::Command::wait() (kernel auto-reap → ECHILD).
--- a/src/user/subconscious.rs
+++ b/src/user/subconscious.rs
@ -207,7 +207,6 @@ impl SubconsciousScreen {
                name: key.clone(),
                tokens: 0,
                content: val.clone(),
                token_ids: Vec::new(),
                children: Vec::new(),
                status: String::new(),
            }
@ -239,7 +238,6 @@ impl SubconsciousScreen {
                    name: format!("Conversation ({} entries)", conv_children.len()),
                    tokens: conv_children.iter().map(|c| c.tokens).sum(),
                    content: String::new(),
                    token_ids: Vec::new(),
                    children: conv_children,
                    status: String::new(),
                });
--- a/src/user/widgets.rs
+++ b/src/user/widgets.rs
@ -8,18 +8,11 @@ use ratatui::{
 };
 use crate::agent::context::{AstNode, Ast, NodeBody};
-#[derive(Debug, Clone, Default)]
+#[derive(Debug, Clone)]
 pub struct SectionView {
    pub name: String,
    pub tokens: usize,
    pub content: String,
    /// Token-id stream for this subtree, displayed in place of
    /// `content` when the tree's show-tokens mode is on. Populated
    /// from `leaf.token_ids()` / `node.token_ids()` for views built
    /// from the AST; empty for views that don't have a corresponding
    /// AST node (subconscious entries, etc.), in which case the
    /// token view falls back to the text content.
    pub token_ids: Vec<u32>,
    pub children: Vec<SectionView>,
    /// Extra status text shown after the token count.
    pub status: String,
@ -39,7 +32,6 @@ fn node_to_view(node: &AstNode) -> SectionView {
                name,
                tokens: node.tokens(),
                content: leaf.body().text().to_string(),
                token_ids: leaf.token_ids().to_vec(),
                children: Vec::new(),
                status,
            }
@ -52,7 +44,6 @@ fn node_to_view(node: &AstNode) -> SectionView {
                name: node.label(),
                tokens: node.tokens(),
                content: String::new(),
                token_ids: node.token_ids(),
                children: child_views,
                status: String::new(),
            }
@ -63,12 +54,10 @@ fn node_to_view(node: &AstNode) -> SectionView {
 pub fn section_to_view(name: &str, nodes: &[AstNode]) -> SectionView {
    let children: Vec<SectionView> = nodes.iter().map(|n| node_to_view(n)).collect();
    let total_tokens: usize = nodes.iter().map(|n| n.tokens()).sum();
    let token_ids: Vec<u32> = nodes.iter().flat_map(|n| n.token_ids()).collect();
    SectionView {
        name: name.to_string(),
        tokens: total_tokens,
        content: String::new(),
        token_ids,
        children,
        status: String::new(),
    }
@ -115,7 +104,7 @@ pub fn format_ts_age(ts: i64) -> String {
 /// Key legend for SectionTree panes.
 pub fn tree_legend() -> Line<'static> {
    Line::styled(
-        " ↑↓:nav  →/Enter:expand  ←:collapse  e:expand  c:collapse  v:toggle tokens/text  PgUp/Dn ",
+        " ↑↓:nav  →/Enter:expand  ←:collapse  e:expand all  c:collapse all  PgUp/Dn  Home/End ",
        Style::default().fg(Color::DarkGray),
    )
 }
@ -196,19 +185,11 @@ pub struct SectionTree {
    pub selected: Option<usize>,
    pub expanded: std::collections::HashSet<usize>,
    pub scroll: super::scroll_pane::ScrollPaneState,
    /// When true, render `token_ids` as space-separated IDs in place
    /// of `content` in expanded panels. Toggled with 'v'.
    pub show_tokens: bool,
 }
 impl SectionTree {
    pub fn new() -> Self {
-        Self {
+        Self { selected: None, expanded: std::collections::HashSet::new(), scroll: super::scroll_pane::ScrollPaneState::new() }
            selected: None,
            expanded: std::collections::HashSet::new(),
            scroll: super::scroll_pane::ScrollPaneState::new(),
            show_tokens: false,
        }
    }
    fn total_nodes(&self, sections: &[SectionView]) -> usize {
@ -283,9 +264,6 @@ impl SectionTree {
            KeyCode::Char('c') => {
                self.expanded.clear();
            }
            KeyCode::Char('v') => {
                self.show_tokens = !self.show_tokens;
            }
            _ => {}
        }
        self.scroll_to_selected(height);
@ -348,12 +326,7 @@ impl SectionTree {
                }
            } else if has_content {
                let content_indent = format!("{}    │ ", "  ".repeat(depth + 1));
-                let body = if self.show_tokens && !section.token_ids.is_empty() {
+                let content_lines: Vec<&str> = section.content.lines().collect();
                    format_token_ids_wrapped(&section.token_ids)
                } else {
                    section.content.clone()
                };
                let content_lines: Vec<&str> = body.lines().collect();
                let show = content_lines.len().min(50);
                for line in &content_lines[..show] {
                    lines.push(Line::styled(
@ -371,16 +344,3 @@ impl SectionTree {
        }
    }
 }
 /// Format token IDs for the content panel: space-separated, wrapped
 /// at 12 ids per line so they fit comfortably in a pane.
 fn format_token_ids_wrapped(ids: &[u32]) -> String {
    let mut out = String::new();
    for (i, id) in ids.iter().enumerate() {
        if i > 0 {
            if i % 12 == 0 { out.push('\n'); } else { out.push(' '); }
        }
        out.push_str(&id.to_string());
    }
    out
 }