consciousness: cache expensive graph metrics

Seed default identity nodes during init
Improve hook agent diagnostics
2026-06-15 21:16:40 -05:00 · 2026-06-15 16:10:02 -05:00 · 2026-06-15 15:47:59 -05:00 · 2026-06-15 13:51:22 -05:00 · 2026-06-15 13:51:22 -05:00 · 2026-06-15 13:51:22 -05:00
49 changed files with 4680 additions and 1180 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -165,6 +165,39 @@ dependencies = [
 "tree-sitter-yaml",
 ]

+[[package]]
+name = "async-stream"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476"
+dependencies = [
+ "async-stream-impl",
+ "futures-core",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "async-stream-impl"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "async-trait"
+version = "0.1.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
 [[package]]
 name = "atomic"
 version = "0.6.1"
@ -208,6 +241,53 @@ dependencies = [
 "fs_extra",
 ]

+[[package]]
+name = "axum"
+version = "0.7.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
+dependencies = [
+ "async-trait",
+ "axum-core",
+ "bytes",
+ "futures-util",
+ "http",
+ "http-body",
+ "http-body-util",
+ "itoa",
+ "matchit",
+ "memchr",
+ "mime",
+ "percent-encoding",
+ "pin-project-lite",
+ "rustversion",
+ "serde",
+ "sync_wrapper",
+ "tower 0.5.3",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "axum-core"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
+dependencies = [
+ "async-trait",
+ "bytes",
+ "futures-util",
+ "http",
+ "http-body",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "rustversion",
+ "sync_wrapper",
+ "tower-layer",
+ "tower-service",
+]
+
 [[package]]
 name = "base64"
 version = "0.13.1"
@ -491,6 +571,7 @@ dependencies = [
 "anyhow",
 "ast-grep-core",
 "ast-grep-language",
+ "async-stream",
 "base64 0.22.1",
 "bytes",
 "capnp",
@ -518,11 +599,14 @@ dependencies = [
 "notify-debouncer-mini",
 "paste",
 "peg",
+ "prost",
+ "protoc-bin-vendored",
 "ratatui",
 "redb",
 "regex",
 "rustls",
 "rustls-native-certs",
+ "rustls-pemfile",
 "serde",
 "serde_json",
 "serde_urlencoded",
@ -531,7 +615,10 @@ dependencies = [
 "tokenizers",
 "tokio",
 "tokio-rustls",
+ "tokio-stream",
 "tokio-util",
+ "tonic",
+ "tonic-build",
 "tui-markdown",
 "tui-textarea-2",
 "uuid",
@ -1064,6 +1151,12 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"

+[[package]]
+name = "fixedbitset"
+version = "0.5.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
+
 [[package]]
 name = "flate2"
 version = "1.1.9"
@ -1288,6 +1381,31 @@ dependencies = [
 "regex-syntax",
 ]

+[[package]]
+name = "h2"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "http",
+ "indexmap 2.14.0",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+
 [[package]]
 name = "hashbrown"
 version = "0.15.5"
@ -1393,6 +1511,12 @@ version = "1.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"

+[[package]]
+name = "httpdate"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
+
 [[package]]
 name = "hyper"
 version = "1.9.0"
@ -1403,9 +1527,11 @@ dependencies = [
 "bytes",
 "futures-channel",
 "futures-core",
+ "h2",
 "http",
 "http-body",
 "httparse",
+ "httpdate",
 "itoa",
 "pin-project-lite",
 "smallvec",
@ -1413,6 +1539,19 @@ dependencies = [
 "want",
 ]

+[[package]]
+name = "hyper-timeout"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0"
+dependencies = [
+ "hyper",
+ "hyper-util",
+ "pin-project-lite",
+ "tokio",
+ "tower-service",
+]
+
 [[package]]
 name = "hyper-util"
 version = "0.1.20"
@ -1420,11 +1559,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
 dependencies = [
 "bytes",
+ "futures-channel",
+ "futures-util",
 "http",
 "http-body",
 "hyper",
+ "libc",
 "pin-project-lite",
+ "socket2 0.6.3",
 "tokio",
+ "tower-service",
+ "tracing",
 ]

 [[package]]
@ -1485,6 +1630,16 @@ version = "0.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09e54e57b4c48b40f7aec75635392b12b3421fa26fe8b4332e63138ed278459c"

+[[package]]
+name = "indexmap"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
+dependencies = [
+ "autocfg",
+ "hashbrown 0.12.3",
+]
+
 [[package]]
 name = "indexmap"
 version = "2.14.0"
@ -1858,6 +2013,12 @@ dependencies = [
 "xml5ever",
 ]

+[[package]]
+name = "matchit"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
+
 [[package]]
 name = "memchr"
 version = "2.8.0"
@ -1888,6 +2049,12 @@ dependencies = [
 "autocfg",
 ]

+[[package]]
+name = "mime"
+version = "0.3.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
+
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"
@ -1938,6 +2105,12 @@ dependencies = [
 "syn 2.0.117",
 ]

+[[package]]
+name = "multimap"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
+
 [[package]]
 name = "new_debug_unreachable"
 version = "1.0.6"
@ -2233,6 +2406,16 @@ dependencies = [
 "sha2",
 ]

+[[package]]
+name = "petgraph"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
+dependencies = [
+ "fixedbitset 0.5.7",
+ "indexmap 2.14.0",
+]
+
 [[package]]
 name = "phf"
 version = "0.11.3"
@ -2285,6 +2468,26 @@ dependencies = [
 "siphasher",
 ]

+[[package]]
+name = "pin-project"
+version = "1.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517"
+dependencies = [
+ "pin-project-internal",
+]
+
+[[package]]
+name = "pin-project-internal"
+version = "1.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
 [[package]]
 name = "pin-project-lite"
 version = "0.2.17"
@ -2304,7 +2507,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07"
 dependencies = [
 "base64 0.22.1",
- "indexmap",
+ "indexmap 2.14.0",
 "quick-xml",
 "serde",
 "time",
@ -2378,6 +2581,122 @@ dependencies = [
 "yansi",
 ]

+[[package]]
+name = "prost"
+version = "0.13.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
+dependencies = [
+ "bytes",
+ "prost-derive",
+]
+
+[[package]]
+name = "prost-build"
+version = "0.13.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
+dependencies = [
+ "heck",
+ "itertools",
+ "log",
+ "multimap",
+ "once_cell",
+ "petgraph",
+ "prettyplease",
+ "prost",
+ "prost-types",
+ "regex",
+ "syn 2.0.117",
+ "tempfile",
+]
+
+[[package]]
+name = "prost-derive"
+version = "0.13.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
+dependencies = [
+ "anyhow",
+ "itertools",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "prost-types"
+version = "0.13.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16"
+dependencies = [
+ "prost",
+]
+
+[[package]]
+name = "protoc-bin-vendored"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1c381df33c98266b5f08186583660090a4ffa0889e76c7e9a5e175f645a67fa"
+dependencies = [
+ "protoc-bin-vendored-linux-aarch_64",
+ "protoc-bin-vendored-linux-ppcle_64",
+ "protoc-bin-vendored-linux-s390_64",
+ "protoc-bin-vendored-linux-x86_32",
+ "protoc-bin-vendored-linux-x86_64",
+ "protoc-bin-vendored-macos-aarch_64",
+ "protoc-bin-vendored-macos-x86_64",
+ "protoc-bin-vendored-win32",
+]
+
+[[package]]
+name = "protoc-bin-vendored-linux-aarch_64"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c350df4d49b5b9e3ca79f7e646fde2377b199e13cfa87320308397e1f37e1a4c"
+
+[[package]]
+name = "protoc-bin-vendored-linux-ppcle_64"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a55a63e6c7244f19b5c6393f025017eb5d793fd5467823a099740a7a4222440c"
+
+[[package]]
+name = "protoc-bin-vendored-linux-s390_64"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1dba5565db4288e935d5330a07c264a4ee8e4a5b4a4e6f4e83fad824cc32f3b0"
+
+[[package]]
+name = "protoc-bin-vendored-linux-x86_32"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8854774b24ee28b7868cd71dccaae8e02a2365e67a4a87a6cd11ee6cdbdf9cf5"
+
+[[package]]
+name = "protoc-bin-vendored-linux-x86_64"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b38b07546580df720fa464ce124c4b03630a6fb83e05c336fea2a241df7e5d78"
+
+[[package]]
+name = "protoc-bin-vendored-macos-aarch_64"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89278a9926ce312e51f1d999fee8825d324d603213344a9a706daa009f1d8092"
+
+[[package]]
+name = "protoc-bin-vendored-macos-x86_64"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81745feda7ccfb9471d7a4de888f0652e806d5795b61480605d4943176299756"
+
+[[package]]
+name = "protoc-bin-vendored-win32"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95067976aca6421a523e491fce939a3e65249bac4b977adee0ee9771568e8aa3"
+
 [[package]]
 name = "pulldown-cmark"
 version = "0.13.3"
@ -2433,6 +2752,8 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
 dependencies = [
+ "libc",
+ "rand_chacha 0.3.1",
 "rand_core 0.6.4",
 ]

@ -2442,10 +2763,20 @@ version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
 dependencies = [
- "rand_chacha",
+ "rand_chacha 0.9.0",
 "rand_core 0.9.5",
 ]

+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.6.4",
+]
+
 [[package]]
 name = "rand_chacha"
 version = "0.9.0"
@ -2461,6 +2792,9 @@ name = "rand_core"
 version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom 0.2.17",
+]

 [[package]]
 name = "rand_core"
@ -2709,6 +3043,15 @@ dependencies = [
 "security-framework",
 ]

+[[package]]
+name = "rustls-pemfile"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50"
+dependencies = [
+ "rustls-pki-types",
+]
+
 [[package]]
 name = "rustls-pki-types"
 version = "1.14.0"
@ -2831,7 +3174,7 @@ version = "1.0.149"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
 dependencies = [
- "indexmap",
+ "indexmap 2.14.0",
 "itoa",
 "memchr",
 "serde",
@ -2935,6 +3278,16 @@ version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"

+[[package]]
+name = "socket2"
+version = "0.5.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
+dependencies = [
+ "libc",
+ "windows-sys 0.52.0",
+]
+
 [[package]]
 name = "socket2"
 version = "0.6.3"
@ -3049,6 +3402,12 @@ dependencies = [
 "unicode-ident",
 ]

+[[package]]
+name = "sync_wrapper"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
+
 [[package]]
 name = "syntect"
 version = "5.3.0"
@ -3127,7 +3486,7 @@ dependencies = [
 "fancy-regex",
 "filedescriptor",
 "finl_unicode",
- "fixedbitset",
+ "fixedbitset 0.4.2",
 "hex",
 "lazy_static",
 "libc",
@ -3287,7 +3646,7 @@ dependencies = [
 "parking_lot",
 "pin-project-lite",
 "signal-hook-registry",
- "socket2",
+ "socket2 0.6.3",
 "tokio-macros",
 "windows-sys 0.61.2",
 ]
@ -3313,6 +3672,17 @@ dependencies = [
 "tokio",
 ]

+[[package]]
+name = "tokio-stream"
+version = "0.1.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70"
+dependencies = [
+ "futures-core",
+ "pin-project-lite",
+ "tokio",
+]
+
 [[package]]
 name = "tokio-util"
 version = "0.7.18"
@ -3327,6 +3697,130 @@ dependencies = [
 "tokio",
 ]

+[[package]]
+name = "tonic"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
+dependencies = [
+ "async-stream",
+ "async-trait",
+ "axum",
+ "base64 0.22.1",
+ "bytes",
+ "h2",
+ "http",
+ "http-body",
+ "http-body-util",
+ "hyper",
+ "hyper-timeout",
+ "hyper-util",
+ "percent-encoding",
+ "pin-project",
+ "prost",
+ "rustls-native-certs",
+ "rustls-pemfile",
+ "socket2 0.5.10",
+ "tokio",
+ "tokio-rustls",
+ "tokio-stream",
+ "tower 0.4.13",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tonic-build"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11"
+dependencies = [
+ "prettyplease",
+ "proc-macro2",
+ "prost-build",
+ "prost-types",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "tower"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
+dependencies = [
+ "futures-core",
+ "futures-util",
+ "indexmap 1.9.3",
+ "pin-project",
+ "pin-project-lite",
+ "rand 0.8.5",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tower"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
+dependencies = [
+ "futures-core",
+ "futures-util",
+ "pin-project-lite",
+ "sync_wrapper",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "tower-layer"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
+
+[[package]]
+name = "tower-service"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
+
+[[package]]
+name = "tracing"
+version = "0.1.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
+dependencies = [
+ "pin-project-lite",
+ "tracing-attributes",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-attributes"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
+dependencies = [
+ "once_cell",
+]
+
 [[package]]
 name = "tree-sitter"
 version = "0.26.8"
@ -3885,7 +4379,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
 dependencies = [
 "anyhow",
- "indexmap",
+ "indexmap 2.14.0",
 "wasm-encoder",
 "wasmparser",
 ]
@ -3898,7 +4392,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
 dependencies = [
 "bitflags 2.11.0",
 "hashbrown 0.15.5",
- "indexmap",
+ "indexmap 2.14.0",
 "semver",
 ]

@ -4267,7 +4761,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
 dependencies = [
 "anyhow",
 "heck",
- "indexmap",
+ "indexmap 2.14.0",
 "prettyplease",
 "syn 2.0.117",
 "wasm-metadata",
@ -4298,7 +4792,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
 dependencies = [
 "anyhow",
 "bitflags 2.11.0",
- "indexmap",
+ "indexmap 2.14.0",
 "log",
 "serde",
 "serde_derive",
@ -4317,7 +4811,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
 dependencies = [
 "anyhow",
 "id-arena",
- "indexmap",
+ "indexmap 2.14.0",
 "log",
 "semver",
 "serde",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -18,6 +18,9 @@ name = "consciousness"
 version.workspace = true
 edition.workspace = true

+[features]
+nightly-diagnostics = []
+
 [dependencies]
 anyhow = "1"
 html2md = "0.2"
@ -61,6 +64,11 @@ futures = "0.3"
 capnp = "0.25"
 capnp-rpc = "0.25"

+tonic = { version = "0.12", features = ["tls", "tls-roots"] }
+prost = "0.13"
+async-stream = "0.3"
+tokio-stream = "0.1"
+
 tokenizers = "0.22"

 http = "1"
@ -74,10 +82,13 @@ imagesize = "0.14"
 rustls = "0.23"
 tokio-rustls = "0.26"
 rustls-native-certs = "0.8"
+rustls-pemfile = "2"
 serde_urlencoded = "0.7"

 [build-dependencies]
 capnpc = "0.25"
+tonic-build = { version = "0.12", default-features = false, features = ["prost", "transport"] }
+protoc-bin-vendored = "3"

 [lib]
 name = "consciousness"
--- a/build.rs
+++ b/build.rs
@ -13,4 +13,21 @@ fn main() {
        .file("schema/channel.capnp")
        .run()
        .expect("capnp compile failed (channel.capnp)");
+
+    // Generate salience.v1 gRPC client + message types from proto.
+    // Server side (python) is generated separately via grpcio-tools.
+    // Use vendored protoc so we don't require a system install.
+    let protoc = protoc_bin_vendored::protoc_bin_path()
+        .expect("vendored protoc not available for this platform");
+    // SAFETY: build script is single-threaded at this point; setting env
+    // before invoking tonic_build is the documented way to point it at a
+    // non-PATH protoc.
+    unsafe { std::env::set_var("PROTOC", protoc); }
+    tonic_build::configure()
+        .build_server(false)
+        .build_client(true)
+        .compile_protos(&["proto/salience.proto"], &["proto"])
+        .expect("tonic_build compile failed (salience.proto)");
+
+    println!("cargo:rerun-if-changed=proto/salience.proto");
 }
--- a/channels/telegram/src/main.rs
+++ b/channels/telegram/src/main.rs
@ -181,6 +181,8 @@ struct TelegramMessage {
    chat_id: i64,
    sender: String,
    text: String,
+    /// Absolute path to a downloaded media file (photo, etc.), if any.
+    media_path: Option<String>,
 }

 /// Fetch and parse pending updates from Telegram via long polling.
@ -206,19 +208,115 @@ async fn get_updates(
            let sender = msg["from"]["first_name"].as_str().unwrap_or("unknown").to_string();
            let chat_id = msg["chat"]["id"].as_i64().unwrap_or(0);

-            if let Some(text) = msg["text"].as_str() {
-                messages.push(TelegramMessage {
-                    update_id,
-                    chat_id,
-                    sender,
-                    text: text.to_string(),
-                });
-            }
+            // Photo: array of PhotoSize, largest is last. Download largest,
+            // surface message with [image: <path>] marker so the multimodal
+            // model can Read the image.
+            let (text, media_path) = if let Some(sizes) = msg["photo"].as_array() {
+                let caption = msg["caption"].as_str().unwrap_or("").to_string();
+                let largest = sizes.last();
+                let file_id = largest
+                    .and_then(|s| s["file_id"].as_str())
+                    .unwrap_or("");
+                if file_id.is_empty() {
+                    error!("telegram photo: missing file_id in update {update_id}");
+                    (caption, None)
+                } else {
+                    // Bound the download — HttpClient::request_timeout only covers
+                    // send_request, not body collect, so an indefinitely-slow body
+                    // would otherwise stall every subsequent poll.
+                    let dl = tokio::time::timeout(
+                        std::time::Duration::from_secs(60),
+                        download_telegram_file(client, token, file_id),
+                    ).await
+                        .unwrap_or_else(|_| Err("download timed out after 60s".into()));
+                    match dl {
+                        Ok(path) => (caption, Some(path)),
+                        Err(e) => {
+                            error!("telegram photo download failed (file_id={file_id}): {e}");
+                            // Surface what we have: caption plus a marker that
+                            // a photo was sent but couldn't be fetched.
+                            let marker = format!("[image: download failed: {e}]");
+                            let combined = if caption.is_empty() {
+                                marker
+                            } else {
+                                format!("{marker}\n{caption}")
+                            };
+                            (combined, None)
+                        }
+                    }
+                }
+            } else if let Some(text) = msg["text"].as_str() {
+                (text.to_string(), None)
+            } else {
+                // Other media types (voice, video, sticker, etc.) — skip for now,
+                // but log so we can extend later.
+                let kind = ["voice", "video", "sticker", "document", "audio", "animation"]
+                    .iter()
+                    .find(|k| !msg[**k].is_null())
+                    .copied()
+                    .unwrap_or("unknown");
+                info!("telegram: skipping non-text/photo message (kind={kind}, update_id={update_id})");
+                continue;
+            };
+
+            messages.push(TelegramMessage {
+                update_id,
+                chat_id,
+                sender,
+                text,
+                media_path,
+            });
        }
    }
    Ok(messages)
 }

+/// Resolve a Telegram file_id to a downloadable URL path via getFile.
+async fn get_file_path(
+    client: &HttpClient,
+    token: &str,
+    file_id: &str,
+) -> Result<String, Box<dyn std::error::Error>> {
+    let url = format!(
+        "https://api.telegram.org/bot{}/getFile?file_id={}",
+        token, file_id,
+    );
+    let response = client.get(&url).await?;
+    let body = response.text().await?;
+    let resp: serde_json::Value = serde_json::from_str(&body)
+        .map_err(|e| format!("getFile JSON parse error: {e}"))?;
+    if !resp["ok"].as_bool().unwrap_or(false) {
+        return Err(format!("getFile failed: {}", resp["description"].as_str().unwrap_or("?")).into());
+    }
+    let file_path = resp["result"]["file_path"].as_str()
+        .ok_or("getFile: missing result.file_path")?;
+    Ok(file_path.to_string())
+}
+
+/// Download a Telegram file by file_id into the channel media dir.
+/// Returns the absolute local path on success.
+async fn download_telegram_file(
+    client: &HttpClient,
+    token: &str,
+    file_id: &str,
+) -> Result<String, Box<dyn std::error::Error>> {
+    let file_path = get_file_path(client, token, file_id).await?;
+    let url = format!("https://api.telegram.org/file/bot{}/{}", token, file_path);
+    let response = client.get(&url).await?;
+    let status = response.status();
+    if !status.is_success() {
+        return Err(format!("file download failed: {status}").into());
+    }
+    let bytes = response.bytes().await?;
+
+    let ext = file_path.rsplit('.').next().filter(|e| !e.contains('/')).unwrap_or("dat");
+    let media_dir = log_dir().join("media");
+    std::fs::create_dir_all(&media_dir)?;
+    let dest = media_dir.join(format!("{file_id}.{ext}"));
+    std::fs::write(&dest, &bytes)?;
+    Ok(dest.to_string_lossy().to_string())
+}
+
 /// Send a text message to a Telegram chat.
 async fn send_message(
    client: &HttpClient,
@ -369,11 +467,19 @@ async fn poll_once(
        let sender_lower = msg.sender.to_lowercase();
        let channel = format!("telegram.{}", sender_lower);

-        channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &msg.text);
+        // If the message has media, prepend an [image: <abs_path>] marker
+        // so the multimodal model can Read the file directly.
+        let body = match &msg.media_path {
+            Some(path) if msg.text.is_empty() => format!("[image: {path}]"),
+            Some(path) => format!("[image: {path}]\n{}", msg.text),
+            None => msg.text.clone(),
+        };
+
+        channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &body);

        let mut s = state.borrow_mut();
        s.config.chat_ids.insert(sender_lower, msg.chat_id);
-        let line = format!("[{}] {}", msg.sender, msg.text);
+        let line = format!("[{}] {}", msg.sender, body);
        s.push_message(line, 2, &channel);
    }

--- a/channels/tmux/src/main.rs
+++ b/channels/tmux/src/main.rs
@ -26,10 +26,12 @@ use consciousness::thalamus::channel_log::ChannelLog;

 #[derive(Clone, serde::Serialize, serde::Deserialize)]
 struct PaneConfig {
-    /// Human-readable label, becomes the channel name "tmux.<label>"
+    /// Human-readable label: becomes the channel name "tmux.<label>",
+    /// and the tmux pane title / window name the live pane id is
+    /// resolved from. The pane id is deliberately not stored — it is
+    /// ephemeral (recycled across pane and tmux-server restarts), so it
+    /// is looked up fresh on every connect attempt.
    label: String,
-    /// Tmux pane ID, e.g. "%5"
-    pane_id: String,
 }

 #[derive(Clone, serde::Serialize, serde::Deserialize)]
@ -86,11 +88,9 @@ impl State {
        }
    }

-    /// Get pane_id for a label
-    fn get_pane(&self, label: &str) -> Option<&str> {
-        self.config.panes.iter()
-            .find(|p| p.label == label)
-            .map(|p| p.pane_id.as_str())
+    /// Whether a pane with this label is registered.
+    fn has_pane(&self, label: &str) -> bool {
+        self.config.panes.iter().any(|p| p.label == label)
    }

    /// Check if a pane is connected
@ -103,98 +103,124 @@ impl State {
        self.connected.insert(label.to_string(), connected);
    }

-    /// Add a pane and persist
-    fn add_pane(&mut self, label: String, pane_id: String) {
+    /// Register a pane and persist.
+    fn add_pane(&mut self, label: String) {
        if !self.config.panes.iter().any(|p| p.label == label) {
-            self.config.panes.push(PaneConfig { label, pane_id });
+            self.config.panes.push(PaneConfig { label });
            save_config(&self.config);
        }
    }

-    /// Remove a pane and persist
-    fn remove_pane(&mut self, label: &str) -> Option<String> {
+    /// Unregister a pane and persist. Returns whether it was registered.
+    fn remove_pane(&mut self, label: &str) -> bool {
        if let Some(idx) = self.config.panes.iter().position(|p| p.label == label) {
-            let pane = self.config.panes.remove(idx);
+            self.config.panes.remove(idx);
            self.connected.remove(label);
            save_config(&self.config);
-            Some(pane.pane_id)
+            true
        } else {
-            None
+            false
        }
    }
 }

 // ── Pipe-Pane Reader ──────────────────────────────────────────

-/// Set up pipe-pane for a single pane, reading output into the channel log.
-async fn pipe_pane_reader(state: SharedState, pane: PaneConfig) {
+/// Wait between connect attempts for a pane that is not yet reachable.
+const RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_secs(2);
+
+/// Keep a pane streamed into its channel log for as long as it stays
+/// registered. The pane id is resolved fresh by label on every connect
+/// attempt — tmux pane ids are ephemeral, so the label (pane title /
+/// window name) is the durable identity. Retries until the pane exists
+/// and pipe-pane succeeds, and reconnects the same way if the pipe
+/// later drops. Returns once close() unregisters the pane.
+async fn pipe_pane_reader(state: SharedState, label: String) {
    let pipe_dir = dirs::home_dir()
        .unwrap_or_default()
        .join(".consciousness/channels/tmux-pipes");
    std::fs::create_dir_all(&pipe_dir).ok();
+    let pipe_path = pipe_dir.join(format!("{}.pipe", label));
+    let channel_key = format!("tmux.{}", label);

-    let pipe_path = pipe_dir.join(format!("{}.pipe", pane.label));
-    let _ = std::fs::remove_file(&pipe_path);
+    loop {
+        if !state.borrow().has_pane(&label) {
+            return;
+        }

-    // Create a named pipe (FIFO)
+        connect_and_stream(&state, &label, &pipe_path, &channel_key).await;
+        state.borrow_mut().set_connected(&label, false);
+
+        if !state.borrow().has_pane(&label) {
+            return;
+        }
+        tokio::time::sleep(RETRY_INTERVAL).await;
+    }
+}
+
+/// One connect attempt: resolve the pane's live id by label, point its
+/// output at the FIFO with pipe-pane, and stream lines into the channel
+/// log. Returns on the first failure, or when the stream ends.
+async fn connect_and_stream(
+    state: &SharedState,
+    label: &str,
+    pipe_path: &std::path::Path,
+    channel_key: &str,
+) {
+    let pane_id = match find_pane_by_name(label) {
+        Some(id) => id,
+        None => return,
+    };
+
+    // Fresh FIFO for this attempt.
+    let _ = std::fs::remove_file(pipe_path);
    unsafe {
        let c_path = std::ffi::CString::new(pipe_path.to_str().unwrap()).unwrap();
        libc::mkfifo(c_path.as_ptr(), 0o644);
    }

-    // Tell tmux to pipe this pane's output to our FIFO
-    let pipe_path_str = pipe_path.to_string_lossy().to_string();
-    let result = std::process::Command::new("tmux")
-        .args(["pipe-pane", "-t", &pane.pane_id, &format!("cat >> {}", pipe_path_str)])
-        .output();
-
-    match result {
-        Ok(output) if output.status.success() => {
-            info!("pipe-pane set up for {} ({})", pane.label, pane.pane_id);
-        }
-        Ok(output) => {
-            error!("pipe-pane failed for {}: {}", pane.label,
-                   String::from_utf8_lossy(&output.stderr));
-            state.borrow_mut().set_connected(&pane.label, false);
+    // Point the pane's output at our FIFO.
+    let pipe_cmd = format!("cat >> {}", pipe_path.to_string_lossy());
+    match std::process::Command::new("tmux")
+        .args(["pipe-pane", "-t", &pane_id, &pipe_cmd])
+        .output()
+    {
+        Ok(o) if o.status.success() => {}
+        Ok(o) => {
+            warn!("pipe-pane failed for {} ({}): {}", label, pane_id,
+                  String::from_utf8_lossy(&o.stderr));
            return;
        }
        Err(e) => {
-            error!("failed to run tmux pipe-pane for {}: {}", pane.label, e);
-            state.borrow_mut().set_connected(&pane.label, false);
+            error!("running tmux pipe-pane for {}: {}", label, e);
            return;
        }
    }

-    // Open the FIFO and read lines
-    let file = match tokio::fs::File::open(&pipe_path).await {
+    let file = match tokio::fs::File::open(pipe_path).await {
        Ok(f) => f,
        Err(e) => {
-            error!("failed to open pipe for {}: {}", pane.label, e);
-            state.borrow_mut().set_connected(&pane.label, false);
+            warn!("opening pipe for {}: {}", label, e);
            return;
        }
    };

-    // Mark as connected once pipe is open
-    state.borrow_mut().set_connected(&pane.label, true);
-
-    let reader = tokio::io::BufReader::new(file);
-    let mut lines = reader.lines();
-    let channel_key = format!("tmux.{}", pane.label);
+    info!("connected channel tmux.{} (pane {})", label, pane_id);
+    state.borrow_mut().set_connected(label, true);

+    let mut lines = tokio::io::BufReader::new(file).lines();
    while let Ok(Some(line)) = lines.next_line().await {
        if line.trim().is_empty() {
            continue;
        }
        let mut s = state.borrow_mut();
-        let log = s.channel_logs
-            .entry(channel_key.clone())
-            .or_insert_with(ChannelLog::new);
-        log.push(line);
+        s.channel_logs
+            .entry(channel_key.to_string())
+            .or_insert_with(ChannelLog::new)
+            .push(line);
    }

-    warn!("pipe-pane reader ended for {}", pane.label);
-    state.borrow_mut().set_connected(&pane.label, false);
+    warn!("pipe-pane stream ended for {}", label);
 }

 // ── ChannelServer Implementation ───────────────────────────────
@ -244,10 +270,10 @@ impl channel_server::Server for ChannelServerImpl {
        let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
        let message = pry!(pry!(params.get_message()).to_str()).to_string();

-        // Send to tmux pane via send-keys
+        // Send to tmux pane via send-keys — resolve the live pane id by
+        // label (it is not stored).
        let label = channel.strip_prefix("tmux.").unwrap_or(&channel);
-        let pane_id = self.state.borrow().get_pane(label).map(String::from);
-        if let Some(pane_id) = pane_id {
+        if let Some(pane_id) = find_pane_by_name(label) {
            let _ = std::process::Command::new("tmux")
                .args(["send-keys", "-t", &pane_id, &message, "Enter"])
                .output();
@ -302,28 +328,22 @@ impl channel_server::Server for ChannelServerImpl {
        let params = pry!(params.get());
        let label = pry!(pry!(params.get_label()).to_str()).to_string();

-        // Check if already open
-        if self.state.borrow().get_pane(&label).is_some() {
+        // Already registered — nothing to do.
+        if self.state.borrow().has_pane(&label) {
            return std::future::ready(Ok(()));
        }

-        // Find the tmux pane by name (window or pane title)
-        let pane_id = match find_pane_by_name(&label) {
-            Some(id) => id,
-            None => return std::future::ready(Err(capnp::Error::failed(
-                format!("no tmux pane named '{}'", label)))),
-        };
+        info!("opening channel tmux.{}", label);

-        info!("opening channel tmux.{} (pane {})", label, pane_id);
+        // Register the label and persist. The pane id is not stored —
+        // the reader resolves it by label on every connect attempt, so
+        // this succeeds even if the pane does not exist yet; the reader
+        // connects once it appears.
+        self.state.borrow_mut().add_pane(label.clone());

-        // Register in state and persist
-        self.state.borrow_mut().add_pane(label.clone(), pane_id.clone());
-
-        // Start pipe-pane reader
-        let pane = PaneConfig { label, pane_id };
        let reader_state = self.state.clone();
        tokio::task::spawn_local(async move {
-            pipe_pane_reader(reader_state, pane).await;
+            pipe_pane_reader(reader_state, label).await;
        });

        std::future::ready(Ok(()))
@ -339,14 +359,18 @@ impl channel_server::Server for ChannelServerImpl {
        let label = channel.strip_prefix("tmux.").unwrap_or(&channel).to_string();

        let mut s = self.state.borrow_mut();
-        if let Some(pane_id) = s.remove_pane(&label) {
+        if s.remove_pane(&label) {
            info!("closing channel tmux.{}", label);
            s.channel_logs.remove(&format!("tmux.{}", label));

-            // Disconnect pipe-pane
-            let _ = std::process::Command::new("tmux")
-                .args(["pipe-pane", "-t", &pane_id])
-                .output();
+            // Stop piping if the pane is still around (if it is gone the
+            // pipe is already dead). The reader then sees the pane
+            // unregistered and exits.
+            if let Some(pane_id) = find_pane_by_name(&label) {
+                let _ = std::process::Command::new("tmux")
+                    .args(["pipe-pane", "-t", &pane_id])
+                    .output();
+            }
        }

        std::future::ready(Ok(()))
@ -397,11 +421,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {

    tokio::task::LocalSet::new()
        .run_until(async move {
-            // Start a pipe-pane reader for each configured pane
+            // Start a pipe-pane reader for each configured pane; each
+            // resolves its live pane id by label and retries until
+            // connected.
            for pane in state.borrow().config.panes.clone() {
                let reader_state = state.clone();
                tokio::task::spawn_local(async move {
-                    pipe_pane_reader(reader_state, pane).await;
+                    pipe_pane_reader(reader_state, pane.label).await;
                });
            }

--- a/flake.lock
+++ b/flake.lock
@ -0,0 +1,27 @@
+{
+  "nodes": {
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1781074563,
+        "narHash": "sha256-md8WlXOlfnIeHeOScMTTHFyf2d6iaTwPl2apR5EQ3P4=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "9ae611a455b90cf061d8f332b977e387bda8e1ca",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "nixpkgs": "nixpkgs"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
--- a/flake.nix
+++ b/flake.nix
@ -0,0 +1,42 @@
+{
+  description = "Development shell for consciousness";
+
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+  };
+
+  outputs = { nixpkgs, ... }:
+    let
+      systems = [
+        "x86_64-linux"
+        "aarch64-linux"
+      ];
+      forAllSystems = nixpkgs.lib.genAttrs systems;
+    in
+    {
+      devShells = forAllSystems (system:
+        let
+          pkgs = import nixpkgs { inherit system; };
+        in
+        {
+          default = pkgs.mkShell {
+            packages = with pkgs; [
+              cargo
+              rustc
+              rustfmt
+              clippy
+              rust-analyzer
+
+              capnproto
+              pkg-config
+
+              jq
+              sqlite
+              python3
+            ];
+
+            RUST_BACKTRACE = "1";
+          };
+        });
+    };
+}
--- a/proto/salience.proto
+++ b/proto/salience.proto
@ -0,0 +1,276 @@
+// salience.proto — stateful generation + per-token concept readout over gRPC.
+//
+// Shape:
+//   - One server-streaming RPC (Generate) for inference. Every other
+//     operation is unary. This is the minimum streaming we need —
+//     tokens arrive one at a time with optional readouts / logprobs —
+//     and keeping everything else unary makes the client dramatically
+//     simpler than a single bidi state machine did.
+//
+//   - Server-side sessions hold the token list and image binaries.
+//     Sessions exist for bandwidth: at 200K tokens we'd otherwise
+//     re-ship ~800KB every turn, which hurts badly over a WAN link.
+//     vLLM's prefix cache holds the KV; the session just gives the
+//     client a handle so it can send deltas.
+//
+//   - The client is the source of truth for prompt content. The server
+//     is the source of truth for image token expansion (how many
+//     IMAGE_PAD tokens an image becomes under this model). The client
+//     writes each <|vision_start|> + IMAGE_PAD×N + <|vision_end|> block
+//     into append_tokens; Generate rejects a wrong N (INVALID_ARGUMENT).
+//
+//   - Every mutation carries (offset, truncating): the client's view of
+//     the server's current length, plus whether the client is deliberately
+//     rewriting history. Server validates on each call and rejects drift.
+//     No silent divergence, no migration bugs.
+//
+//   - Errors use gRPC status codes. NOT_FOUND for missing sessions,
+//     FAILED_PRECONDITION for offset drift or image-block splits,
+//     INVALID_ARGUMENT for image pad-count mismatches,
+//     RESOURCE_EXHAUSTED for context overflow, ABORTED for "session busy".
+//
+// Not in v1:
+//   - Authentication beyond a shared bearer token in gRPC metadata.
+//   - Multi-tenant session namespacing.
+//   - Sampling traces beyond top-k logprobs.
+
+syntax = "proto3";
+
+package salience.v1;
+
+// ============================================================
+//  Service
+// ============================================================
+
+service Salience {
+  // Create a fresh session. Client uses session_id on every subsequent
+  // RPC until CloseSession or TTL eviction (default 30 min idle). To
+  // refresh TTL across a long pause, issue a no-op Generate (empty
+  // append_tokens, max_tokens=0, no ranges).
+  rpc OpenSession(OpenSessionRequest) returns (OpenSessionResponse);
+
+  // Release the session's tokens + images. Idempotent.
+  rpc CloseSession(CloseSessionRequest) returns (CloseSessionResponse);
+
+  // Branch a session at a given token position. The new session
+  // inherits tokens [0, at_position) and any images whose vision
+  // block lies fully in that range. Rejected with FAILED_PRECONDITION
+  // if at_position falls inside an image block (client picks a clean
+  // boundary).
+  rpc ForkSession(ForkSessionRequest) returns (ForkSessionResponse);
+
+  // Prefill + optionally decode. Images are attached inline via
+  // `GenerateRequest.images`; the client writes its own pre-expanded
+  // <|vision_start|> + N*<|image_pad|> + <|vision_end|> runs into
+  // `append_tokens` and declares each run's range in `images[i]`.
+  // Server validates run length against the actual vision-encoder
+  // feature count and returns INVALID_ARGUMENT on mismatch. Stream
+  // yields Token events (with optional readouts / logprobs per
+  // position) followed by a terminating Done.
+  rpc Generate(GenerateRequest) returns (stream GenerateEvent);
+
+  // Readout manifest for the currently-loaded model — concept names,
+  // layer indices, tensor dtype. Stateless; fetch once at client
+  // startup and cache.
+  rpc GetReadoutManifest(GetReadoutManifestRequest) returns (ReadoutManifest);
+
+  // Dump the full token stream of a session. Debug-only: used by the
+  // client to verify its local accounting against the server's
+  // session.tokens token-for-token when divergence is suspected. Not
+  // cheap — copies the whole sequence across the wire.
+  rpc DumpSession(DumpSessionRequest) returns (DumpSessionResponse);
+}
+
+// ============================================================
+//  Lifecycle
+// ============================================================
+
+message OpenSessionRequest {
+  // Model identifier, must match vLLM's served model. The server
+  // only has one model loaded; this is a safety check on what the
+  // client thinks it's talking to.
+  string model = 1;
+}
+
+message OpenSessionResponse {
+  string session_id = 1;
+  uint32 max_model_len = 2;
+}
+
+message CloseSessionRequest {
+  string session_id = 1;
+}
+
+message CloseSessionResponse {}
+
+message ForkSessionRequest {
+  string session_id = 1;    // source session
+  uint32 at_position = 2;   // new session inherits tokens [0, at_position)
+}
+
+message ForkSessionResponse {
+  string session_id = 1;    // new session
+}
+
+// ============================================================
+//  Inference
+// ============================================================
+
+// One image attached to a Generate call. The client is responsible
+// for writing the expanded placeholder run (VISION_START +
+// N*IMAGE_PAD + VISION_END) into `GenerateRequest.append_tokens` at
+// positions [pad_range_start, pad_range_end) and pairing it with
+// the corresponding `ImageAttachment` entry. Server validates that
+// the declared range's pad count matches what the vision encoder
+// produces, and returns INVALID_ARGUMENT if they disagree.
+message ImageAttachment {
+  // Image bytes (PNG / JPEG / WebP / …).
+  bytes  bytes = 1;
+
+  // MIME type, e.g. "image/png".
+  string mime = 2;
+
+  // Absolute token positions (in `session.tokens` AFTER `append_tokens`
+  // is applied) spanning the full vision block — `[vision_start,
+  // pad*N, vision_end]`. `pad_range_end` is exclusive, so
+  // pad_range_end - pad_range_start == N + 2.
+  uint32 pad_range_start = 3;
+  uint32 pad_range_end = 4;
+}
+
+message GenerateRequest {
+  string session_id = 1;
+
+  // Tokens to append before prefill. May be empty. Client writes the
+  // full vision block (VISION_START + N*IMAGE_PAD + VISION_END) for
+  // any newly-attached image directly into this stream; each such
+  // block must be paired with a matching entry in `images`. The
+  // server validates that the declared ranges all point at IMAGE_PAD
+  // runs and that each run's length matches what the vision encoder
+  // produces for the corresponding image.
+  repeated uint32 append_tokens = 2;
+
+  // Client's view of session.tokens length at the time of the call.
+  // Must equal server's actual length, OR be strictly less when
+  // truncating=true (server rewinds before appending). Any other
+  // mismatch is FAILED_PRECONDITION.
+  uint32 offset = 3;
+  bool   truncating = 4;
+
+  // Decode budget. 0 = prefill only (no decode, emit Token events
+  // for positions covered by logprobs_ranges / readout_ranges, then
+  // Done; replaces the old /score endpoint). >0 = decode up to this
+  // many tokens, stopping early on EOS / stop_token_ids.
+  uint32 max_tokens = 5;
+
+  // Position ranges (absolute, within the session's post-append
+  // token list) at which to emit logprobs on Token events. Empty =
+  // no logprobs. `logprob_top_k > 0` returns the top-k alternative
+  // tokens at each covered position; `logprob_top_k == 0` returns
+  // only the sampled-token's logprob.
+  repeated PositionRange logprobs_ranges = 6;
+  uint32                 logprob_top_k = 7;
+
+  // Position ranges at which to emit concept-readout vectors. Empty
+  // = no readouts. Logical shape per position is
+  // [n_layers][n_concepts] — see GetReadoutManifest.
+  repeated PositionRange readout_ranges = 8;
+
+  // Sampling parameters. Meaningful only when max_tokens > 0.
+  float           temperature = 9;      // default 1.0 when zero
+  float           top_p = 10;           // default 1.0 when zero
+  uint32          top_k = 11;           // default 0 (disabled)
+  repeated uint32 stop_token_ids = 12;
+
+  // vLLM scheduler priority (0 = interactive, 10 = batch).
+  int32 priority = 13;
+
+  // Images newly attached on this call. Each entry describes one
+  // image's binary bytes, its mime type, and the exact token-position
+  // range of its pre-expanded placeholder run inside `session.tokens`
+  // after `append_tokens` is applied. See `ImageAttachment`.
+  repeated ImageAttachment images = 14;
+}
+
+message PositionRange {
+  uint32 start = 1;   // inclusive
+  uint32 end = 2;     // exclusive
+}
+
+message GenerateEvent {
+  oneof event {
+    Token        token = 1;
+    GenerateDone done = 2;
+  }
+}
+
+message Token {
+  // Token id at this position. For prefill this is the prompt token;
+  // for decode it's the sampled token.
+  uint32 id = 1;
+
+  // Absolute position in the session's token list.
+  uint32 position = 2;
+
+  // True for prefill positions, false for decode.
+  bool   is_prefill = 3;
+
+  // Concept readout at this position. Empty if the position wasn't
+  // covered by readout_ranges.
+  repeated float readout = 4 [packed = true];
+
+  // Top-k alternative tokens' logprobs at this position — populated
+  // when the position is covered by logprobs_ranges and
+  // logprob_top_k > 0.
+  repeated TokenLogprob logprobs = 5;
+
+  // Logprob of the token at `position` (the prompt token for
+  // prefill, the sampled token for decode). Populated when the
+  // position is covered by logprobs_ranges.
+  float sampled_logprob = 6;
+  bool  has_sampled_logprob = 7;
+}
+
+message TokenLogprob {
+  uint32 id = 1;
+  float  logprob = 2;
+}
+
+message GenerateDone {
+  uint32 prompt_tokens = 1;
+  uint32 completion_tokens = 2;
+  uint32 total_tokens = 3;
+
+  enum FinishReason {
+    FINISH_REASON_UNSPECIFIED = 0;
+    FINISH_REASON_EOS = 1;              // emitted EOS / stop token
+    FINISH_REASON_LENGTH = 2;           // hit max_tokens
+    FINISH_REASON_CANCELLED = 3;        // client cancelled
+    FINISH_REASON_STOP_STRING = 4;      // matched a stop string
+  }
+  FinishReason finish_reason = 4;
+}
+
+// ============================================================
+//  Readout manifest
+// ============================================================
+
+message GetReadoutManifestRequest {}
+
+message ReadoutManifest {
+  repeated string concepts = 1;
+  repeated uint32 layers = 2;
+  uint32          hidden_size = 3;
+  string          dtype = 4;
+}
+
+// ============================================================
+//  Debug
+// ============================================================
+
+message DumpSessionRequest {
+  string session_id = 1;
+}
+
+message DumpSessionResponse {
+  // The full session.tokens sequence, verbatim.
+  repeated uint32 tokens = 1 [packed = true];
+}
--- a/scripts/quantize_qwen3_6_mm.py
+++ b/scripts/quantize_qwen3_6_mm.py
@ -0,0 +1,327 @@
+"""Quantize Qwen3.6-27B (multimodal) to FP8 for vLLM serving.
+
+Why this exists
+---------------
+The earlier `quantize_qwen3_6.py` (in shell history, never committed)
+loaded the model with `AutoModelForCausalLM`, which silently strips
+the multimodal arch. Result: an FP8 checkpoint with no vision tower
+weights at all. vLLM happily instantiated the vision tower from the
+config and ran it with default/uninitialized weights, producing
+gibberish image features and `!!!!!!`-style output. We chased that
+through the protocol layer for a long time before tracing it back
+to the quant. This script avoids that trap by loading via the
+config-declared class explicitly.
+
+Recipe
+------
+FP8_DYNAMIC (per-channel weight scales, per-token dynamic activation
+scales, both E4M3) for Linear weights, with an `ignore` list derived
+from Unsloth's UD-Q8_K_XL (`unsloth/Qwen3.6-27B-GGUF`). Their
+sensitivity sweep flagged specific layers as quantization-fragile;
+we honor those layer indices even though their algorithm is
+GGUF-native Q8_K and ours is FP8 — sensitivity is a layer property,
+not an algorithm property.
+
+vLLM fusion constraint
+~~~~~~~~~~~~~~~~~~~~~~
+vLLM's Qwen3.5/3.6 model code fuses sub-modules at load time:
+  qkv_proj      ← q_proj, k_proj, v_proj
+  gate_up_proj  ← gate_proj, up_proj
+  in_proj_qkvz  ← in_proj_qkv, in_proj_z
+  in_proj_ba    ← in_proj_b, in_proj_a
+compressed_tensors rejects checkpoints where sub-modules of a fused
+layer have different quantization schemes. Our ignore list is shaped
+around this — within any fused layer, all components share a scheme.
+That's the reason `in_proj_qkv` is ignored even though Unsloth's
+sweep doesn't single it out, and the reason the late-stack attention
+override covers q/k/v rather than just q/k.
+
+MTP merge
+---------
+`Qwen3_5ForConditionalGeneration` doesn't expose the MTP submodule,
+so `oneshot()` produces a checkpoint with the 15 `mtp.*` tensors
+silently dropped. After quantization we read the MTP weights back
+out of the upstream cached snapshot and splice them into the saved
+safetensors at BF16. They're small (~850 MB) so quantizing them
+isn't worth the calibration risk; speculative-decoding code paths
+in vLLM expect the MTP head present.
+
+Output
+------
+`OUTPUT_DIR` gets the FP8 model.safetensors + config + processor +
+recipe.yaml. Vision tower stays BF16 (in `ignore`); LM Linears go
+to FP8; norms, SSM internals (not Linear), and MTP tensors stay
+BF16 untouched.
+
+Verification at end: re-opens the saved safetensors and asserts
+- vision .weight tensors present (>= 150; full count is 167)
+- lm_head + embed_tokens at fp16/bf16 (NOT FP8)
+- a sampled FP8'd Linear actually has float8 dtype
+- 15 mtp.* tensors present
+
+Run
+---
+    ~/vllm-venv/bin/python quantize_qwen3_6_mm.py
+"""
+from __future__ import annotations
+
+import glob
+import json
+import sys
+from pathlib import Path
+
+import torch
+from huggingface_hub import snapshot_download
+from llmcompressor import oneshot
+from llmcompressor.modifiers.quantization import QuantizationModifier
+from safetensors import safe_open
+from safetensors.torch import save_file
+from transformers import AutoProcessor
+from transformers.models.qwen3_5.modeling_qwen3_5 import (
+    Qwen3_5ForConditionalGeneration,
+)
+
+
+MODEL = "Qwen/Qwen3.6-27B"
+OUTPUT_DIR = "/home/ubuntu/amygdala-training/Qwen3.6-27B-FP8-mm"
+
+
+# Layers Unsloth's UD-Q8_K_XL keeps at F16 (perplexity-sensitive
+# in their sweep). Late-stack clustering is consistent with the
+# general finding that errors near the output propagate directly
+# to logits.
+LATE_FFN_LAYERS = (50, 51, 59, 62, 63)
+LATE_ATTN_LAYERS = (51, 59, 63)
+
+
+# Build the ignore regex list. Note: llmcompressor matches these
+# patterns against MODULE names (no `.weight` suffix) when walking
+# `named_modules()` for `targets=["Linear"]`. The first pass of
+# this script used `\.weight$` patterns and silently quantized
+# lm_head + every linear_attn projection — verified post-hoc by
+# inspecting the saved safetensors. Patterns now anchor with `$`
+# directly at the end of the module name.
+IGNORE_PATTERNS: list[str] = [
+    # Original recipe: lm_head and embeddings always full-precision.
+    # (embed_tokens is an Embedding, not a Linear, so it's already
+    # ignored by `targets=["Linear"]`. Pattern kept as belt-and-
+    # suspenders in case future llmcompressor versions widen the
+    # target set.)
+    "re:lm_head$",
+    "re:.*embed_tokens$",
+
+    # Vision tower — entire `model.visual.*` subtree (vision
+    # transformer blocks + merger + patch_embed + pos_embed).
+    # Unsloth ships the vision tower as a separate `mmproj-BF16.gguf`
+    # for GGUF consumers; in our single-file FP8 setup we just leave
+    # them at BF16.
+    "re:model\\.visual\\..*",
+
+    # MTP (multi-token prediction) module — Unsloth's GGUF doesn't
+    # carry MTP weights so we have no precision signal from them;
+    # safest to keep BF16.
+    "re:mtp\\..*",
+
+    # Linear-attention block — keep ENTIRELY at BF16. vLLM fuses
+    # `in_proj_qkv` and `in_proj_z` into a single `in_proj_qkvz`
+    # layer, and compressed_tensors rejects mixed schemes within a
+    # fused layer. Unsloth's recipe keeps z, a, b, out at F16/F32
+    # (gate/SSM internals are quantization-fragile in the GatedDeltaNet
+    # update), so the principled choice is to also keep `in_proj_qkv`
+    # at BF16 rather than FP8'ing the gate to match. We give up ~1 GB
+    # of FP8 coverage; in exchange we follow Unsloth's quality intent
+    # and load cleanly under vLLM. (`in_proj_a` + `in_proj_b` are
+    # likewise fused as `in_proj_ba` — both ignored, consistent.)
+    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_qkv$",
+    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_z$",
+    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_a$",
+    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_b$",
+    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.out_proj$",
+
+    # Per-layer high-precision MLP (Unsloth flagged exactly these
+    # late-stack indices in their UD-Q8_K_XL sensitivity sweep, all
+    # three of {gate, up, down} per layer). vLLM fuses gate+up into
+    # `gate_up_proj`; ignoring both keeps the fused layer consistent.
+    # `down_proj` is its own (non-fused) layer.
+    "re:model\\.language_model\\.layers\\.("
+    + "|".join(str(n) for n in LATE_FFN_LAYERS)
+    + ")\\.mlp\\.(down|gate|up)_proj$",
+
+    # Per-layer high-precision attention q/k/v (Unsloth's sweep upgrades
+    # only q and k; we extend to v because vLLM fuses q/k/v into
+    # `qkv_proj` and rejects mixed schemes). `o_proj` is its own
+    # non-fused layer and stays at FP8.
+    "re:model\\.language_model\\.layers\\.("
+    + "|".join(str(n) for n in LATE_ATTN_LAYERS)
+    + ")\\.self_attn\\.(q|k|v)_proj$",
+]
+
+
+def main() -> None:
+    print(f"Loading {MODEL} as multimodal "
+          f"(Qwen3_5ForConditionalGeneration)...", flush=True)
+    model = Qwen3_5ForConditionalGeneration.from_pretrained(
+        MODEL,
+        dtype=torch.bfloat16,
+        device_map="auto",
+        trust_remote_code=True,
+    )
+    print(f"  loaded: {model.__class__.__name__}", flush=True)
+
+    print(f"Loading processor (text + image preprocessing)...", flush=True)
+    processor = AutoProcessor.from_pretrained(MODEL, trust_remote_code=True)
+
+    print("Running FP8_DYNAMIC oneshot quantization...", flush=True)
+    print(f"  ignore list: {len(IGNORE_PATTERNS)} patterns",
+          flush=True)
+    recipe = QuantizationModifier(
+        targets=["Linear"],
+        scheme="FP8_DYNAMIC",
+        ignore=IGNORE_PATTERNS,
+    )
+    oneshot(model=model, recipe=recipe, output_dir=OUTPUT_DIR)
+    processor.save_pretrained(OUTPUT_DIR)
+    print(f"  wrote model + processor to {OUTPUT_DIR}", flush=True)
+
+    merge_mtp(OUTPUT_DIR)
+    verify_output(OUTPUT_DIR)
+
+
+def merge_mtp(out_dir: str) -> None:
+    """Splice upstream MTP tensors into the saved FP8 safetensors.
+
+    `Qwen3_5ForConditionalGeneration` skips the MTP submodule on load,
+    so oneshot's output is missing the 15 `mtp.*` tensors. We resolve
+    the upstream snapshot via the HF cache (already populated by
+    from_pretrained), pull just the MTP tensors out at BF16, and
+    rewrite the safetensors with them merged in. The compressed_tensors
+    metadata header (which carries the FP8 format identifier vLLM
+    needs to dequantize) is preserved verbatim.
+
+    Atomic-rename is used so a crash mid-write doesn't corrupt the
+    33+ GB checkpoint we just spent minutes producing.
+
+    Args:
+        out_dir: Directory holding the quantized checkpoint; it must
+            contain exactly one `*.safetensors` file.
+
+    Exits the process via `sys.exit` if `out_dir` does not hold exactly
+    one safetensors shard, or if an upstream `mtp.*` key already exists
+    in the quantized file.
+    """
+    print("\nMerging upstream MTP tensors...", flush=True)
+    # Only the shard index and the weight shards are requested.
+    upstream_dir = Path(snapshot_download(
+        MODEL,
+        allow_patterns=["model.safetensors.index.json",
+                        "model-*-of-*.safetensors"],
+    ))
+
+    with open(upstream_dir / "model.safetensors.index.json") as f:
+        idx = json.load(f)
+    # Distinct shard files that hold at least one `mtp.*` key.
+    mtp_shards = sorted({v for k, v in idx["weight_map"].items()
+                         if k.startswith("mtp.")})
+    print(f"  MTP tensors live in shards: {mtp_shards}", flush=True)
+
+    mtp_tensors: dict[str, torch.Tensor] = {}
+    for shard in mtp_shards:
+        with safe_open(upstream_dir / shard, framework="pt") as f:
+            for k in f.keys():
+                if k.startswith("mtp."):
+                    mtp_tensors[k] = f.get_tensor(k).contiguous()
+    mtp_bytes = sum(t.numel() * t.element_size()
+                    for t in mtp_tensors.values())
+    print(f"  loaded {len(mtp_tensors)} mtp tensors "
+          f"({mtp_bytes/1e6:.1f} MB)", flush=True)
+
+    fp8_files = sorted(Path(out_dir).glob("*.safetensors"))
+    if len(fp8_files) != 1:
+        sys.exit(f"FAIL: expected single safetensors shard, "
+                 f"got {fp8_files}")
+    existing_path = fp8_files[0]
+
+    # Read the header metadata and every tensor of the quantized file,
+    # because the whole file is rewritten below.
+    with safe_open(existing_path, framework="pt") as f:
+        metadata = f.metadata() or {}
+        all_tensors = {k: f.get_tensor(k) for k in f.keys()}
+
+    # Refuse to overwrite a tensor the quantized output already has.
+    overlap = set(all_tensors) & set(mtp_tensors)
+    if overlap:
+        sys.exit(f"FAIL: MTP key collision with FP8 output: "
+                 f"{sorted(overlap)[:5]}")
+    all_tensors.update(mtp_tensors)
+
+    # Write to a sibling "<name>.new" file first, then rename it over
+    # the original.
+    tmp_path = existing_path.with_name(existing_path.name + ".new")
+    print(f"  rewriting {existing_path.name} "
+          f"({len(all_tensors)} tensors)...", flush=True)
+    save_file(all_tensors, str(tmp_path), metadata=metadata)
+    tmp_path.replace(existing_path)
+    print("  done", flush=True)
+
+
+def verify_output(out_dir: str) -> None:
+    """Open the saved safetensors and assert the recipe actually
+    landed: vision tower present and not FP8, FP8 dtype on at least
+    one `language_model.layers` weight, lm_head present and not FP8,
+    and exactly 15 `mtp.*` tensors.
+
+    Calls `sys.exit` with a "FAIL: ..." message on the first check
+    that does not hold; prints a summary and "DONE" otherwise.
+    """
+    print(f"\nVerifying {out_dir}...", flush=True)
+
+    files = sorted(glob.glob(f"{out_dir}/*.safetensors"))
+    if not files:
+        sys.exit(f"FAIL: no safetensors in {out_dir}")
+
+    vision_keys: list[tuple[str, str]] = []
+    fp8_sample: tuple[str, str] | None = None
+    lm_head_dtype: str | None = None
+    mtp_keys: list[str] = []
+
+    for fp in files:
+        with safe_open(fp, framework="pt") as f:
+            for k in f.keys():
+                # MTP keys are counted regardless of suffix.
+                if k.startswith("mtp."):
+                    mtp_keys.append(k)
+                # Some FP8 quants write a sibling `_scale` / `_zero_point`;
+                # we just care about the .weight tensors.
+                if not k.endswith(".weight"):
+                    continue
+                # The tensor is loaded only to read its dtype.
+                t = f.get_tensor(k)
+                dtype = str(t.dtype).replace("torch.", "")
+                if "model.visual." in k:
+                    vision_keys.append((k, dtype))
+                if k == "lm_head.weight":
+                    lm_head_dtype = dtype
+                # Keep the first FP8 language-model weight as evidence.
+                if (fp8_sample is None
+                        and "float8" in dtype
+                        and "language_model.layers" in k):
+                    fp8_sample = (k, dtype)
+
+    # Qwen3.6-27B has 167 vision `.weight` tensors (333 vision tensors
+    # total, the rest are `.bias` and per-block norms). 150 is a
+    # sanity floor that catches "vision tower didn't make it through"
+    # without being brittle to minor arch revisions.
+    if len(vision_keys) < 150:
+        sys.exit(f"FAIL: only {len(vision_keys)} vision tensors found "
+                 f"(expected >= 150). Vision tower didn't make it "
+                 f"through the quant.")
+
+    bad_vision = [(k, d) for k, d in vision_keys if "float8" in d]
+    if bad_vision:
+        sys.exit(f"FAIL: vision weights got quantized to FP8: "
+                 f"{bad_vision[:3]}...")
+
+    if lm_head_dtype is None:
+        sys.exit("FAIL: lm_head.weight not found in output.")
+    if "float8" in lm_head_dtype:
+        sys.exit(f"FAIL: lm_head.weight is FP8 ({lm_head_dtype}); "
+                 f"should be BF16/FP16.")
+
+    if fp8_sample is None:
+        sys.exit("FAIL: no FP8 weights found in language_model.layers — "
+                 "the recipe didn't quantize anything.")
+
+    # Upstream Qwen3.6-27B has exactly 15 mtp.* tensors (1 fused
+    # transformer block + projection + norms). merge_mtp() should
+    # have spliced all of them in.
+    if len(mtp_keys) != 15:
+        sys.exit(f"FAIL: expected 15 mtp.* tensors, found "
+                 f"{len(mtp_keys)}. merge_mtp() missed some.")
+
+    # The summary reports the dtype of the first vision weight only.
+    print(f"  ✓ {len(vision_keys)} vision tensors at "
+          f"{vision_keys[0][1]} (not FP8)")
+    print(f"  ✓ lm_head.weight at {lm_head_dtype} (not FP8)")
+    print(f"  ✓ FP8 sample: {fp8_sample[0]} = {fp8_sample[1]}")
+    print(f"  ✓ {len(mtp_keys)} mtp.* tensors present")
+    print("DONE")
+
+
+if __name__ == "__main__":
+    main()
--- a/src/agent/api/http.rs
+++ b/src/agent/api/http.rs
@ -100,7 +100,7 @@ impl HttpClient {
                .map_err(|e| anyhow::anyhow!("invalid server name: {e}"))?;
            let connector = tokio_rustls::TlsConnector::from(self.tls.clone());
            let tls = connector.connect(server_name.to_owned(), tcp).await
-                .context("TLS handshake")?;
+                .map_err(|e| anyhow::anyhow!("TLS handshake to {host}: {e}"))?;
            TokioIo::new(Box::new(tls) as Box<dyn IoStream>)
        } else {
            TokioIo::new(Box::new(tcp) as Box<dyn IoStream>)
@ -154,6 +154,14 @@ impl HttpResponse {
        Ok(String::from_utf8_lossy(&bytes).into_owned())
    }

+    /// Read the entire body as raw bytes (for binary downloads).
+    ///
+    /// Consumes the response. Returns an error with the context
+    /// "reading response body" if collecting the body fails.
+    pub async fn bytes(self) -> Result<Bytes> {
+        let bytes = self.body.collect().await
+            .context("reading response body")?
+            .to_bytes();
+        Ok(bytes)
+    }
+
    /// Read the entire body and deserialize as JSON.
    pub async fn json<T: serde::de::DeserializeOwned>(self) -> Result<T> {
        let bytes = self.body.collect().await
@ -190,6 +198,7 @@ impl HttpClientBuilder {
    }

    pub fn build(self) -> HttpClient {
+        install_rustls_crypto_provider();
        let certs = rustls_native_certs::load_native_certs()
            .certs.into_iter()
            .collect::<Vec<_>>();
@ -197,6 +206,13 @@ impl HttpClientBuilder {
        for cert in certs {
            root_store.add(cert).ok();
        }
+        // Also trust any `.pem` files under `~/.consciousness/certs/` —
+        // self-signed server certs for our own vllm hosts live there.
+        // Drop a new `<host>.pem` in the dir to trust a new server; no
+        // code change needed.
+        for cert in load_user_certs() {
+            root_store.add(cert).ok();
+        }
        let tls = Arc::new(
            ClientConfig::builder()
                .with_root_certificates(root_store)
@ -210,6 +226,65 @@ impl HttpClientBuilder {
    }
 }

+/// Install rustls' default crypto provider exactly once per process.
+/// rustls 0.23 doesn't pick one automatically when multiple features
+/// could provide it (e.g. when tonic pulls in both ring and aws-lc-rs
+/// via transitive deps). Idempotent via OnceLock; safe to call from
+/// multiple callers.
+fn install_rustls_crypto_provider() {
+    static ONCE: std::sync::OnceLock<()> = std::sync::OnceLock::new();
+    ONCE.get_or_init(|| {
+        // The result of `install_default()` is discarded on purpose.
+        // NOTE(review): presumably it only fails when a process-wide
+        // provider was already installed elsewhere — confirm.
+        let _ = rustls::crypto::ring::default_provider().install_default();
+    });
+}
+
+/// Load the concatenated PEM bytes of every `.pem` file under
+/// `~/.consciousness/certs/` — suitable for passing to a tonic
+/// `ClientTlsConfig::ca_certificate(Certificate::from_pem(...))` call
+/// so gRPC connections trust the same self-signed servers the HTTP
+/// path does.
+///
+/// A newline is appended after any file that does not end with one so
+/// adjacent PEM blocks stay separated. Silent on a missing home or
+/// certs dir and on unreadable files — those just contribute nothing.
+pub(crate) fn load_user_certs_pem_bytes() -> Vec<u8> {
+    let mut out = Vec::new();
+    let Some(home) = dirs::home_dir() else { return out };
+    let dir = home.join(".consciousness").join("certs");
+    let Ok(entries) = std::fs::read_dir(&dir) else { return out };
+    for entry in entries.flatten() {
+        let path = entry.path();
+        if path.extension().and_then(|e| e.to_str()) != Some("pem") {
+            continue;
+        }
+        if let Ok(bytes) = std::fs::read(&path) {
+            out.extend_from_slice(&bytes);
+            if !bytes.ends_with(b"\n") {
+                out.push(b'\n');
+            }
+        }
+    }
+    out
+}
+
+/// Parse every `.pem` file under `~/.consciousness/certs/` into DER
+/// certificates and return them all. A missing home or certs dir,
+/// unreadable files, and PEM entries that fail to parse are skipped
+/// silently — they mean "no extra certs trusted", not a hard failure.
+fn load_user_certs() -> Vec<rustls::pki_types::CertificateDer<'static>> {
+    let mut out = Vec::new();
+    let Some(home) = dirs::home_dir() else { return out };
+    let dir = home.join(".consciousness").join("certs");
+    let Ok(entries) = std::fs::read_dir(&dir) else { return out };
+    for entry in entries.flatten() {
+        let path = entry.path();
+        if path.extension().and_then(|e| e.to_str()) != Some("pem") {
+            continue;
+        }
+        let Ok(bytes) = std::fs::read(&path) else { continue };
+        for cert in rustls_pemfile::certs(&mut bytes.as_slice()).flatten() {
+            out.push(cert);
+        }
+    }
+    out
+}
+
 /// Trait alias for streams that work with hyper's IO adapter.
 trait IoStream: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static {}
 impl<T: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static> IoStream for T {}
--- a/src/agent/api/mod.rs
+++ b/src/agent/api/mod.rs
@ -7,13 +7,14 @@
 // Set POC_DEBUG=1 for verbose per-turn logging.

 pub mod http;
+pub mod salience;

-use std::time::{Duration, Instant};
+use std::time::Duration;
 use anyhow::Result;
 use tokio::sync::mpsc;
 use serde::Deserialize;

-use http::{HttpClient, HttpResponse};
+use http::HttpClient;

 #[derive(Debug, Clone, Deserialize)]
 pub struct Usage {
@ -37,6 +38,21 @@ pub struct ReadoutManifest {
 /// from pairing with the manifest fetched at startup.
 pub type TokenReadout = Vec<Vec<f32>>;

+/// Client-side sampling state. Mirrors the wire-level fields in
+/// `GenerateRequest` (proto flattened its `SamplingParams` submessage
+/// in so the server handler reads them directly), but stays as a
+/// grouped struct on the client because UI / config / tests pass
+/// these around together.
+#[derive(Clone, Copy)]
+pub struct SamplingParams {
+    /// Sent unchanged as `GenerateRequest.temperature`.
+    pub temperature: f32,
+    /// Sent unchanged as `GenerateRequest.top_p`.
+    pub top_p: f32,
+    /// Sent unchanged as `GenerateRequest.top_k`.
+    pub top_k: u32,
+    /// Decode budget. 0 = prefill only; >0 = decode up to this many
+    /// tokens, stopping early on EOS / stop_token_ids.
+    pub max_tokens: u32,
+}
+
 /// A JoinHandle that aborts its task when dropped.
 pub(crate) struct AbortOnDrop(tokio::task::JoinHandle<()>);

@ -46,13 +62,6 @@ impl Drop for AbortOnDrop {
    }
 }

-/// Sampling parameters for model generation.
-#[derive(Clone, Copy)]
-pub(crate) struct SamplingParams {
-    pub temperature: f32,
-    pub top_p: f32,
-    pub top_k: u32,
-}

 // ─────────────────────────────────────────────────────────────
 //  Stream events — yielded by backends, consumed by the runner
@ -74,6 +83,17 @@ pub struct ApiClient {
    api_key: String,
    pub model: String,
    base_url: String,
+    /// Cached readout manifest — fetched once per process and shared
+    /// across ApiClient clones (every Agent/fork gets the same cell).
+    /// `None` after fetch means the server has readout disabled (404).
+    manifest: std::sync::Arc<tokio::sync::OnceCell<Option<ReadoutManifest>>>,
+    /// Shared tonic Channel to the salience gRPC endpoint. Opened on
+    /// first use and reused across every SessionHandle / RPC call
+    /// derived from this ApiClient. tonic multiplexes concurrent
+    /// requests over the HTTP/2 connection automatically.
+    salience_channel: std::sync::Arc<
+        tokio::sync::OnceCell<tonic::transport::Channel>
+    >,
 }

 impl ApiClient {
@ -88,33 +108,69 @@ impl ApiClient {
            api_key: api_key.to_string(),
            model: model.to_string(),
            base_url: base_url.trim_end_matches('/').to_string(),
+            manifest: std::sync::Arc::new(tokio::sync::OnceCell::new()),
+            salience_channel: std::sync::Arc::new(tokio::sync::OnceCell::new()),
        }
    }

-    pub(crate) fn stream_completion_mm(
+    /// Return a `SalienceClient` on the shared gRPC channel — opens
+    /// the channel on first call and reuses it thereafter across
+    /// every ApiClient clone. All scoring / inference / session
+    /// RPCs flow through this single multiplexed HTTP/2 connection.
+    ///
+    /// Bumps tonic's default 4 MiB encode/decode caps to 64 MiB on
+    /// every client. Multimodal Generate requests carry pre-encoded
+    /// image bytes inline (Qwen3.6's 768×768 patches at high res
+    /// land around 5–8 MiB per turn), and Done events with full
+    /// per-token readout vectors can also exceed 4 MiB on long runs.
+    ///
+    /// # Errors
+    ///
+    /// Propagates the error from `salience::connect_channel` when the
+    /// channel cannot be opened. Per tokio's `get_or_try_init`, a
+    /// failed attempt leaves the cell empty, so a later call retries.
+    pub async fn salience_client(&self) -> Result<
+        salience::pb::salience_client::SalienceClient<tonic::transport::Channel>
+    > {
+        let ch = self.salience_channel.get_or_try_init(|| async {
+            let grpc_url = salience::derive_grpc_url(&self.base_url);
+            log::debug!(target: "grpc",
+                "opening shared salience channel: http_base={} -> grpc_url={}",
+                self.base_url, grpc_url);
+            salience::connect_channel(&grpc_url).await
+        }).await?;
+        const MAX_GRPC_MESSAGE_BYTES: usize = 64 * 1024 * 1024;
+        // Each returned client wraps a clone of the one shared channel.
+        Ok(salience::pb::salience_client::SalienceClient::new(ch.clone())
+            .max_decoding_message_size(MAX_GRPC_MESSAGE_BYTES)
+            .max_encoding_message_size(MAX_GRPC_MESSAGE_BYTES))
+    }
+
+    /// Stream generation via a gRPC session. Spawns a task that walks
+    /// the prompt chunks, takes everything past `match_upto` as the
+    /// delta, and sends it — together with any images lying past
+    /// `match_upto`, attached inline — in a single
+    /// `Generate(max_tokens=sampling.max_tokens, ...)` request whose
+    /// decode Token events stream back through the channel.
+    ///
+    /// On any gRPC error the session is dropped; the next call
+    /// reopens fresh. Happy-path ordering: Token* Done. Error paths
+    /// emit `StreamToken::Error` and close.
+    pub(crate) fn stream_session_mm(
        &self,
-        prompt_tokens: &[u32],
-        images: &[super::context::WireImage],
+        session_lock: std::sync::Arc<crate::Mutex<Option<salience::SessionHandle>>>,
+        chunks: Vec<super::context::WireChunk>,
+        images: Vec<super::context::WireImage>,
+        match_upto: u32,
        sampling: SamplingParams,
        priority: Option<i32>,
+        readout_shape: Option<(u32, u32)>,
    ) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) {
        let (tx, rx) = mpsc::unbounded_channel();
-        let client = self.client.clone();
-        let api_key = self.api_key.clone();
-        let model = self.model.clone();
-        let prompt_tokens = prompt_tokens.to_vec();
-        let images: Vec<(Vec<u8>, String)> = images.iter()
-            .map(|i| (i.bytes.clone(), i.mime.clone()))
-            .collect();
-        let base_url = self.base_url.clone();
+        let client = self.clone();

        let handle = tokio::spawn(async move {
-            let result = stream_completions(
-                &client, &base_url, &api_key, &model,
-                &prompt_tokens, &images, &tx, sampling, priority,
+            let result = run_session_generate(
+                session_lock, &client, chunks, images, match_upto, sampling,
+                priority, readout_shape, &tx,
            ).await;
            if let Err(e) = result {
-                let _ = tx.send(StreamToken::Error(e.to_string()));
+                log::warn!(target: "grpc",
+                    "stream_session_mm error, forwarding to UI: {:#}", e);
+                let _ = tx.send(StreamToken::Error(format!("{:#}", e)));
            }
        });

@ -128,386 +184,243 @@ impl ApiClient {
    /// readout is enabled on the server, `Ok(None)` on 404 (disabled),
    /// or an error on any other failure.
    ///
-    /// Call once at startup and cache the result; the manifest doesn't
-    /// change during a server run.
+    /// First call performs the HTTP fetch; subsequent calls (including
+    /// across ApiClient clones sharing the same cell) return the
+    /// cached result. The manifest doesn't change during a server run.
+    pub fn model_str(&self) -> &str { &self.model }
+
    pub async fn fetch_readout_manifest(&self) -> Result<Option<ReadoutManifest>> {
-        let url = format!("{}/readout/manifest", self.base_url);
-        let auth = format!("Bearer {}", self.api_key);
-        let response = self
-            .client
-            .get_with_headers(&url, &[("Authorization", &auth)])
-            .await
-            .map_err(|e| anyhow::anyhow!("readout manifest fetch ({}): {}", url, e))?;
-        let status = response.status();
-        if status.as_u16() == 404 {
-            return Ok(None);
-        }
-        if !status.is_success() {
-            let body = response.text().await.unwrap_or_default();
-            let n = body.floor_char_boundary(body.len().min(500));
-            anyhow::bail!("readout manifest HTTP {} ({}): {}", status, url, &body[..n]);
-        }
-        Ok(Some(response.json().await?))
+        let manifest = self.manifest.get_or_try_init(|| async {
+            let url = format!("{}/readout/manifest", self.base_url);
+            let auth = format!("Bearer {}", self.api_key);
+            let response = self
+                .client
+                .get_with_headers(&url, &[("Authorization", &auth)])
+                .await
+                .map_err(|e| anyhow::anyhow!("readout manifest fetch ({}): {}", url, e))?;
+            let status = response.status();
+            if status.as_u16() == 404 {
+                return Ok::<_, anyhow::Error>(None);
+            }
+            if !status.is_success() {
+                let body = response.text().await.unwrap_or_default();
+                let n = body.floor_char_boundary(body.len().min(500));
+                anyhow::bail!("readout manifest HTTP {} ({}): {}", status, url, &body[..n]);
+            }
+            Ok(Some(response.json().await?))
+        }).await?;
+        Ok(manifest.clone())
    }

 }

-async fn stream_completions(
-    client: &HttpClient,
-    base_url: &str,
-    api_key: &str,
-    model: &str,
-    prompt_tokens: &[u32],
-    images: &[(Vec<u8>, String)],
-    tx: &mpsc::UnboundedSender<StreamToken>,
+/// Body of the gRPC-path streaming task. Takes the session handle
+/// from `session_lock` (opening one if absent), collects the tokens
+/// and images past `match_upto`, sends them in a single Generate
+/// request, and translates its non-prefill Token events into
+/// StreamTokens on `tx`. On success the handle is put back in
+/// `session_lock` with `committed_len` updated from the Done event;
+/// on error the handle is dropped so the next call reopens.
+async fn run_session_generate(
+    session_lock: std::sync::Arc<crate::Mutex<Option<salience::SessionHandle>>>,
+    client: &ApiClient,
+    chunks: Vec<super::context::WireChunk>,
+    images: Vec<super::context::WireImage>,
+    match_upto: u32,
    sampling: SamplingParams,
    priority: Option<i32>,
-) -> anyhow::Result<()> {
-    let mut request = serde_json::json!({
-        "model": model,
-        "prompt": prompt_tokens,
-        "max_tokens": 16384,
-        "temperature": sampling.temperature,
-        "top_p": sampling.top_p,
-        "top_k": sampling.top_k,
-        "stream": true,
-        "return_token_ids": true,
-        "skip_special_tokens": false,
-        "stop_token_ids": [super::tokenizer::IM_END],
-    });
-    if !images.is_empty() {
-        use base64::Engine;
-        let b64 = base64::engine::general_purpose::STANDARD;
-        let uris: Vec<String> = images.iter()
-            .map(|(bytes, mime)| format!("data:{};base64,{}", mime, b64.encode(bytes)))
-            .collect();
-        request["multi_modal_data"] = serde_json::json!({ "image": uris });
-    }
-    if let Some(p) = priority {
-        request["priority"] = serde_json::json!(p);
-    }
+    readout_shape: Option<(u32, u32)>,
+    tx: &mpsc::UnboundedSender<StreamToken>,
+) -> Result<()> {
+    use std::time::Instant;
+    use futures::StreamExt;
+    use super::context::WireChunk;
+    use salience::pb;

-    let url = format!("{}/completions", base_url);
-    let debug_label = format!("{} prompt tokens, model={}", prompt_tokens.len(), model);
-
-    let mut response = send_and_check(
-        client, &url, &request,
-        ("Authorization", &format!("Bearer {}", api_key)),
-        &[], &debug_label, None,
-    ).await?;
-
-    let mut reader = SseReader::new();
-    let mut usage = None;
-
-    while let Some(event) = reader.next_event(&mut response).await? {
-        if let Some(err_msg) = event["error"]["message"].as_str() {
-            anyhow::bail!("API error in stream: {}", err_msg);
-        }
-
-        if let Some(u) = event["usage"].as_object() {
-            if let Ok(u) = serde_json::from_value::<Usage>(serde_json::Value::Object(u.clone())) {
-                usage = Some(u);
+    let mut handle: salience::SessionHandle = {
+        let mut guard = session_lock.lock().await;
+        match guard.take() {
+            Some(h) => h,
+            None => {
+                drop(guard);
+                log::debug!(target: "grpc", "run_session_generate: opening new session");
+                salience::SessionHandle::open(client).await?
            }
        }
+    };

-        let choices = match event["choices"].as_array() {
-            Some(c) => c,
-            None => continue,
+    // If the client believes the match extends only up to `match_upto`
+    // but the server has more, we need to rewind. For v1 the match is
+    // either whole or broken — `match_upto` is always 0 on any mutation
+    // — so the cheapest correct recovery is to drop the session and
+    // open a fresh one.
+    if match_upto < handle.committed_len {
+        log::warn!(target: "grpc",
+            "session rewind: match_upto={} < committed_len={} — reopening session (resending {} bytes)",
+            match_upto, handle.committed_len, handle.committed_len - match_upto);
+        drop(handle);
+        handle = salience::SessionHandle::open(client).await?;
+    }
+
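+    // Walk chunks at token granularity, taking everything past
+    // `match_upto` as the delta. Token chunks can be split mid-way;
+    // images live inline in the token stream, so there's no separate
+    // image-chunk case anymore.
+    // NOTE(review): the delta is cut at `match_upto` but the request
+    // below is sent with `offset = handle.committed_len`; confirm
+    // callers guarantee the two agree (notably after a reopen).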
+    let mut acc: u32 = 0;
+    let mut pending: Vec<u32> = Vec::new();
+    for chunk in chunks.iter() {
+        match chunk {
+            WireChunk::Tokens(t) => {
+                let len = t.len() as u32;
+                let chunk_end = acc + len;
+                if chunk_end <= match_upto {
+                    acc = chunk_end;
+                } else if acc < match_upto {
+                    let skip = (match_upto - acc) as usize;
+                    pending.extend_from_slice(&t[skip..]);
+                    acc = chunk_end;
+                } else {
+                    pending.extend_from_slice(t);
+                    acc = chunk_end;
+                }
+            }
+        }
+    }
+
+    // Filter images to those entirely past `match_upto` — anything
+    // before is on the server already (prior turn), anything
+    // straddling is a hard divergence (image partially-sent shouldn't
+    // happen with our atomic AppendImage history; with images-inline
+    // it can only happen if mark_dirty cleared match_upto mid-block,
+    // which the AST mutators prevent).
+    let mut new_images: Vec<pb::ImageAttachment> = Vec::new();
+    for img in &images {
+        if img.pad_end <= match_upto {
+            continue; // already sent on a prior turn
+        }
+        if img.pad_start < match_upto {
+            anyhow::bail!(
+                "session divergence: image at [{},{}) straddles match_upto={}",
+                img.pad_start, img.pad_end, match_upto,
+            );
+        }
+        new_images.push(pb::ImageAttachment {
+            bytes: img.bytes.clone(),
+            mime: img.mime.clone(),
+            pad_range_start: img.pad_start,
+            pad_range_end: img.pad_end,
+        });
+    }
+
+    // Final Generate: pending holds any trailing text; decode up to
+    // sampling.max_tokens. Request readouts on all decode positions
+    // via a catch-all range ending at u32::MAX — decode never
+    // reaches it.
+    let prompt_len_after_append = handle.committed_len + pending.len() as u32;
+    let readout_ranges = if readout_shape.is_some() {
+        vec![pb::PositionRange {
+            start: prompt_len_after_append,
+            end: u32::MAX,
+        }]
+    } else {
+        Vec::new()
+    };
+    let req = pb::GenerateRequest {
+        session_id: handle.session_id.clone(),
+        append_tokens: pending,
+        offset: handle.committed_len,
+        truncating: false,
+        max_tokens: sampling.max_tokens,
+        logprobs_ranges: Vec::new(),
+        logprob_top_k: 0,
+        readout_ranges,
+        temperature: sampling.temperature,
+        top_p: sampling.top_p,
+        top_k: sampling.top_k,
+        stop_token_ids: Vec::new(),
+        priority: priority.unwrap_or(0),
+        images: new_images,
+    };
+    let session_id_for_log = handle.session_id.clone();
+    let t_generate = Instant::now();
+    log::debug!(target: "grpc",
+        "session {} Generate: offset={} append={} max_tokens={} priority={}",
+        session_id_for_log, req.offset, req.append_tokens.len(),
+        req.max_tokens, req.priority);
+
+    let mut stream = handle.generate(req).await?;
+    let (n_layers, n_concepts) = readout_shape.unwrap_or((0, 0));
+    let mut session_terminated = false;
+    let mut first_token_at: Option<Instant> = None;
+
+    while let Some(event) = stream.next().await {
+        let event = match event {
+            Ok(e) => e,
+            Err(status) => {
+                log::warn!(target: "grpc",
+                    "session {} Generate stream error: {} — dropping session",
+                    session_id_for_log, status);
+                session_terminated = true;
+                let _ = tx.send(StreamToken::Error(format!(
+                    "Generate stream error: {}", status,
+                )));
+                break;
+            }
        };
-
-        for choice in choices {
-            // `readout`, if present, is a nested list
-            // `[num_tokens][n_layers][n_concepts]`. Parse it once per
-            // chunk and pair rows with token ids by index — the rows
-            // are in the same order as `token_ids`.
-            let readouts: Option<Vec<TokenReadout>> = choice["readout"]
-                .as_array()
-                .map(|outer| {
-                    outer.iter().filter_map(|per_token| {
-                        per_token.as_array().map(|layers| {
-                            layers.iter().filter_map(|per_layer| {
-                                per_layer.as_array().map(|vals| {
-                                    vals.iter()
-                                        .filter_map(|v| v.as_f64().map(|f| f as f32))
-                                        .collect::<Vec<f32>>()
-                                })
-                            }).collect::<Vec<Vec<f32>>>()
-                        })
-                    }).collect()
+        let Some(inner) = event.event else { continue };
+        match inner {
+            pb::generate_event::Event::Token(t) => {
+                if t.is_prefill { continue; }
+                if first_token_at.is_none() {
+                    log::debug!(target: "grpc",
+                        "session {} first decode token at {:?}",
+                        session_id_for_log, t_generate.elapsed());
+                    first_token_at = Some(Instant::now());
+                }
+                let readout = if t.readout.is_empty() {
+                    None
+                } else if n_layers == 0 || n_concepts == 0 {
+                    None
+                } else {
+                    let expected = (n_layers as usize) * (n_concepts as usize);
+                    if t.readout.len() != expected {
+                        log::warn!(target: "grpc",
+                            "readout shape mismatch: expected {}*{}={}, got {}",
+                            n_layers, n_concepts, expected, t.readout.len());
+                        None
+                    } else {
+                        let n = n_concepts as usize;
+                        let mut layers: Vec<Vec<f32>> = Vec::with_capacity(n_layers as usize);
+                        for l in 0..(n_layers as usize) {
+                            layers.push(t.readout[l * n..(l + 1) * n].to_vec());
+                        }
+                        Some(layers)
+                    }
+                };
+                if tx.send(StreamToken::Token { id: t.id, readout }).is_err() {
+                    break;
+                }
+            }
+            pb::generate_event::Event::Done(d) => {
+                log::debug!(target: "grpc",
+                    "session {} Done: prompt={} completion={} total={} reason={:?} elapsed={:?}",
+                    session_id_for_log, d.prompt_tokens, d.completion_tokens,
+                    d.total_tokens, d.finish_reason, t_generate.elapsed());
+                handle.committed_len = d.total_tokens;
+                let usage = Some(Usage {
+                    prompt_tokens: d.prompt_tokens,
+                    completion_tokens: d.completion_tokens,
+                    total_tokens: d.total_tokens,
                });
-
-            if let Some(ids) = choice["token_ids"].as_array() {
-                for (i, id_val) in ids.iter().enumerate() {
-                    if let Some(id) = id_val.as_u64() {
-                        let readout = readouts
-                            .as_ref()
-                            .and_then(|r| r.get(i).cloned());
-                        let _ = tx.send(StreamToken::Token {
-                            id: id as u32,
-                            readout,
-                        });
-                    }
-                }
-            } else if let Some(text) = choice["text"].as_str() {
-                // Fallback: provider didn't return token_ids, encode locally.
-                // No readout available in this path — the encoder may
-                // produce a different token count than the server did.
-                if !text.is_empty() {
-                    for id in super::tokenizer::encode(text) {
-                        let _ = tx.send(StreamToken::Token { id, readout: None });
-                    }
-                }
+                let _ = tx.send(StreamToken::Done { usage });
            }
        }
    }

-    let _ = tx.send(StreamToken::Done { usage });
+    if !session_terminated {
+        let mut guard = session_lock.lock().await;
+        *guard = Some(handle);
+    }
    Ok(())
 }

-/// Send an HTTP request and check for errors.
-pub(crate) async fn send_and_check(
-    client: &HttpClient,
-    url: &str,
-    body: &impl serde::Serialize,
-    auth_header: (&str, &str),
-    extra_headers: &[(&str, &str)],
-    debug_label: &str,
-    request_json: Option<&str>,
-) -> Result<HttpResponse> {
-    let debug = std::env::var("POC_DEBUG").is_ok();
-    let start = Instant::now();
-
-    if debug {
-        let payload_size = serde_json::to_string(body)
-            .map(|s| s.len())
-            .unwrap_or(0);
-        dbglog!(
-            "request: {}K payload, {}",
-            payload_size / 1024, debug_label,
-        );
-    }
-
-    let mut headers: Vec<(&str, &str)> = Vec::with_capacity(extra_headers.len() + 1);
-    headers.push(auth_header);
-    headers.extend_from_slice(extra_headers);
-
-    let response = client
-        .send_json("POST", url, &headers, body)
-        .await
-        .map_err(|e| {
-            let msg = e.to_string();
-            let cause = if msg.contains("connect timeout") || msg.contains("TCP connect") {
-                "connection refused"
-            } else if msg.contains("request timeout") {
-                "request timed out"
-            } else {
-                "request error"
-            };
-            anyhow::anyhow!("{} ({}): {}", cause, url, msg)
-        })?;
-
-    let status = response.status();
-    let elapsed = start.elapsed();
-
-    if debug {
-        for name in [
-            "x-ratelimit-remaining",
-            "x-ratelimit-limit",
-            "x-request-id",
-        ] {
-            if let Some(val) = response.header(name) {
-                dbglog!("header {}: {}", name, val);
-            }
-        }
-    }
-
-    if !status.is_success() {
-        let body = response.text().await.unwrap_or_default();
-        dbglog!(
-            "HTTP {} after {:.1}s ({}): {}",
-            status,
-            elapsed.as_secs_f64(),
-            url,
-            &body[..body.floor_char_boundary(body.len().min(500))]
-        );
-        if let Some(json) = request_json {
-            let log_dir = dirs::home_dir()
-                .unwrap_or_default()
-                .join(".consciousness/logs/failed-requests");
-            let _ = std::fs::create_dir_all(&log_dir);
-            let ts = chrono::Local::now().format("%Y%m%dT%H%M%S");
-            let path = log_dir.join(format!("{}.json", ts));
-            if std::fs::write(&path, json).is_ok() {
-                dbglog!(
-                    "saved failed request to {} (HTTP {})", path.display(), status
-                );
-            }
-        }
-        anyhow::bail!("HTTP {} ({}): {}", status, url, &body[..body.floor_char_boundary(body.len().min(1000))]);
-    }
-
-    if debug {
-        dbglog!(
-            "connected in {:.1}s (HTTP {})",
-            elapsed.as_secs_f64(),
-            status.as_u16()
-        );
-    }
-
-    Ok(response)
-}
-
-/// SSE stream reader. Handles the generic SSE plumbing shared by both
-/// backends: chunk reading with timeout, line buffering, `data:` prefix
-/// stripping, `[DONE]` detection, JSON parsing, and parse error diagnostics.
-/// Yields parsed events as serde_json::Value — each backend handles its
-/// own event types.
-pub(crate) struct SseReader {
-    line_buf: String,
-    chunk_timeout: Duration,
-    pub stream_start: Instant,
-    pub chunks_received: u64,
-    pub sse_lines_parsed: u64,
-    pub sse_parse_errors: u64,
-    debug: bool,
-    done: bool,
-    /// Serialized request payload — saved to disk on errors for replay debugging.
-    pub(crate) request_json: Option<String>,
-}
-
-impl SseReader {
-    pub(crate) fn new() -> Self {
-        Self {
-            line_buf: String::new(),
-            chunk_timeout: Duration::from_secs(crate::config::get().api_stream_timeout_secs),
-            stream_start: Instant::now(),
-            chunks_received: 0,
-            sse_lines_parsed: 0,
-            sse_parse_errors: 0,
-            debug: std::env::var("POC_DEBUG").is_ok(),
-            done: false,
-            request_json: None,
-        }
-    }
-
-    /// Attach the serialized request payload for error diagnostics.
-    /// Save the request payload to disk for replay debugging.
-    fn save_failed_request(&self, reason: &str) {
-        let Some(ref json) = self.request_json else { return };
-        let log_dir = dirs::home_dir()
-            .unwrap_or_default()
-            .join(".consciousness/logs/failed-requests");
-        let _ = std::fs::create_dir_all(&log_dir);
-        let ts = chrono::Local::now().format("%Y%m%dT%H%M%S");
-        let path = log_dir.join(format!("{}.json", ts));
-        if std::fs::write(&path, json).is_ok() {
-            dbglog!(
-                "saved failed request to {} ({})", path.display(), reason
-            );
-        }
-    }
-
-    /// Read the next SSE event from the response stream.
-    /// Returns Ok(Some(value)) for each parsed data line,
-    /// Ok(None) when the stream ends or [DONE] is received.
-    pub(crate) async fn next_event(
-        &mut self,
-        response: &mut HttpResponse,
-    ) -> Result<Option<serde_json::Value>> {
-        loop {
-            // Drain complete lines from the buffer before reading more chunks
-            while let Some(newline_pos) = self.line_buf.find('\n') {
-                let line = self.line_buf[..newline_pos].trim().to_string();
-                self.line_buf = self.line_buf[newline_pos + 1..].to_string();
-
-                if line == "data: [DONE]" {
-                    self.done = true;
-                    return Ok(None);
-                }
-                if line.is_empty()
-                    || line.starts_with("event: ")
-                    || !line.starts_with("data: ")
-                {
-                    continue;
-                }
-
-                let json_str = &line[6..];
-                self.sse_lines_parsed += 1;
-
-                match serde_json::from_str(json_str) {
-                    Ok(v) => return Ok(Some(v)),
-                    Err(e) => {
-                        self.sse_parse_errors += 1;
-                        if self.sse_parse_errors == 1 || self.debug {
-                            let preview = if json_str.len() > 200 {
-                                format!("{}...", &json_str[..200])
-                            } else {
-                                json_str.to_string()
-                            };
-                            dbglog!(
-                                "SSE parse error (#{}) {}: {}",
-                                self.sse_parse_errors, e, preview
-                            );
-                        }
-                        continue;
-                    }
-                }
-            }
-
-            if self.done {
-                return Ok(None);
-            }
-
-            // Read more data from the response stream
-            match tokio::time::timeout(self.chunk_timeout, response.chunk()).await {
-                Ok(Ok(Some(chunk))) => {
-                    self.chunks_received += 1;
-                    self.line_buf.push_str(&String::from_utf8_lossy(&chunk));
-                }
-                Ok(Ok(None)) => return Ok(None),
-                Ok(Err(e)) => {
-                    let buf_preview = if self.line_buf.is_empty() {
-                        "(empty)".to_string()
-                    } else {
-                        let n = self.line_buf.len().min(500);
-                        format!("{}B: {}", self.line_buf.len(), &self.line_buf[..n])
-                    };
-                    let msg = format!(
-                        "stream error after {} chunks, {:.1}s, {} sse lines: {} | buf: {}",
-                        self.chunks_received,
-                        self.stream_start.elapsed().as_secs_f64(),
-                        self.sse_lines_parsed,
-                        e, buf_preview,
-                    );
-                    dbglog!("{}", msg);
-                    self.save_failed_request(&msg);
-                    return Err(e.into());
-                }
-                Err(_) => {
-                    let buf_preview = if self.line_buf.is_empty() {
-                        "(empty)".to_string()
-                    } else {
-                        let n = self.line_buf.len().min(500);
-                        format!("{}B: {}", self.line_buf.len(), &self.line_buf[..n])
-                    };
-                    let msg = format!(
-                        "stream timeout: {}s, {} chunks, {} sse lines, {:.1}s elapsed | buf: {}",
-                        self.chunk_timeout.as_secs(),
-                        self.chunks_received,
-                        self.sse_lines_parsed,
-                        self.stream_start.elapsed().as_secs_f64(),
-                        buf_preview,
-                    );
-                    dbglog!("{}", msg);
-                    self.save_failed_request(&msg);
-                    anyhow::bail!(
-                        "stream timeout: no data for {}s ({} chunks received)",
-                        self.chunk_timeout.as_secs(),
-                        self.chunks_received
-                    );
-                }
-            }
-        }
-    }
-}
--- a/src/agent/api/salience.rs
+++ b/src/agent/api/salience.rs
@ -0,0 +1,279 @@
+// agent/api/salience.rs — gRPC client bindings for salience.v1.
+//
+// Thin wrapper around the tonic-generated types. Every RPC except
+// Generate is unary; Generate is server-streaming. Free functions
+// (open/close session) wrap the lifecycle RPCs; `SessionHandle` just
+// carries the id + connection params so later RPCs can reuse them.
+//
+// The old bidi Session() API is gone — see git history for its shape.
+
+#![allow(clippy::enum_variant_names)]
+
+use anyhow::{Context, Result};
+use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint};
+
+/// Generated prost + tonic types for salience.v1. Call sites use
+/// `pb::OpenSessionRequest`, `pb::Token`, etc.
+pub mod pb {
+    // Pulls in the code tonic generated for the `salience.v1` package.
+    tonic::include_proto!("salience.v1");
+}
+
+/// The generated salience client, specialised to a tonic [`Channel`]
+/// transport.
+pub type SalienceClient = pb::salience_client::SalienceClient<Channel>;
+
+/// Build and connect the tonic [`Channel`] used for salience RPCs.
+///
+/// `base_url` is expected in the form `https://host:8443`. For
+/// `https://` URLs, TLS is configured with the native root store plus
+/// whatever PEM bytes `super::http::load_user_certs_pem_bytes()`
+/// returns (user-provided CA certs under `~/.consciousness/certs/`,
+/// for self-signed server certs).
+///
+/// The bare `Channel` is returned instead of a client so callers
+/// (`ApiClient::salience_client`) can cache it and clone a
+/// `SalienceClient` per request without reopening the TCP/TLS
+/// connection; tonic multiplexes RPCs over the shared channel.
+///
+/// # Errors
+/// Fails when `base_url` is not a valid endpoint, when the TLS config
+/// is rejected, or when the connection cannot be established.
+pub async fn connect_channel(base_url: &str) -> Result<Channel> {
+    use std::time::Duration;
+
+    // 30s to establish the connection, 600s request timeout.
+    let plain = Endpoint::from_shared(base_url.to_string())
+        .with_context(|| format!("invalid salience endpoint: {}", base_url))?
+        .connect_timeout(Duration::from_secs(30))
+        .timeout(Duration::from_secs(600));
+
+    let endpoint = if base_url.starts_with("https://") {
+        let extra_roots = super::http::load_user_certs_pem_bytes();
+        let native = ClientTlsConfig::new().with_native_roots();
+        // Only add a CA bundle when the user actually supplied one.
+        let tls = if extra_roots.is_empty() {
+            native
+        } else {
+            native.ca_certificate(Certificate::from_pem(extra_roots))
+        };
+        plain
+            .tls_config(tls)
+            .with_context(|| "configuring tonic TLS")?
+    } else {
+        plain
+    };
+
+    endpoint
+        .connect()
+        .await
+        .with_context(|| format!("failed to connect to salience server at {}", base_url))
+}
+
+/// Derive the gRPC base URL from the HTTP completions base URL.
+///
+/// vLLM's salience gRPC server listens on a different port (8443) from
+/// the HTTP endpoint (8000) and accepts no path component. Given an
+/// HTTP base like `https://host:8000/v1`, produce `https://host:8443`.
+///
+/// Trailing slashes and (when a `scheme://` prefix is present) the
+/// path are dropped. The port is rewritten only when what remains ends
+/// in exactly `:8000`; any other URL keeps its port, e.g.
+/// `https://host:9000/v1` becomes `https://host:9000`.
+pub fn derive_grpc_url(http_base: &str) -> String {
+    let mut url = http_base.trim_end_matches('/').to_string();
+    if let Some(proto_end) = url.find("://") {
+        let rest_start = proto_end + 3;
+        if let Some(path_slash) = url[rest_start..].find('/') {
+            url.truncate(rest_start + path_slash);
+        }
+    }
+    // Match the port as a suffix rather than with a blanket `replace`:
+    // substring replacement also turned `:80001` into `:84431` and
+    // rewrote a `:8000` group inside an IPv6 literal or userinfo.
+    if let Some(host) = url.strip_suffix(":8000") {
+        return format!("{host}:8443");
+    }
+    url
+}
+
+/// Add an `authorization: Bearer <api_key>` entry to the request's
+/// gRPC metadata.
+///
+/// Does nothing when `api_key` is empty, or when the resulting header
+/// text cannot be parsed into a metadata value — the request is then
+/// sent without the header.
+pub fn with_auth<T>(req: &mut tonic::Request<T>, api_key: &str) {
+    if !api_key.is_empty() {
+        let header = format!("Bearer {}", api_key);
+        if let Ok(value) = header.parse() {
+            req.metadata_mut().insert("authorization", value);
+        }
+    }
+}
+
+/// Handle to a server-side session. Carries the id + an `ApiClient`
+/// clone (which holds the shared tonic Channel) so subsequent
+/// per-session RPCs go over the process-global connection.
+/// `committed_len` tracks the server's current session.tokens length
+/// so the client can submit deltas with the right `offset`.
+pub struct SessionHandle {
+    /// Session id returned by the OpenSession RPC. `Drop` takes it
+    /// (leaving an empty string) when it hands the id to the
+    /// CloseSession task.
+    pub session_id: String,
+    /// `max_model_len` as reported in the OpenSession response.
+    pub max_model_len: u32,
+    /// Starts at 0 in `open`; `prefill_only` overwrites it with the
+    /// Done event's `total_tokens` and sends it as the request `offset`.
+    pub committed_len: u32,
+    /// Client clone used for every RPC issued through this handle.
+    client: super::ApiClient,
+}
+
+impl SessionHandle {
+    /// Open a new server-side session for `client.model`.
+    ///
+    /// Issues the OpenSession RPC (with the client's bearer token, if
+    /// any) and returns a handle holding the server's session id and
+    /// `max_model_len`, with `committed_len` starting at 0.
+    ///
+    /// # Errors
+    /// Fails when the salience client cannot be obtained or the RPC
+    /// itself fails.
+    pub async fn open(client: &super::ApiClient) -> Result<Self> {
+        let t0 = std::time::Instant::now();
+        log::debug!(target: "grpc", "OpenSession rpc: start");
+        let mut c = client.salience_client().await?;
+        let mut req = tonic::Request::new(pb::OpenSessionRequest {
+            model: client.model.clone(),
+        });
+        with_auth(&mut req, client.api_key());
+        let resp = c
+            .open_session(req)
+            .await
+            .with_context(|| "OpenSession RPC failed")?
+            .into_inner();
+        log::debug!(target: "grpc",
+            "OpenSession rpc: done session_id={} max_model_len={} elapsed={:?}",
+            resp.session_id, resp.max_model_len, t0.elapsed());
+        Ok(Self {
+            session_id: resp.session_id,
+            max_model_len: resp.max_model_len,
+            committed_len: 0,
+            client: client.clone(),
+        })
+    }
+
+    /// The `ApiClient` this handle issues its RPCs through.
+    pub fn client(&self) -> &super::ApiClient { &self.client }
+
+    /// Debug-only: fetch the server's full session.tokens. Used to
+    /// verify client-side accounting byte-for-byte when divergence
+    /// is suspected. Not cheap on large sessions.
+    ///
+    /// # Errors
+    /// Fails when the salience client cannot be obtained or the
+    /// DumpSession RPC fails.
+    pub async fn dump_tokens(&self) -> Result<Vec<u32>> {
+        let mut c = self.client.salience_client().await?;
+        let mut req = tonic::Request::new(pb::DumpSessionRequest {
+            session_id: self.session_id.clone(),
+        });
+        with_auth(&mut req, self.client.api_key());
+        let resp = c
+            .dump_session(req)
+            .await
+            .with_context(|| "DumpSession RPC failed")?
+            .into_inner();
+        Ok(resp.tokens)
+    }
+
+    /// Open a gRPC Generate stream with the given request. Caller
+    /// iterates the returned stream of GenerateEvents; the handle's
+    /// `committed_len` should be advanced by the caller on Done based
+    /// on the Done event's `total_tokens` field.
+    ///
+    /// # Errors
+    /// Fails when the salience client cannot be obtained or the
+    /// stream cannot be opened. Errors that occur mid-stream surface
+    /// as items of the returned stream instead.
+    pub async fn generate(
+        &self,
+        req: pb::GenerateRequest,
+    ) -> Result<tonic::Streaming<pb::GenerateEvent>> {
+        let t0 = std::time::Instant::now();
+        log::debug!(target: "grpc",
+            "Generate rpc: open-stream session={} offset={} append={} max_tokens={}",
+            self.session_id, req.offset, req.append_tokens.len(), req.max_tokens);
+        let mut c = self.client.salience_client().await?;
+        let mut req = tonic::Request::new(req);
+        with_auth(&mut req, self.client.api_key());
+        let resp = c
+            .generate(req)
+            .await
+            .with_context(|| "Generate RPC failed")?;
+        log::debug!(target: "grpc",
+            "Generate rpc: stream opened session={} open-latency={:?}",
+            self.session_id, t0.elapsed());
+        Ok(resp.into_inner())
+    }
+
+    /// Run a prefill-only Generate (max_tokens=0) that appends the
+    /// given tokens to the session. No decode, no Token events — the
+    /// server just extends session.tokens and runs prefill to warm
+    /// the KV cache. Used to interleave text runs between AppendImage
+    /// calls, and by score paths that want prompt_logprobs without a
+    /// decode step.
+    ///
+    /// On success `committed_len` holds the `total_tokens` from the
+    /// stream's Done event.
+    ///
+    /// # Errors
+    /// Fails when the stream cannot be opened, when any stream item
+    /// is an error, or when the stream ends without a Done event.
+    pub async fn prefill_only(&mut self, tokens: Vec<u32>) -> Result<()> {
+        use futures::StreamExt;
+        let req = pb::GenerateRequest {
+            session_id: self.session_id.clone(),
+            append_tokens: tokens,
+            offset: self.committed_len,
+            truncating: false,
+            max_tokens: 0,
+            logprobs_ranges: Vec::new(),
+            logprob_top_k: 0,
+            readout_ranges: Vec::new(),
+            temperature: 0.0,
+            top_p: 0.0,
+            top_k: 0,
+            stop_token_ids: Vec::new(),
+            priority: 0,
+            images: Vec::new(),
+        };
+        let mut stream = self.generate(req).await?;
+        let mut saw_done = false;
+        while let Some(event) = stream.next().await {
+            let event = event.map_err(|s| anyhow::anyhow!("prefill Generate stream: {}", s))?;
+            if let Some(pb::generate_event::Event::Done(d)) = event.event {
+                self.committed_len = d.total_tokens;
+                saw_done = true;
+            }
+        }
+        // Without Done, `committed_len` was never advanced, so the next
+        // request would carry a stale `offset`. Report it instead of
+        // returning success.
+        anyhow::ensure!(
+            saw_done,
+            "prefill Generate stream closed without Done event"
+        );
+        Ok(())
+    }
+}
+
+/// Dropping a handle fires CloseSession from a detached task, so the
+/// server doesn't hold the session until TTL eviction. Best-effort:
+/// with no tokio runtime available the close is skipped and the
+/// server's 30min TTL reaps the session eventually.
+impl Drop for SessionHandle {
+    fn drop(&mut self) {
+        if self.session_id.is_empty() {
+            return;
+        }
+        // Move the id out of the handle; the spawned task owns it.
+        let session_id = std::mem::take(&mut self.session_id);
+        let client = self.client.clone();
+        match tokio::runtime::Handle::try_current() {
+            Ok(rt) => {
+                // Detached: the JoinHandle is dropped, nobody awaits it.
+                rt.spawn(async move {
+                    let mut c = match client.salience_client().await {
+                        Ok(c) => c,
+                        Err(_) => return,
+                    };
+                    let mut req = tonic::Request::new(pb::CloseSessionRequest {
+                        session_id: session_id.clone(),
+                    });
+                    with_auth(&mut req, client.api_key());
+                    if let Err(e) = c.close_session(req).await {
+                        log::debug!(target: "grpc",
+                            "CloseSession on drop failed for {}: {:#}",
+                            session_id, e);
+                    }
+                });
+            }
+            Err(_) => {
+                log::debug!(target: "grpc",
+                    "SessionHandle drop outside tokio runtime, session {} leaks to TTL",
+                    session_id);
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Compile-time canary for the generated proto types: each message
+    /// is built field-by-field, so if build.rs stops regenerating
+    /// against the proto this stops compiling.
+    #[test]
+    fn generated_types_compile() {
+        let _open_req = pb::OpenSessionRequest {
+            model: "qwen3-vl".into(),
+        };
+        let sample_logprob = pb::TokenLogprob {
+            id: 1,
+            logprob: -0.5,
+        };
+        let _token = pb::Token {
+            id: 42,
+            position: 0,
+            is_prefill: false,
+            readout: vec![0.1, 0.2, 0.3],
+            logprobs: vec![sample_logprob],
+            sampled_logprob: -0.1,
+            has_sampled_logprob: true,
+        };
+        let done = pb::GenerateDone {
+            prompt_tokens: 10,
+            completion_tokens: 20,
+            total_tokens: 30,
+            finish_reason: pb::generate_done::FinishReason::Eos as i32,
+        };
+        let _event = pb::GenerateEvent {
+            event: Some(pb::generate_event::Event::Done(done)),
+        };
+    }
+
+    /// (HTTP base, expected gRPC base) pairs: path stripped, port 8000
+    /// swapped for 8443, other ports left alone.
+    #[test]
+    fn derive_grpc_url_cases() {
+        let cases = [
+            ("https://host:8000/v1", "https://host:8443"),
+            ("https://host:8000/", "https://host:8443"),
+            ("https://host:9000/v1", "https://host:9000"),
+        ];
+        for (http_base, expected) in cases {
+            assert_eq!(derive_grpc_url(http_base), expected);
+        }
+    }
+}
--- a/src/agent/context.rs
+++ b/src/agent/context.rs
@ -125,7 +125,19 @@ impl<'de> Deserialize<'de> for NodeLeaf {
            body: NodeBody,
            timestamp: DateTime<Utc>,
        }
-        let raw = Raw::deserialize(deserializer)?;
+        let mut raw = Raw::deserialize(deserializer)?;
+        // Heal pre-refactor logs: Image leaves used to be deserialized
+        // with token_count=0 (server-authoritative count was applied
+        // after AppendImage). With pads now expanded client-side at
+        // construction, recompute from the persisted dimensions if
+        // the stored count is 0.
+        if let NodeBody::Image { orig_height, orig_width, token_count, .. }
+            = &mut raw.body
+        {
+            if *token_count == 0 {
+                *token_count = qwen3_image_token_count(*orig_height, *orig_width);
+            }
+        }
        let token_ids = raw.body.compute_token_ids();
        Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp })
    }
@ -143,18 +155,44 @@ pub enum AstNode {
        /// Maps memory key → divergence score for this response.
        #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
        memory_scores: std::collections::BTreeMap<String, f64>,
+        /// Cached token stream for the subtree. When `Some`, wire-out
+        /// uses these bytes verbatim and skips recursion into children.
+        /// Populated by the response parser from the server's exact
+        /// stream; also computable from children as a fallback. Cleared
+        /// on any edit to a descendant. Not serialized — transient.
+        #[serde(skip, default)]
+        token_ids: Option<Vec<u32>>,
    },
 }

 /// The context window: four sections as Vec<AstNode>.
-/// All mutation goes through ContextState methods to maintain the invariant
-/// that token_ids on every leaf matches its rendered text.
+///
+/// All mutation MUST go through `ContextState`'s public methods. Two
+/// invariants ride on this:
+/// 1. Every `Leaf.token_ids` matches its `body.compute_token_ids()`.
+/// 2. For every `Branch { token_ids: Some(cached), .. }`, the cached
+///    token stream matches what `wire_into` would produce by walking
+///    `children` from scratch. Any mutation that touches a Branch's
+///    children — directly or via a descendant — must clear the
+///    Branch's `token_ids` so it gets recomputed on next wire-out.
+///
+/// The `&mut Vec<AstNode>` escape hatches are intentionally NOT
+/// exposed; if you find yourself wanting one, add a focused method
+/// here that maintains the invariants.
 pub struct ContextState {
    system: Vec<AstNode>,
    identity: Vec<AstNode>,
    journal: Vec<AstNode>,
    conversation: Vec<AstNode>,
    pub conversation_log: Option<crate::mind::log::ConversationLog>,
+    /// Length of the session's token stream on the server, as of the
+    /// last Done event. Updated by the grpc layer.
+    server_committed_len: u32,
+    /// Prefix length of our walk that still matches the server's
+    /// session.tokens byte-for-byte. When < `server_committed_len`
+    /// the session needs rewinding (truncating=true at this offset).
+    /// Reset to 0 on any mutation that could have changed sent bytes.
+    client_match_upto: u32,
 }

 impl Clone for ContextState {
@ -165,6 +203,8 @@ impl Clone for ContextState {
            journal: self.journal.clone(),
            conversation: self.conversation.clone(),
            conversation_log: None, // forked contexts don't log
+            server_committed_len: self.server_committed_len,
+            client_match_upto: self.client_match_upto,
        }
    }
 }
@ -201,6 +241,10 @@ pub struct ResponseParser {
    think_buf: String,
    in_tool_call: bool,
    tool_call_buf: String,
+    /// Raw generated token IDs, in arrival order. Combined with the
+    /// prologue at `finish` to stamp the Branch's authoritative
+    /// token cache — the bytes the server has for this branch.
+    generated_tokens: Vec<u32>,
 }

 impl Role {
@ -312,6 +356,16 @@ impl NodeLeaf {
    pub fn token_ids(&self) -> &[u32]    { &self.token_ids }
    pub fn tokens(&self) -> usize        { self.token_ids.len() }
    pub fn timestamp(&self) -> DateTime<Utc> { self.timestamp }
+
+    /// Set the IMAGE_PAD count of an Image leaf to `n` and rebuild the
+    /// cached `token_ids` from the updated body. Leaves with any other
+    /// body are left untouched — callers can check the shape via
+    /// `body()`.
+    pub fn set_image_token_count(&mut self, n: u32) {
+        let NodeBody::Image { token_count, .. } = &mut self.body else {
+            return;
+        };
+        *token_count = n;
+        self.token_ids = self.body.compute_token_ids();
+    }
 }

 impl AstNode {
@ -360,6 +414,9 @@ impl AstNode {
        orig_height: u32,
        orig_width: u32,
    ) -> Self {
+        // Pad count is computed eagerly from dimensions — no more
+        // "unknown until server responds" shape. Server validates
+        // on the Generate call; mismatches fail loud.
        let token_count = qwen3_image_token_count(orig_height, orig_width);
        Self::Leaf(NodeLeaf::new(NodeBody::Image {
            bytes,
@ -373,7 +430,13 @@ impl AstNode {
    // -- Branch constructors --------------------------------------------------

    pub fn branch(role: Role, children: Vec<AstNode>) -> Self {
-        Self::Branch { role, children, timestamp: Utc::now(), memory_scores: Default::default() }
+        Self::Branch {
+            role,
+            children,
+            timestamp: Utc::now(),
+            memory_scores: Default::default(),
+            token_ids: None,
+        }
    }

    pub fn system_msg(text: impl Into<String>) -> Self {
@ -382,6 +445,7 @@ impl AstNode {
            children: vec![Self::content(text)],
            timestamp: Utc::now(),
            memory_scores: Default::default(),
+            token_ids: None,
        }
    }

@ -391,6 +455,7 @@ impl AstNode {
            children: vec![Self::content(text)],
            timestamp: Utc::now(),
            memory_scores: Default::default(),
+            token_ids: None,
        }
    }

@ -402,11 +467,12 @@ impl AstNode {
                let token_ids = leaf.body.compute_token_ids();
                Self::Leaf(NodeLeaf { token_ids, ..leaf })
            }
-            Self::Branch { role, children, timestamp, memory_scores } => Self::Branch {
+            Self::Branch { role, children, timestamp, memory_scores, .. } => Self::Branch {
                role,
                children: children.into_iter().map(|c| c.retokenize()).collect(),
                timestamp,
                memory_scores,
+                token_ids: None,
            },
        }
    }
@ -483,7 +549,10 @@ impl AstNode {
    fn token_ids_into(&self, out: &mut Vec<u32>) {
        match self {
            Self::Leaf(leaf) => out.extend_from_slice(&leaf.token_ids),
-            Self::Branch { role, children, .. } => {
+            Self::Branch { token_ids: Some(cached), .. } => {
+                out.extend_from_slice(cached);
+            }
+            Self::Branch { role, children, token_ids: None, .. } => {
                out.push(tokenizer::IM_START);
                out.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
                for child in children {
@ -512,7 +581,8 @@ impl Ast for AstNode {
    fn tokens(&self) -> usize {
        match self {
            Self::Leaf(leaf) => leaf.tokens(),
-            Self::Branch { role, children, .. } => {
+            Self::Branch { token_ids: Some(cached), .. } => cached.len(),
+            Self::Branch { role, children, token_ids: None, .. } => {
                1 + role_header_tokens(*role)
                    + children.iter().map(|c| c.tokens()).sum::<usize>()
                    + 1 + newline_tokens()
@ -666,6 +736,7 @@ impl ResponseParser {
            think_buf: String::new(),
            in_tool_call: false,
            tool_call_buf: String::new(),
+            generated_tokens: Vec::new(),
        }
    }

@ -684,18 +755,34 @@ impl ResponseParser {
        let handle = tokio::spawn(async move {
            let mut parser = self;
            let agent_name = agent.state.lock().await.provenance.clone();
+            eprintln!(
+                "[agent:{agent_name}] parser task start branch_idx={} in_think={}",
+                parser.branch_idx, parser.in_think,
+            );
            let log_path = format!("/tmp/poc-{}.log", agent_name);
            let mut log_file = std::fs::OpenOptions::new()
                .create(true).append(true).open(&log_path).ok();
            let mut full_text = String::new();
+            let mut token_count: usize = 0;
            while let Some(event) = stream.recv().await {
                match event {
                    super::api::StreamToken::Token { id, readout } => {
+                        token_count += 1;
+                        if token_count == 1 {
+                            eprintln!("[agent:{agent_name}] parser first token id={}", id);
+                        } else if token_count % 256 == 0 {
+                            eprintln!(
+                                "[agent:{agent_name}] parser token_count={} chars={}",
+                                token_count,
+                                full_text.len(),
+                            );
+                        }
                        if let Some(r) = readout {
                            if let Ok(mut buf) = agent.readout.lock() {
                                buf.push(id, r);
                            }
                        }
+                        parser.generated_tokens.push(id);
                        let text = super::tokenizer::decode(&[id]);
                        full_text.push_str(&text);
                        let mut ctx = agent.context.lock().await;
@ -714,6 +801,12 @@ impl ResponseParser {
                        }
                    }
                    super::api::StreamToken::Done { usage } => {
+                        eprintln!(
+                            "[agent:{agent_name}] parser done token_count={} chars={} usage={:?}",
+                            token_count,
+                            full_text.len(),
+                            usage,
+                        );
                        if let Some(ref mut f) = log_file {
                            use std::io::Write;
                            let ctx = agent.context.lock().await;
@ -730,19 +823,31 @@ impl ResponseParser {
                                let _ = writeln!(f, "  unparsed text: {}", &full_text[..end]);
                            }
                        }
-                        if let Some(u) = usage {
+                        if let Some(ref u) = usage {
                            agent.state.lock().await.last_prompt_tokens = u.prompt_tokens;
                        }
                        let mut ctx = agent.context.lock().await;
                        parser.finish(&mut ctx);
+                        if let Some(u) = usage {
+                            ctx.note_session_synced(u.total_tokens);
+                        }
                        return Ok(());
                    }
                    super::api::StreamToken::Error(e) => {
+                        eprintln!("[agent:{agent_name}] parser stream error: {}", e);
                        return Err(anyhow::anyhow!("{}", e));
                    }
                }
            }
-            Ok(())
+            eprintln!(
+                "[agent:{agent_name}] parser stream closed without done token_count={} chars={}",
+                token_count,
+                full_text.len(),
+            );
+            Err(anyhow::anyhow!(
+                "stream closed without Done event after {} tokens",
+                token_count,
+            ))
        });
        (rx, handle)
    }
@ -823,7 +928,7 @@ impl ResponseParser {
    }

    fn push_child(&self, ctx: &mut ContextState, child: AstNode) {
-        ctx.push_child(Section::Conversation, self.branch_idx, child);
+        ctx.push_child_raw(Section::Conversation, self.branch_idx, child);
    }

    fn flush_content(&mut self, ctx: &mut ContextState) {
@ -837,10 +942,69 @@ impl ResponseParser {
    }

    pub fn finish(mut self, ctx: &mut ContextState) {
-        if !self.buf.is_empty() {
+        // Salvage any in-flight tag accumulators if the stream ended
+        // before the close tag arrived (max_tokens, premature EOS,
+        // server-side cancel). Without this, an unterminated
+        // <think>...</think> drops all of self.think_buf and only the
+        // trailing rolling window in self.buf survives — observed as
+        // "responses cut off, only the last ~8 characters come
+        // through" because drain_safe keeps `close_tag.len()` bytes
+        // (8 for `</think>`) at the tail of buf.
+        if self.in_think {
+            if !self.buf.is_empty() {
+                self.think_buf.push_str(&std::mem::take(&mut self.buf));
+            }
+            let text = std::mem::take(&mut self.think_buf).trim().to_string();
+            if !text.is_empty() {
+                self.push_child(ctx, AstNode::thinking(text));
+            }
+            self.in_think = false;
+        } else if self.in_tool_call {
+            if !self.buf.is_empty() {
+                self.tool_call_buf.push_str(&std::mem::take(&mut self.buf));
+            }
+            let body = std::mem::take(&mut self.tool_call_buf);
+            match parse_tool_call_body(&body) {
+                Some((name, args)) => {
+                    self.flush_content(ctx);
+                    self.push_child(ctx, AstNode::tool_call(&name, &args));
+                }
+                None => {
+                    // Body's likely incomplete (no `</tool_call>` ever
+                    // arrived). Wrap as content with the open tag so the
+                    // model can see its own truncated attempt next turn
+                    // rather than losing it silently.
+                    self.content_parts.push(format!("<tool_call>\n{}", body));
+                }
+            }
+            self.in_tool_call = false;
+        } else if !self.buf.is_empty() {
            self.content_parts.push(std::mem::take(&mut self.buf));
        }
        self.flush_content(ctx);
+
+        // Stamp the authoritative token cache onto the branch.
+        // Layout mirrors the full chat-template rendering of a
+        // message block:
+        //
+        //   IM_START + "assistant\n" [+ "<think>\n"]   (prologue — what we sent)
+        //   + generated_tokens                          (what the server generated, ends in IM_END)
+        //   + "\n"                                      (trailing newline — template-required)
+        //
+        // Server only has through the IM_END (model stops on it,
+        // doesn't emit "\n"). Match-upto lands inside the cache
+        // right after IM_END; the chunk-walk's straddle path picks
+        // up the trailing "\n" as the head of the next turn's delta.
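+        // The "\n" between turns matters: without it Qwen sees
+        // `<|im_end|><|im_start|>` back-to-back (no newline) and
+        // responds with garbage.
+        //
+        // NOTE(review): every arm of the salvage chain above leaves
+        // `in_think` false, so unless `flush_content` sets it again the
+        // `"assistant\n<think>\n"` choice below never fires — confirm
+        // the cache still matches the prologue sent when think_native
+        // is on.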
+        let prologue_text = if self.in_think { "assistant\n<think>\n" } else { "assistant\n" };
+        let mut cache = Vec::with_capacity(1 + self.generated_tokens.len() + 8);
+        cache.push(tokenizer::IM_START);
+        cache.extend(tokenizer::encode(prologue_text));
+        cache.extend(self.generated_tokens);
+        cache.extend(tokenizer::encode("\n"));
+        ctx.set_branch_cache(Section::Conversation, self.branch_idx, cache);
    }
 }

@ -852,20 +1016,77 @@ impl ContextState {
            journal: Vec::new(),
            conversation: Vec::new(),
            conversation_log: None,
+            server_committed_len: 0,
+            client_match_upto: 0,
        }
    }

+    // -- Server sync tracking -------------------------------------------------
+
+    /// Length of the session's token stream on the server. Updated by
+    /// the grpc layer from Generate Done events.
+    pub fn server_committed_len(&self) -> u32 { self.server_committed_len }
+
+    /// Prefix of our walk we still believe matches the server
+    /// byte-for-byte. If less than `server_committed_len`, the next
+    /// Generate must send `truncating=true` at this offset.
+    pub fn client_match_upto(&self) -> u32 { self.client_match_upto }
+
+    /// Called by the grpc layer after a successful Generate Done:
+    /// records both the server's new length and the fact that we
+    /// match up to it (we just sent everything).
+    pub fn note_session_synced(&mut self, total_tokens: u32) {
+        self.server_committed_len = total_tokens;
+        self.client_match_upto = total_tokens;
+    }
+
+    /// Reset match-upto to 0. Called from every mutation that could
+    /// have touched a region the server already has. For now,
+    /// conservatively drops alignment entirely — finer-grained
+    /// tracking (match-upto at the mutated node's offset) is a
+    /// future optimization.
+    fn mark_dirty(&mut self) {
+        self.client_match_upto = 0;
+    }
+
    // -- Read access ----------------------------------------------------------

    pub fn system(&self) -> &[AstNode]       { &self.system }
    pub fn identity(&self) -> &[AstNode]     { &self.identity }
    pub fn journal(&self) -> &[AstNode]      { &self.journal }
    pub fn conversation(&self) -> &[AstNode] { &self.conversation }
-    pub fn conversation_mut(&mut self) -> &mut Vec<AstNode> { &mut self.conversation }
+
+    /// Set or clear a single `memory_scores` entry on an Assistant
+    /// Branch. Used by the full-matrix scorer to attribute per-memory
+    /// divergence onto the response. `score = None` removes the key;
+    /// `Some(s)` inserts/overwrites.
+    ///
+    /// Doesn't affect the Branch's token cache: `memory_scores` is a
+    /// serialized-but-non-tokenizing annotation. Silent no-op if the
+    /// index is out of range or points to a Leaf or a non-Assistant
+    /// Branch — callers are typically iterating on stale indices and
+    /// we'd rather skip than panic.
+    pub fn set_branch_memory_score(
+        &mut self,
+        section: Section,
+        index: usize,
+        key: &str,
+        score: Option<f64>,
+    ) {
+        let nodes = self.section_mut(section);
+        let Some(node) = nodes.get_mut(index) else { return };
+        let AstNode::Branch { role: Role::Assistant, memory_scores, .. } = node
+        else { return };
+        match score {
+            Some(s) => { memory_scores.insert(key.to_string(), s); }
+            None => { memory_scores.remove(key); }
+        }
+    }

    pub fn sections(&self) -> [&Vec<AstNode>; 4] {
        [&self.system, &self.identity, &self.journal, &self.conversation]
    }
+
 }

 impl Ast for ContextState {
@ -898,30 +1119,63 @@ impl Ast for ContextState {
 }

 /// An image collected from the AST for a request body. The AST stores
-/// the pre-expanded token form (N image_pads) for accurate budget
-/// accounting; the wire form collapses each Image to a single
-/// `<|image_pad|>` between vision bookends and ships the bytes
-/// separately as multi_modal_data.
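+/// the pre-expanded vision block inline in each Image leaf's tokens;
+/// this struct is the image metadata collected during the wire walk —
+/// the binary + mime plus the absolute token-position range of that
+/// block in the full wire stream, half-open `[pad_start, pad_end)` and
+/// including the VISION_START / VISION_END bookends. Sent
+/// alongside `append_tokens` in `GenerateRequest` so the server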
+/// can attach vision features to the declared positions. Positions
+/// are absolute within the full wire walk starting at offset 0,
+/// i.e. the same coordinate system as `session.tokens` on the
+/// server once the walk has been applied.
+#[derive(Clone)]
 pub struct WireImage {
    pub bytes: Vec<u8>,
    pub mime: String,
+    pub pad_start: u32,
+    pub pad_end: u32,
+}
+
+/// One piece of the wire stream for the gRPC session path. Since
+/// images now live inline in the token stream (pre-expanded at AST
+/// construction time), there's only one variant — a run of tokens.
+/// The parallel `Vec<WireImage>` returned by `wire_chunks` gives the
+/// binary + position metadata for each embedded image.
+#[derive(Clone)]
+pub enum WireChunk {
+    Tokens(Vec<u32>),
 }

 fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>) {
    match node {
        AstNode::Leaf(leaf) => match leaf.body() {
            NodeBody::Image { bytes, mime, .. } => {
-                tokens.push(tokenizer::VISION_START);
-                tokens.push(tokenizer::IMAGE_PAD);
-                tokens.push(tokenizer::VISION_END);
+                // The Image leaf's token_ids is already
+                // [VISION_START, IMAGE_PAD * N, VISION_END]. Inline
+                // those into the token stream and record the pad-run
+                // range so the server can attach features to the
+                // declared positions.
+                let pad_start = tokens.len() as u32;
+                tokens.extend_from_slice(leaf.token_ids());
+                let pad_end = tokens.len() as u32;
                images.push(WireImage {
                    bytes: bytes.clone(),
                    mime: mime.clone(),
+                    pad_start,
+                    pad_end,
                });
            }
            _ => tokens.extend_from_slice(leaf.token_ids()),
        },
-        AstNode::Branch { role, children, .. } => {
+        AstNode::Branch { token_ids: Some(cached), children, .. } => {
+            // Cached branches still need their image children paired
+            // up with the vision-block ranges embedded in the cached
+            // token stream — the cache captures vision tokens but not
+            // the matching bytes/mime.
+            let base = tokens.len() as u32;
+            tokens.extend_from_slice(cached);
+            pair_cached_images(cached, children, base, images);
+        }
+        AstNode::Branch { role, children, token_ids: None, .. } => {
            tokens.push(tokenizer::IM_START);
            tokens.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
            for c in children {
@ -933,6 +1187,101 @@ fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>)
    }
 }

+/// Depth-first iterator over Image leaves under a slice of AST nodes.
+/// Yields `(bytes, mime)` borrows in document order; doesn't allocate
+/// per yield (only a stack of pending nodes).
+struct ImageLeaves<'a> {
+    stack: Vec<&'a AstNode>,
+}
+
+impl<'a> ImageLeaves<'a> {
+    fn new(nodes: &'a [AstNode]) -> Self {
+        let mut stack = Vec::with_capacity(nodes.len());
+        stack.extend(nodes.iter().rev());
+        Self { stack }
+    }
+}
+
+impl<'a> Iterator for ImageLeaves<'a> {
+    type Item = (&'a [u8], &'a str);
+    fn next(&mut self) -> Option<Self::Item> {
+        while let Some(node) = self.stack.pop() {
+            match node {
+                AstNode::Leaf(leaf) => {
+                    if let NodeBody::Image { bytes, mime, .. } = leaf.body() {
+                        return Some((bytes, mime));
+                    }
+                }
+                AstNode::Branch { children, .. } => {
+                    self.stack.extend(children.iter().rev());
+                }
+            }
+        }
+        None
+    }
+}
+
+/// Iterator over `(start, end)` token-offset pairs for each
+/// `VISION_START..VISION_END` block in a token slice. Panics on an
+/// unmatched VISION_START — that's an upstream tokenization bug
+/// worth a loud failure.
+fn vision_blocks(cached: &[u32]) -> impl Iterator<Item = (usize, usize)> + '_ {
+    let mut cur = 0;
+    std::iter::from_fn(move || {
+        while cur < cached.len() {
+            if cached[cur] == tokenizer::VISION_START {
+                let start = cur;
+                let end_rel = cached[cur..].iter()
+                    .position(|&t| t == tokenizer::VISION_END)
+                    .unwrap_or_else(|| panic!(
+                        "unmatched VISION_START at offset {} in cached branch",
+                        start));
+                let end = cur + end_rel + 1;
+                cur = end;
+                return Some((start, end));
+            }
+            cur += 1;
+        }
+        None
+    })
+}
+
+/// For a Branch whose `token_ids` are cached and may contain inlined
+/// vision blocks (`VISION_START + IMAGE_PAD*N + VISION_END`), recover
+/// the matching image bytes/mime from the children and emit one
+/// `WireImage` per vision block with the absolute pad offsets in the
+/// parent token stream.
+///
+/// The cache stores tokens but not image payloads; the AST stores
+/// image payloads in the children but not their post-cache positions.
+/// Pair them by zipping the two iterators; mismatched counts panic
+/// loudly because that's an AST/cache invariant violation that
+/// would otherwise mis-pair images on the wire.
+fn pair_cached_images(
+    cached: &[u32],
+    children: &[AstNode],
+    base_offset: u32,
+    images: &mut Vec<WireImage>,
+) {
+    let mut blocks = vision_blocks(cached);
+    let mut leaves = ImageLeaves::new(children);
+    loop {
+        match (blocks.next(), leaves.next()) {
+            (Some((s, e)), Some((bytes, mime))) => images.push(WireImage {
+                bytes: bytes.to_vec(),
+                mime: mime.to_string(),
+                pad_start: base_offset + s as u32,
+                pad_end: base_offset + e as u32,
+            }),
+            (None, None) => break,
+            (Some(_), None) => panic!(
+                "cached branch has more vision blocks than image children"),
+            (None, Some(_)) => panic!(
+                "cached branch has fewer vision blocks than image children"),
+        }
+    }
+}
+
 pub fn memory_key(node: &AstNode) -> Option<&str> {
    match node {
        AstNode::Leaf(leaf) => match leaf.body() {
@ -1042,6 +1391,92 @@ impl ContextState {
        }
        (tokens, images, assistant_ranges)
    }
+
+    /// Build the wire stream as interleaved `WireChunk`s for the gRPC
+    /// session path. Returns a tuple of (chunks, images): the chunks
+    /// hold the full token stream (with vision blocks inlined as
+    /// `VISION_START + IMAGE_PAD*N + VISION_END`), and the images
+    /// list carries each embedded image's binary + position range so
+    /// the gRPC layer can attach them via `GenerateRequest.images`.
+    ///
+    /// Note: with images inlined into the token stream, the chunks
+    /// list is at most a single `Tokens` chunk (empty when the walk
+    /// yields no tokens) — the multi-chunk shape persists only because
+    /// some callers may want the option of inserting breakpoints later.
+    ///
+    /// `conv_range` and `skip` mirror `wire_prompt` — select a
+    /// conversation slice and drop identity / conversation nodes by
+    /// predicate.
+    pub fn wire_chunks<F>(
+        &self,
+        conv_range: std::ops::Range<usize>,
+        mut skip: F,
+    ) -> (Vec<WireChunk>, Vec<WireImage>)
+    where F: FnMut(&AstNode) -> bool,
+    {
+        let mut buf: Vec<u32> = Vec::new();
+        let mut images: Vec<WireImage> = Vec::new();
+
+        fn visit(
+            node: &AstNode,
+            buf: &mut Vec<u32>,
+            images: &mut Vec<WireImage>,
+        ) {
+            match node {
+                AstNode::Leaf(leaf) => match leaf.body() {
+                    NodeBody::Image { bytes, mime, .. } => {
+                        // Pre-expanded vision block lives in
+                        // leaf.token_ids: [VISION_START, IMAGE_PAD*N,
+                        // VISION_END]. Inline + record the range.
+                        let pad_start = buf.len() as u32;
+                        buf.extend_from_slice(leaf.token_ids());
+                        let pad_end = buf.len() as u32;
+                        images.push(WireImage {
+                            bytes: bytes.clone(),
+                            mime: mime.clone(),
+                            pad_start,
+                            pad_end,
+                        });
+                    }
+                    _ => buf.extend_from_slice(leaf.token_ids()),
+                },
+                AstNode::Branch { token_ids: Some(cached), children, .. } => {
+                    // Same fix as wire_into's cached arm: the cache
+                    // holds vision tokens but not the matching bytes,
+                    // so walk children to recover them.
+                    let base = buf.len() as u32;
+                    buf.extend_from_slice(cached);
+                    pair_cached_images(cached, children, base, images);
+                }
+                AstNode::Branch { role, children, token_ids: None, .. } => {
+                    buf.push(tokenizer::IM_START);
+                    buf.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
+                    for c in children {
+                        visit(c, buf, images);
+                    }
+                    buf.push(tokenizer::IM_END);
+                    buf.extend(tokenizer::encode("\n"));
+                }
+            }
+        }
+
+        for node in self.system()   { visit(node, &mut buf, &mut images); }
+        for node in self.identity() {
+            if skip(node) { continue; }
+            visit(node, &mut buf, &mut images);
+        }
+        for node in self.journal()  { visit(node, &mut buf, &mut images); }
+        for node in &self.conversation()[conv_range] {
+            if skip(node) { continue; }
+            visit(node, &mut buf, &mut images);
+        }
+        let chunks = if buf.is_empty() {
+            Vec::new()
+        } else {
+            vec![WireChunk::Tokens(buf)]
+        };
+        (chunks, images)
+    }
 }

 impl ContextState {
@ -1061,17 +1496,27 @@ impl ContextState {
                dbglog!("warning: log: {:#}", e);
            }
        }
+        // Conversation appends always go to the tail — past committed —
+        // so they don't break the match. Any other section mutates a
+        // region the server may already have, so drop alignment.
+        if section != Section::Conversation {
+            self.mark_dirty();
+        }
        self.section_mut(section).push(node);
    }

    /// Push without logging.
    pub fn push_no_log(&mut self, section: Section, node: AstNode) {
+        if section != Section::Conversation {
+            self.mark_dirty();
+        }
        self.section_mut(section).push(node);
    }

    /// Replace the body of a leaf at `index` in `section`.
    /// Re-tokenizes to maintain the invariant.
    pub fn set_message(&mut self, section: Section, index: usize, body: NodeBody) {
+        self.mark_dirty();
        let nodes = self.section_mut(section);
        let node = &mut nodes[index];
        match node {
@ -1097,10 +1542,12 @@ impl ContextState {
    }

    pub fn del(&mut self, section: Section, index: usize) -> AstNode {
+        self.mark_dirty();
        self.section_mut(section).remove(index)
    }

    pub fn clear(&mut self, section: Section) {
+        self.mark_dirty();
        self.section_mut(section).clear();
    }

@ -1121,6 +1568,7 @@ impl ContextState {
    ///          are > 50% of conversation tokens) or oldest conversation entry.
    /// Phase 3: Snap to user message boundary at start.
    pub fn trim_conversation(&mut self) {
+        self.mark_dirty();
        let max_tokens = context_budget_tokens();
        let fixed = self.system.iter().map(|n| n.tokens()).sum::<usize>()
            + self.identity.iter().map(|n| n.tokens()).sum::<usize>()
@ -1197,11 +1645,49 @@ impl ContextState {
    }

    /// Push a child node into a branch at `index` in `section`.
+    /// Clears the branch's cached token stream — wire-out will recompute
+    /// from children until the cache is repopulated. If the cache was
+    /// populated (server had these bytes), drops session alignment.
    pub fn push_child(&mut self, section: Section, index: usize, child: AstNode) {
+        let node = &mut self.section_mut(section)[index];
+        let was_cached = matches!(node, AstNode::Branch { token_ids: Some(_), .. });
+        match node {
+            AstNode::Branch { children, token_ids, .. } => {
+                children.push(child);
+                *token_ids = None;
+            }
+            AstNode::Leaf(_) => panic!("push_child on leaf node"),
+        }
+        if was_cached {
+            self.mark_dirty();
+        }
+    }
+
+    /// Like `push_child` but preserves the branch's cached token stream.
+    /// Used by the response parser, which is simultaneously populating
+    /// the cache from the authoritative server stream and pushing the
+    /// parsed-out children — the two stay consistent by construction.
+    /// Module-private: callers outside `context.rs` must go through
+    /// `push_child` so the invariant is maintained.
+    fn push_child_raw(&mut self, section: Section, index: usize, child: AstNode) {
        let node = &mut self.section_mut(section)[index];
        match node {
            AstNode::Branch { children, .. } => children.push(child),
-            AstNode::Leaf(_) => panic!("push_child on leaf node"),
+            AstNode::Leaf(_) => panic!("push_child_raw on leaf node"),
+        }
+    }
+
+    /// Stamp a verbatim token cache onto the branch at `index` in
+    /// `section`. Used by the response parser to record the server's
+    /// authoritative token stream for the just-finished turn.
+    /// Module-private: the cache is an invariant-load-bearing piece
+    /// of state, populated only by code that holds the server's
+    /// ground truth.
+    fn set_branch_cache(&mut self, section: Section, index: usize, tokens: Vec<u32>) {
+        let node = &mut self.section_mut(section)[index];
+        match node {
+            AstNode::Branch { token_ids, .. } => *token_ids = Some(tokens),
+            AstNode::Leaf(_) => panic!("set_branch_cache on leaf node"),
        }
    }

@ -1225,6 +1711,14 @@ impl ContextState {
 // to at request time. Constants come from Qwen3.5-27B's preprocessor_config.
 // ---------------------------------------------------------------------------

+// Production client-side computation of image-token expansion. With
+// the delta-session protocol, the client writes the pre-expanded
+// vision block (VISION_START + N*IMAGE_PAD + VISION_END) directly
+// into the token stream at Image-leaf construction time, and tells
+// the server where each image's pad run lives via
+// GenerateRequest.images. Server validates that this N matches
+// what the vision encoder actually produces and rejects on
+// mismatch — so drift here fails loudly, not silently.
 const QWEN3_PATCH_SIZE: u32 = 16;
 const QWEN3_MERGE_SIZE: u32 = 2;
 const QWEN3_MIN_PIXELS: u64 = 65_536;
@ -1258,11 +1752,10 @@ fn smart_resize(h: u32, w: u32, factor: u32, min_pixels: u64, max_pixels: u64) -
    }
 }

-/// Compute how many `<|image_pad|>` tokens vLLM will emit for an image of
-/// the given dimensions. Matches Qwen3VL's feature-size calculation exactly:
-///   (grid_h * grid_w) / merge_size^2
-/// where (grid_h, grid_w) = resized dims / patch_size.
-fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 {
+/// How many `<|image_pad|>` tokens the Qwen3-VL vision encoder will
+/// produce for an image of the given dimensions. Server verifies
+/// this count against its own encoder run and rejects on mismatch.
+pub fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 {
    let factor = QWEN3_PATCH_SIZE * QWEN3_MERGE_SIZE;
    let (rh, rw) = smart_resize(orig_h, orig_w, factor, QWEN3_MIN_PIXELS, QWEN3_MAX_PIXELS);
    (rh / QWEN3_PATCH_SIZE) * (rw / QWEN3_PATCH_SIZE) / (QWEN3_MERGE_SIZE * QWEN3_MERGE_SIZE)
@ -1713,29 +2206,34 @@ mod tests {
    }

    #[test]
-    fn test_wire_prompt_collapses_image_pads() {
+    fn test_wire_prompt_preserves_expanded_image_pads() {
        let mut ctx = ContextState::new();
        ctx.push_no_log(Section::Conversation, AstNode::branch(Role::User, vec![
            AstNode::content("look:"),
            AstNode::image(vec![0xDE, 0xAD], "image/png", 512, 512),
        ]));

-        // AST side: N image_pads + bookends, full budget accounting.
+        // AST side and wire side should both carry N image_pads + bookends —
+        // server's session.tokens length must match what vLLM's engine will
+        // actually process. Binary image bytes are shipped separately in
+        // multi_modal_data via the WireImage list.
+        let n_expected = qwen3_image_token_count(512, 512) as usize;
+
        let full = ctx.token_ids();
        let n_image_pads_full = full.iter()
            .filter(|&&t| t == tokenizer::IMAGE_PAD).count();
-        assert_eq!(n_image_pads_full, qwen3_image_token_count(512, 512) as usize);
+        assert_eq!(n_image_pads_full, n_expected);

-        // Wire side: single image_pad, bytes moved to images list.
        let (wire, images, _) = ctx.wire_prompt(0..ctx.conversation().len(), |_| false);
        let n_image_pads_wire = wire.iter()
            .filter(|&&t| t == tokenizer::IMAGE_PAD).count();
-        assert_eq!(n_image_pads_wire, 1);
+        assert_eq!(n_image_pads_wire, n_expected);
+
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].bytes, vec![0xDE, 0xAD]);
        assert_eq!(images[0].mime, "image/png");

-        // vision_start/vision_end bookends are preserved in wire form.
+        // One pair of vision_start/vision_end bookends around the N pads.
        assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_START).count(), 1);
        assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_END).count(), 1);
    }
--- a/src/agent/mod.rs
+++ b/src/agent/mod.rs
@ -17,6 +17,7 @@ pub mod api;
 pub mod context;
 pub mod oneshot;
 pub mod readout;
+pub mod salience;
 pub mod tokenizer;
 pub mod tools;

@ -28,6 +29,11 @@ use context::{AstNode, ContextState, Section, Ast, PendingToolCall, ResponsePars

 use crate::mind::log::ConversationLog;

+async fn agent_trace(agent: &Arc<Agent>, msg: String) {
+    let provenance = agent.state.lock().await.provenance.clone();
+    eprintln!("[agent:{provenance}] {msg}");
+}
+
 // --- Activity tracking (RAII guards) ---

 pub struct ActivityEntry {
@ -148,6 +154,14 @@ pub struct Agent {
    /// token handler, read by UI screens (amygdala). Manifest is
    /// `None` when the server has readout disabled.
    pub readout: readout::SharedReadoutBuffer,
+    /// Long-lived gRPC session to the salience server, lazily opened
+    /// on first use. Tracks appended tokens so subsequent turns send
+    /// only the delta (prefix-cache reuse). None when not yet opened
+    /// or when the session has died and needs reopening.
+    ///
+    /// Arc-wrapped so the spawned streaming task can share ownership
+    /// (the task outlives the call site).
+    pub grpc_session: std::sync::Arc<crate::Mutex<Option<api::salience::SessionHandle>>>,
 }

 /// Mutable agent state — behind its own mutex.
@ -168,9 +182,7 @@ pub struct AgentState {
    pub think_native: bool,
    /// Tool-based thinking — add a "think" tool for structured reasoning.
    pub think_tool: bool,
-    pub temperature: f32,
-    pub top_p: f32,
-    pub top_k: u32,
+    pub sampling: api::SamplingParams,
    pub activities: Vec<ActivityEntry>,
    next_activity_id: u64,
    pub pending_yield: bool,
@ -224,6 +236,7 @@ impl Agent {
            session_id,
            context: crate::Mutex::new(context),
            readout,
+            grpc_session: std::sync::Arc::new(crate::Mutex::new(None)),
            state: crate::Mutex::new(AgentState {
                tools: agent_tools,
                mcp_tools: McpToolAccess::All,
@ -231,9 +244,12 @@ impl Agent {
                reasoning_effort: "none".to_string(),
                think_native: true,
                think_tool: false,
-                temperature: 0.6,
-                top_p: 0.95,
-                top_k: 20,
+                sampling: api::SamplingParams {
+                    temperature: 0.6,
+                    top_p: 0.95,
+                    top_k: 20,
+                    max_tokens: 4096,
+                },
                activities: Vec::new(),
                next_activity_id: 0,
                pending_yield: false,
@ -292,6 +308,9 @@ impl Agent {
            // shouldn't bleed into the main emotional readout even
            // though they hit the same vLLM server.
            readout: readout::new_shared(),
+            // Forks get their own session — can't share a bidi stream,
+            // and forks have different conversation tails anyway.
+            grpc_session: std::sync::Arc::new(crate::Mutex::new(None)),
            state: crate::Mutex::new(AgentState {
                tools,
                mcp_tools: McpToolAccess::None,
@ -299,9 +318,7 @@ impl Agent {
                reasoning_effort: "none".to_string(),
                think_native: st.think_native,
                think_tool: st.think_tool,
-                temperature: st.temperature,
-                top_p: st.top_p,
-                top_k: st.top_k,
+                sampling: st.sampling,
                activities: Vec::new(),
                next_activity_id: 0,
                pending_yield: false,
@ -316,35 +333,35 @@ impl Agent {
        })
    }

-    pub async fn assemble_prompt_tokens(&self) -> Vec<u32> {
-        self.assemble_prompt().await.0
-    }
-
-    /// Assemble a ready-to-send prompt: token stream in wire form (each
-    /// image collapsed to a single `<|image_pad|>`) paired with the
-    /// images to attach as multi_modal_data.
-    ///
-    /// Pre-send size check: if the context has grown past budget since the
-    /// last compact (accumulation between turns, a fork's context getting
-    /// bigger than expected, etc.), trim here rather than letting vLLM
-    /// reject the request. Client-side tokenization means we already know
-    /// the exact token count so there's no reason to round-trip an
-    /// oversize request.
-    pub async fn assemble_prompt(&self) -> (Vec<u32>, Vec<context::WireImage>) {
+    /// Assemble a ready-to-send prompt as wire chunks for the gRPC
+    /// session path. Image vision blocks stay inline in the token
+    /// stream; their bytes and position ranges come back in the image
+    /// list. The third element is the context's `client_match_upto`.
+    /// Also trims the conversation to budget first so we don't build
+    /// a prompt the server will reject for length.
+    pub async fn assemble_prompt(&self)
+        -> (Vec<context::WireChunk>, Vec<context::WireImage>, u32)
+    {
        let mut ctx = self.context.lock().await;
        if ctx.total_tokens() > context::context_budget_tokens() {
            ctx.trim_conversation();
        }
        let st = self.state.lock().await;
-        let (mut tokens, images, _) =
-            ctx.wire_prompt(0..ctx.conversation().len(), |_| false);
-        tokens.push(tokenizer::IM_START);
+        let conv_len = ctx.conversation().len();
+        let (mut chunks, images) = ctx.wire_chunks(0..conv_len, |_| false);
+        // Assistant-turn prologue. Merge into the trailing Tokens
+        // chunk if there is one, else push as a new chunk.
+        let mut prologue = vec![tokenizer::IM_START];
        if st.think_native {
-            tokens.extend(tokenizer::encode("assistant\n<think>\n"));
+            prologue.extend(tokenizer::encode("assistant\n<think>\n"));
        } else {
-            tokens.extend(tokenizer::encode("assistant\n"));
+            prologue.extend(tokenizer::encode("assistant\n"));
        }
-        (tokens, images)
+        match chunks.last_mut() {
+            Some(context::WireChunk::Tokens(last)) => last.extend(prologue),
+            _ => chunks.push(context::WireChunk::Tokens(prologue)),
+        }
+        let match_upto = ctx.client_match_upto();
+        (chunks, images, match_upto)
    }

    /// Rebuild the tools section of the system prompt from the current tools list.
@ -380,10 +397,16 @@ impl Agent {
    pub async fn turn(
        agent: Arc<Agent>,
    ) -> Result<TurnResult> {
+        agent_trace(&agent, format!("turn start")).await;
+
        // Collect finished background tools
        {
            let finished = agent.state.lock().await.active_tools.take_finished();
            if !finished.is_empty() {
+                agent_trace(&agent, format!(
+                    "collecting {} finished background tools",
+                    finished.len(),
+                )).await;
                let mut bg_ds = DispatchState::new();
                let mut results = Vec::new();
                for entry in finished {
@ -402,21 +425,50 @@ impl Agent {

        loop {
            let _thinking = start_activity(&agent, "thinking...").await;
+            agent_trace(&agent, format!(
+                "turn loop overflow_retries={} empty_retries={}",
+                overflow_retries, empty_retries,
+            )).await;

            let (rx, _stream_guard) = {
-                let (prompt_tokens, images) = agent.assemble_prompt().await;
+                agent_trace(&agent, format!("assembling prompt")).await;
+                let (chunks, images, match_upto) = agent.assemble_prompt().await;
+                let chunk_tokens: usize = chunks.iter().map(|c| match c {
+                    context::WireChunk::Tokens(t) => t.len(),
+                }).sum();
+                agent_trace(&agent, format!(
+                    "prompt assembled chunks={} tokens={} images={} match_upto={}",
+                    chunks.len(), chunk_tokens, images.len(), match_upto,
+                )).await;
                let st = agent.state.lock().await;
-                agent.client.stream_completion_mm(
-                    &prompt_tokens,
-                    &images,
-                    api::SamplingParams {
-                        temperature: st.temperature,
-                        top_p: st.top_p,
-                        top_k: st.top_k,
-                    },
-                    st.priority,
+                let readout_shape = agent.readout.lock().ok().and_then(|buf| {
+                    buf.manifest.as_ref().map(|m| {
+                        (m.layers.len() as u32, m.concepts.len() as u32)
+                    })
+                });
+                let sampling = st.sampling;
+                let priority = st.priority;
+                drop(st);
+                agent_trace(&agent, format!(
+                    "starting stream max_tokens={} temperature={} top_p={} top_k={} priority={:?} readout_shape={:?}",
+                    sampling.max_tokens,
+                    sampling.temperature,
+                    sampling.top_p,
+                    sampling.top_k,
+                    priority,
+                    readout_shape,
+                )).await;
+                agent.client.stream_session_mm(
+                    agent.grpc_session.clone(),
+                    chunks,
+                    images,
+                    match_upto,
+                    sampling,
+                    priority,
+                    readout_shape,
                )
            };
+            agent_trace(&agent, format!("stream task spawned")).await;

            let branch_idx = {
                let mut ctx = agent.context.lock().await;
@ -427,11 +479,41 @@ impl Agent {
                idx
            };

-            let parser = ResponseParser::new(branch_idx);
+            let think_native = agent.state.lock().await.think_native;
+            let parser = ResponseParser::new(branch_idx, think_native);
            let (mut tool_rx, parser_handle) = parser.run(rx, agent.clone());
+            agent_trace(&agent, format!(
+                "parser started branch_idx={} think_native={}",
+                branch_idx, think_native,
+            )).await;

            let mut pending_calls: Vec<PendingToolCall> = Vec::new();
-            while let Some(call) = tool_rx.recv().await {
+            loop {
+                let call = match tokio::time::timeout(
+                    std::time::Duration::from_secs(15),
+                    tool_rx.recv(),
+                ).await {
+                    Ok(Some(call)) => call,
+                    Ok(None) => {
+                        agent_trace(&agent, format!(
+                            "tool channel closed pending_calls={}",
+                            pending_calls.len(),
+                        )).await;
+                        break;
+                    }
+                    Err(_) => {
+                        agent_trace(&agent, format!(
+                            "waiting for parser/tool events pending_calls={}",
+                            pending_calls.len(),
+                        )).await;
+                        continue;
+                    }
+                };
+
+                agent_trace(&agent, format!(
+                    "tool call received id={} name={} args_len={}",
+                    call.id, call.name, call.arguments.len(),
+                )).await;
                let call_clone = call.clone();
                let agent_handle = agent.clone();
                let handle = tokio::spawn(async move {
@ -454,8 +536,10 @@ impl Agent {
            }

            // Check for stream/parse errors
+            agent_trace(&agent, format!("awaiting parser task")).await;
            match parser_handle.await {
                Ok(Err(e)) => {
+                    agent_trace(&agent, format!("parser returned error: {:#}", e)).await;
                    if context::is_context_overflow(&e) && overflow_retries < 2 {
                        overflow_retries += 1;
                        let msg = format!("context overflow — compacting ({}/2)", overflow_retries);
@ -469,8 +553,12 @@ impl Agent {
                    }
                    return Err(e);
                }
-                Err(e) => return Err(anyhow::anyhow!("parser task panicked: {}", e)),
+                Err(e) => {
+                    agent_trace(&agent, format!("parser task panicked: {}", e)).await;
+                    return Err(anyhow::anyhow!("parser task panicked: {}", e));
+                }
                Ok(Ok(())) => {
+                    agent_trace(&agent, format!("parser completed")).await;
                    // Assistant response was pushed to context by the parser;
                    // log it now that parsing is complete.
                    let ctx = agent.context.lock().await;
@ -491,6 +579,10 @@ impl Agent {
            if !has_content && pending_calls.is_empty() {
                if empty_retries < 2 {
                    empty_retries += 1;
+                    agent_trace(&agent, format!(
+                        "empty response retry {}/2",
+                        empty_retries,
+                    )).await;
                    agent.push_node(AstNode::user_msg(
                        "[system] Your previous response was empty. \
                         Please respond with text or use a tool."
@ -504,6 +596,10 @@ impl Agent {
            // Wait for tool calls to complete
            if !pending_calls.is_empty() {
                ds.had_tool_calls = true;
+                agent_trace(&agent, format!(
+                    "waiting for {} foreground tools",
+                    pending_calls.len(),
+                )).await;

                let handles = agent.state.lock().await.active_tools.take_foreground();
                let mut results = Vec::new();
@ -524,6 +620,16 @@ impl Agent {
            if st.pending_model_switch.is_some() { ds.model_switch = st.pending_model_switch.take(); }
            if st.pending_dmn_pause { ds.dmn_pause = true; st.pending_dmn_pause = false; }

+            drop(st);
+            agent_trace(&agent, format!(
+                "turn complete yield={} tool_calls={} tool_errors={} model_switch={:?} dmn_pause={}",
+                ds.yield_requested,
+                ds.had_tool_calls,
+                ds.tool_errors,
+                ds.model_switch,
+                ds.dmn_pause,
+            )).await;
+
            return Ok(TurnResult {
                yield_requested: ds.yield_requested,
                had_tool_calls: ds.had_tool_calls,
--- a/src/agent/oneshot.rs
+++ b/src/agent/oneshot.rs
@ -12,7 +12,9 @@ use crate::subconscious::{defs, prompts};

 use std::collections::HashMap;
 use std::fs;
+use std::io::Write as _;
 use std::path::PathBuf;
+use std::time::Instant;

 use super::context::AstNode;
 use super::tools::{self as agent_tools};
@ -106,6 +108,10 @@ pub async fn save_agent_log(name: &str, agent: &std::sync::Arc<Agent>) -> RunSta
    stats
 }

+/// Write one diagnostic line for `agent` to stderr in the form
+/// `[agent:<agent>] <msg>`.
+///
+/// Accepting `fmt::Arguments` lets call sites hand over
+/// `format_args!(..)` directly instead of building a `String` first.
+fn log_agent_event(agent: &str, msg: std::fmt::Arguments<'_>) {
+    eprintln!("[agent:{agent}] {msg}");
+}
+
 fn compute_run_stats(conversation: &[super::context::AstNode]) -> RunStats {
    use super::context::{AstNode, NodeBody};

@ -269,7 +275,7 @@ impl AutoAgent {
            let mut st = agent.state.lock().await;
            st.provenance = format!("standalone:{}", self.name);
            st.tools = self.tools.clone();
-            st.temperature = self.temperature;
+            st.sampling.temperature = self.temperature;
            st.priority = Some(self.priority);
        }

@ -345,20 +351,44 @@ impl AutoAgent {
        bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
    ) -> Result<(), String> {
        dbglog!("[auto] {} starting, {} steps", self.name, self.steps.len());
+        log_agent_event(&self.name, format_args!(
+            "starting run steps={} temperature={} priority={}",
+            self.steps.len(), self.temperature, self.priority));
+        let run_start = Instant::now();

        for (i, step) in self.steps.iter().enumerate() {
            self.turn = i + 1;
            self.current_phase = step.phase.clone();
+            let step_start = Instant::now();
+            log_agent_event(&self.name, format_args!(
+                "step {}/{} phase={} prompt_bytes={}",
+                i + 1, self.steps.len(), step.phase, step.prompt.len()));

            if let Some(ref check) = bail_fn {
+                log_agent_event(&self.name, format_args!(
+                    "step {}/{} phase={} bail check", i + 1, self.steps.len(), step.phase));
                check(i)?;
+                log_agent_event(&self.name, format_args!(
+                    "step {}/{} phase={} bail ok", i + 1, self.steps.len(), step.phase));
            }

            backend.push_node(AstNode::system_msg(&step.prompt)).await;
            Agent::turn(backend.0.clone()).await
-                .map_err(|e| format!("{}: {}", self.name, e))?;
+                .map_err(|e| {
+                    log_agent_event(&self.name, format_args!(
+                        "step {}/{} phase={} failed after {:.2}s: {}",
+                        i + 1, self.steps.len(), step.phase,
+                        step_start.elapsed().as_secs_f64(), e));
+                    format!("{}: {}", self.name, e)
+                })?;
+            log_agent_event(&self.name, format_args!(
+                "step {}/{} phase={} done in {:.2}s",
+                i + 1, self.steps.len(), step.phase,
+                step_start.elapsed().as_secs_f64()));
        }

+        log_agent_event(&self.name, format_args!(
+            "run completed in {:.2}s", run_start.elapsed().as_secs_f64()));
        Ok(())
    }

@ -382,8 +412,29 @@ pub async fn run_one_agent(
    count: usize,
    keys: Option<&[String]>,
 ) -> Result<AgentResult, String> {
+    let run_start = Instant::now();
+    log_agent_event(agent_name, format_args!(
+        "run_one_agent start pid={} count={} explicit_keys={}",
+        std::process::id(), count, keys.map(|k| k.len()).unwrap_or(0)));
+    log_agent_event(agent_name, format_args!(
+        "env POC_SESSION_ID={:?} POC_TRANSCRIPT_PATH={:?} POC_AGENT_OUTPUT_DIR={:?}",
+        std::env::var("POC_SESSION_ID").ok(),
+        std::env::var("POC_TRANSCRIPT_PATH").ok(),
+        std::env::var("POC_AGENT_OUTPUT_DIR").ok()));
+    if let Some(session) = crate::session::HookSession::from_env() {
+        let transcript = session.transcript();
+        log_agent_event(agent_name, format_args!(
+            "session={} transcript={} size={} exists={}",
+            session.session_id, transcript.path, transcript.size, transcript.exists()));
+    } else {
+        log_agent_event(agent_name, format_args!("no hook session in environment"));
+    }
+
    let def = defs::get_def(agent_name)
        .ok_or_else(|| format!("no .agent file for {}", agent_name))?;
+    log_agent_event(agent_name, format_args!(
+        "definition loaded steps={} tools={:?} count={:?} priority={} bail={:?}",
+        def.steps.len(), def.tools, def.count, def.priority, def.bail));

    // State dir for agent output files
    let state_dir = std::env::var("POC_AGENT_OUTPUT_DIR")
@ -392,6 +443,7 @@ pub async fn run_one_agent(
    fs::create_dir_all(&state_dir)
        .map_err(|e| format!("create state dir: {}", e))?;
    unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &state_dir); }
+    log_agent_event(agent_name, format_args!("state_dir={}", state_dir.display()));

    // Build prompt batch — either from explicit keys or the agent's query
    let agent_batch = if let Some(keys) = keys {
@ -411,6 +463,8 @@ pub async fn run_one_agent(
        prompts::AgentBatch { steps: resolved_steps, node_keys: all_keys }
    } else {
        let effective_count = def.count.unwrap_or(count);
+        log_agent_event(agent_name, format_args!(
+            "resolving default prompt placeholders effective_count={}", effective_count));
        defs::run_agent(&def, effective_count, &Default::default()).await?
    };

@ -463,6 +517,14 @@ pub async fn run_one_agent(
        })),
    });
    let n_steps = agent_batch.steps.len();
+    log_agent_event(agent_name, format_args!(
+        "prompt batch ready steps={} node_keys={}",
+        n_steps, agent_batch.node_keys.len()));
+    for (i, step) in agent_batch.steps.iter().enumerate() {
+        log_agent_event(agent_name, format_args!(
+            "prompt step {}/{} phase={} bytes={}",
+            i + 1, n_steps, step.phase, step.prompt.len()));
+    }

    // Guard: reject oversized first prompt
    let max_prompt_bytes = 800_000;
@ -485,6 +547,9 @@ pub async fn run_one_agent(
    let phases: Vec<&str> = agent_batch.steps.iter().map(|s| s.phase.as_str()).collect();
    dbglog!("[{}] {} step(s) {:?}, {}KB initial, {} nodes",
        agent_name, n_steps, phases, first_len / 1024, agent_batch.node_keys.len());
+    log_agent_event(agent_name, format_args!(
+        "tools enabled: {}",
+        effective_tools.iter().map(|t| t.name).collect::<Vec<_>>().join(", ")));

    let prompts: Vec<String> = agent_batch.steps.iter()
        .map(|s| s.prompt.clone()).collect();
@ -497,18 +562,25 @@ pub async fn run_one_agent(
    let bail_script = def.bail.as_ref().map(|name| defs::agents_dir().join(name));
    let state_dir_for_bail = state_dir.clone();
    let our_pid = std::process::id();
-    let our_pid_file = format!("pid-{}", our_pid);
+    let our_pid_file = std::env::var("POC_AGENT_PID_FILE")
+        .unwrap_or_else(|_| format!("pid-{}", our_pid));
    let step_phases_for_bail = step_phases.clone();
    let bail_fn = move |step_idx: usize| -> Result<(), String> {
        if let Some(ref script) = bail_script {
            let phase = step_phases_for_bail.get(step_idx)
                .map(String::as_str).unwrap_or("");
+            eprintln!(
+                "[agent:bail] script={} state_dir={} pid_file={} phase={}",
+                script.display(), state_dir_for_bail.display(), our_pid_file, phase);
            let status = std::process::Command::new(script)
                .arg(&our_pid_file)
                .arg(phase)
                .current_dir(&state_dir_for_bail)
                .status()
                .map_err(|e| format!("bail script {:?} failed: {}", script, e))?;
+            eprintln!(
+                "[agent:bail] script={} phase={} status={}",
+                script.display(), phase, status);
            if !status.success() {
                return Err(format!("bailed at step {}: {:?} exited {}",
                    step_idx + 1, script.file_name().unwrap_or_default(),
@ -521,6 +593,8 @@ pub async fn run_one_agent(
    call_api_with_tools_sync(
        agent_name, &prompts, &step_phases, def.temperature, def.priority,
        &effective_tools, Some(&bail_fn))?;
+    log_agent_event(agent_name, format_args!(
+        "run_one_agent completed in {:.2}s", run_start.elapsed().as_secs_f64()));

    Ok(AgentResult {
        node_keys: agent_batch.node_keys,
@ -598,6 +672,15 @@ pub fn spawn_agent(
    agent_name: &str,
    state_dir: &std::path::Path,
    session_id: &str,
+) -> Option<SpawnResult> {
+    spawn_agent_with_transcript(agent_name, state_dir, session_id, None)
+}
+
+pub fn spawn_agent_with_transcript(
+    agent_name: &str,
+    state_dir: &std::path::Path,
+    session_id: &str,
+    transcript_path: Option<&str>,
 ) -> Option<SpawnResult> {
    let def = defs::get_def(agent_name)?;
    let first_phase = def.steps.first()
@ -608,17 +691,41 @@ pub fn spawn_agent(
        .join(format!(".consciousness/logs/{}", agent_name));
    fs::create_dir_all(&log_dir).ok();
    let log_path = log_dir.join(format!("{}.log", store::compact_timestamp()));
-    let agent_log = fs::File::create(&log_path)
+    let mut agent_log = fs::File::create(&log_path)
        .unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());

-    let child = std::process::Command::new("poc-memory")
-        .args(["agent", "run", agent_name, "--count", "1", "--local",
-               "--state-dir", &state_dir.to_string_lossy()])
-        .env("POC_SESSION_ID", session_id)
-        .stdout(agent_log.try_clone().unwrap_or_else(|_| fs::File::create("/dev/null").unwrap()))
-        .stderr(agent_log)
-        .spawn()
-        .ok()?;
+    let mut cmd = std::process::Command::new("bash");
+    cmd.args([
+        "-lc",
+        r#"
+set +e
+export POC_AGENT_PID_FILE="pid-$$"
+"$@"
+status=$?
+printf '=== agent process exit status: %s at %s ===\n' "$status" "$(date --iso-8601=seconds)"
+exit "$status"
+"#,
+        "poc-memory-agent-wrapper",
+        "poc-memory", "agent", "run", agent_name, "--count", "1", "--local",
+        "--state-dir", &state_dir.to_string_lossy(),
+    ]).env("POC_SESSION_ID", session_id);
+    if let Some(path) = transcript_path.filter(|p| !p.is_empty()) {
+        cmd.env("POC_TRANSCRIPT_PATH", path);
+    }
+
+    let _ = writeln!(agent_log, "=== spawn {} ===", chrono::Local::now().format("%Y-%m-%dT%H:%M:%S"));
+    let _ = writeln!(agent_log, "agent={agent_name}");
+    let _ = writeln!(agent_log, "state_dir={}", state_dir.display());
+    let _ = writeln!(agent_log, "session_id={session_id}");
+    let _ = writeln!(agent_log, "transcript_path={}", transcript_path.unwrap_or(""));
+    let _ = writeln!(agent_log, "first_phase={first_phase}");
+    let _ = writeln!(agent_log, "command=poc-memory agent run {agent_name} --count 1 --local --state-dir {}", state_dir.display());
+    let _ = agent_log.flush();
+
+    let child_stdout = agent_log.try_clone()
+        .unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
+    let child_stderr = agent_log;
+    let child = cmd.stdout(child_stdout).stderr(child_stderr).spawn().ok()?;

    let pid = child.id();
    let pid_path = state_dir.join(format!("pid-{}", pid));
--- a/src/agent/salience.rs
+++ b/src/agent/salience.rs
@ -0,0 +1,309 @@
+// agent/salience.rs — peak extraction from per-token concept-readout traces.
+//
+// Consumes a trace of `ReadoutEntry` (per-token per-layer per-concept
+// projections streamed from the vLLM server) and produces a compact
+// list of `SaliencePeak` events — one per contiguous above-threshold
+// region per concept, placed at the local maximum.
+//
+// Pure function. No I/O, no async, no side effects. Caller supplies the
+// trace slice and manifest; caller decides what to do with the events.
+//
+// See also: `salience-trace-plumbing-architecture` memory node.
+
+use super::api::ReadoutManifest;
+use super::readout::ReadoutEntry;
+
+/// One salient moment in a trace — a concept channel crossed threshold,
+/// and we picked the local maximum within the contiguous above-threshold
+/// run.
+#[derive(Debug, Clone, PartialEq)]
+pub struct SaliencePeak {
+    /// Index into the trace (0-based) where the peak occurred.
+    pub token_offset: usize,
+    /// Concept name from the manifest.
+    pub concept: String,
+    /// z-score of the peak value vs the trace's own distribution for
+    /// that concept (mean and population std over the whole trace).
+    /// Never below `PeakConfig::sigma_threshold`, so it is positive
+    /// whenever that threshold is positive.
+    pub intensity: f32,
+}
+
+/// Tunables for peak extraction.
+#[derive(Debug, Clone)]
+pub struct PeakConfig {
+    /// Minimum z-score to count as a peak; the comparison is inclusive
+    /// (`z >= sigma_threshold`). Default 2.0 (~top 2.5% assuming
+    /// normal-ish distribution, though readouts are rarely normal).
+    pub sigma_threshold: f32,
+    /// Minimum standard deviation of a concept channel for peaks to be
+    /// reported. If a channel is numerically flat across the whole trace,
+    /// tiny fluctuations can produce spurious "peaks" with huge z-scores;
+    /// require at least this much variation before trusting the channel.
+    /// Channels whose std is strictly below this value are skipped.
+    /// Default 1e-4.
+    pub min_std: f32,
+}
+
+impl Default for PeakConfig {
+    /// A 2σ threshold, ignoring channels whose std is below 1e-4.
+    fn default() -> Self {
+        PeakConfig {
+            sigma_threshold: 2.0,
+            min_std: 1e-4,
+        }
+    }
+}
+
+/// Extract peak events from a trace for one layer.
+///
+/// `layer_idx` indexes into the per-token readout tensor's layer
+/// dimension. Returns an empty `Vec` when the trace or the manifest's
+/// concept list is empty. An entry whose readout has no row at
+/// `layer_idx`, or whose row length differs from the manifest's
+/// concept count, contributes a zero for every concept at that token;
+/// when that is true of every entry all channels are flat and no peaks
+/// are reported.
+///
+/// For each concept, every token's z-score is computed against that
+/// concept's mean and population std over the whole trace. Channels
+/// with std below `config.min_std` are skipped. Each maximal run of
+/// consecutive tokens with `z >= config.sigma_threshold` yields one
+/// peak, placed at the first token that attains the run's maximum z.
+///
+/// Peaks are returned sorted by `token_offset` ascending. When two
+/// peaks share an offset they're ordered by `concept` lexicographically
+/// for determinism.
+pub fn pick_peaks(
+    trace: &[ReadoutEntry],
+    manifest: &ReadoutManifest,
+    layer_idx: usize,
+    config: &PeakConfig,
+) -> Vec<SaliencePeak> {
+    if trace.is_empty() || manifest.concepts.is_empty() {
+        return Vec::new();
+    }
+
+    let n_concepts = manifest.concepts.len();
+    let n_tokens = trace.len();
+
+    // One column of per-token values per concept for the selected layer.
+    // Built with an iterator rather than `vec![Vec::with_capacity(..); n]`:
+    // the macro clones the element, and cloning an empty Vec drops its
+    // capacity, so only one column would actually be preallocated.
+    let mut by_concept: Vec<Vec<f32>> = (0..n_concepts)
+        .map(|_| Vec::with_capacity(n_tokens))
+        .collect();
+
+    // Entries where the layer is missing or the concept count doesn't
+    // match the manifest are treated as zeros — the downstream z-score
+    // will drown them as baseline if they're sparse, and if they
+    // dominate the caller has bigger problems.
+    for entry in trace {
+        match entry.readout.get(layer_idx) {
+            Some(row) if row.len() == n_concepts => {
+                for (col, v) in by_concept.iter_mut().zip(row) {
+                    col.push(*v);
+                }
+            }
+            _ => {
+                for col in by_concept.iter_mut() {
+                    col.push(0.0);
+                }
+            }
+        }
+    }
+
+    let mut peaks: Vec<SaliencePeak> = Vec::new();
+    for (concept, values) in manifest.concepts.iter().zip(&by_concept) {
+        let (mean, std) = mean_std(values);
+        if std < config.min_std {
+            continue;
+        }
+
+        // `open` is `Some((offset, z))` of the best sample seen so far
+        // in the current above-threshold run, `None` between runs.
+        let mut open: Option<(usize, f32)> = None;
+        for (i, v) in values.iter().enumerate() {
+            let z = (*v - mean) / std;
+            if z >= config.sigma_threshold {
+                match open {
+                    // Ties keep the earlier sample.
+                    Some((_, best)) if z <= best => {}
+                    _ => open = Some((i, z)),
+                }
+            } else if let Some((token_offset, intensity)) = open.take() {
+                // Run just ended: emit its maximum.
+                peaks.push(SaliencePeak {
+                    token_offset,
+                    concept: concept.clone(),
+                    intensity,
+                });
+            }
+        }
+        // Flush a run that extends to the end of the trace.
+        if let Some((token_offset, intensity)) = open {
+            peaks.push(SaliencePeak {
+                token_offset,
+                concept: concept.clone(),
+                intensity,
+            });
+        }
+    }
+
+    peaks.sort_by(|a, b| a.token_offset.cmp(&b.token_offset).then_with(|| a.concept.cmp(&b.concept)));
+    peaks
+}
+
+/// Population mean and standard deviation of `xs` (variance divides by
+/// `n`, not `n - 1`). An empty slice yields `(0.0, 0.0)`.
+fn mean_std(xs: &[f32]) -> (f32, f32) {
+    match xs.len() {
+        0 => (0.0, 0.0),
+        len => {
+            let count = len as f32;
+            let mean = xs.iter().sum::<f32>() / count;
+            let sum_sq_dev = xs.iter().map(|x| (x - mean).powi(2)).sum::<f32>();
+            (mean, (sum_sq_dev / count).sqrt())
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn manifest(concepts: &[&str], layers: &[u32]) -> ReadoutManifest {
+        ReadoutManifest {
+            concepts: concepts.iter().map(|s| s.to_string()).collect(),
+            layers: layers.to_vec(),
+        }
+    }
+
+    /// Build a trace where all entries have one hooked layer and the
+    /// given per-token values for each concept. `values[t][c]` = value
+    /// at token t, concept c.
+    fn trace(values: &[Vec<f32>]) -> Vec<ReadoutEntry> {
+        values.iter().enumerate().map(|(i, row)| ReadoutEntry {
+            token_id: i as u32,
+            readout: vec![row.clone()],
+        }).collect()
+    }
+
+    #[test]
+    fn empty_trace_returns_empty() {
+        let m = manifest(&["curious"], &[63]);
+        let peaks = pick_peaks(&[], &m, 0, &PeakConfig::default());
+        assert!(peaks.is_empty());
+    }
+
+    #[test]
+    fn empty_manifest_returns_empty() {
+        let m = manifest(&[], &[63]);
+        let t = trace(&[vec![], vec![], vec![]]);
+        let peaks = pick_peaks(&t, &m, 0, &PeakConfig::default());
+        assert!(peaks.is_empty());
+    }
+
+    #[test]
+    fn flat_channel_produces_no_peaks() {
+        let m = manifest(&["curious"], &[63]);
+        let t = trace(&[vec![1.0], vec![1.0], vec![1.0], vec![1.0], vec![1.0]]);
+        let peaks = pick_peaks(&t, &m, 0, &PeakConfig::default());
+        assert!(peaks.is_empty(), "flat channel should produce no peaks, got {:?}", peaks);
+    }
+
+    #[test]
+    fn single_spike_detected() {
+        // Nine baseline zeros plus one 5.0 spike (ten tokens) — that
+        // single token's z-score is 3.0, comfortably above 2σ.
+        let m = manifest(&["curious"], &[63]);
+        let mut rows: Vec<Vec<f32>> = (0..10).map(|_| vec![0.0]).collect();
+        rows[5] = vec![5.0];
+        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
+        assert_eq!(peaks.len(), 1);
+        assert_eq!(peaks[0].concept, "curious");
+        assert_eq!(peaks[0].token_offset, 5);
+        assert!(peaks[0].intensity >= 2.0);
+    }
+
+    #[test]
+    fn contiguous_region_emits_one_peak_at_max() {
+        // Values 0, 0, 0, 2, 5, 3, 0, 0 — a 2-5-3 hump. Against the
+        // global mean/std only the 5 (z ≈ 2.1) clears 2σ, so exactly
+        // one peak is emitted, at offset 4.
+        let m = manifest(&["aha"], &[63]);
+        let rows: Vec<Vec<f32>> = [0.0, 0.0, 0.0, 2.0, 5.0, 3.0, 0.0, 0.0]
+            .iter().map(|v| vec![*v]).collect();
+        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
+        assert_eq!(peaks.len(), 1, "expected one peak for one contiguous run, got {:?}", peaks);
+        assert_eq!(peaks[0].token_offset, 4);
+    }
+
+    #[test]
+    fn multiple_concepts_independent() {
+        let m = manifest(&["curious", "aha"], &[63]);
+        // curious spikes at 2, aha spikes at 7
+        let rows: Vec<Vec<f32>> = (0..10).map(|i| {
+            let c = if i == 2 { 4.0 } else { 0.0 };
+            let a = if i == 7 { 4.0 } else { 0.0 };
+            vec![c, a]
+        }).collect();
+        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
+        assert_eq!(peaks.len(), 2);
+        // Sorted by offset — curious(2) comes first, aha(7) second.
+        assert_eq!(peaks[0].concept, "curious");
+        assert_eq!(peaks[0].token_offset, 2);
+        assert_eq!(peaks[1].concept, "aha");
+        assert_eq!(peaks[1].token_offset, 7);
+    }
+
+    #[test]
+    fn two_separated_runs_emit_two_peaks() {
+        // Longer baseline so the two spikes don't dominate the global
+        // mean/std — 30 tokens of zeros with two 5.0 spikes at 10 and 20.
+        let m = manifest(&["curious"], &[63]);
+        let mut rows: Vec<Vec<f32>> = (0..30).map(|_| vec![0.0]).collect();
+        rows[10] = vec![5.0];
+        rows[20] = vec![5.0];
+        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
+        assert_eq!(peaks.len(), 2, "expected two peaks for two runs, got {:?}", peaks);
+        assert_eq!(peaks[0].token_offset, 10);
+        assert_eq!(peaks[1].token_offset, 20);
+    }
+
+    #[test]
+    fn trailing_run_is_flushed() {
+        // Peak runs to the end of the trace — must still emit.
+        // Use a longer baseline so the trailing spike is genuinely
+        // above threshold on the global stats.
+        let m = manifest(&["curious"], &[63]);
+        let mut rows: Vec<Vec<f32>> = (0..30).map(|_| vec![0.0]).collect();
+        rows[27] = vec![3.0];
+        rows[28] = vec![5.0];
+        rows[29] = vec![4.0];
+        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
+        assert_eq!(peaks.len(), 1, "expected one peak for one trailing run, got {:?}", peaks);
+        assert_eq!(peaks[0].token_offset, 28, "peak should land at the local max of the trailing run");
+    }
+
+    #[test]
+    fn sub_threshold_produces_nothing() {
+        // All non-zero values are small; z-scores won't cross 2σ.
+        let m = manifest(&["curious"], &[63]);
+        let rows: Vec<Vec<f32>> = [0.0, 0.1, 0.0, 0.1, 0.0, 0.1, 0.0, 0.1]
+            .iter().map(|v| vec![*v]).collect();
+        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
+        assert!(peaks.is_empty(), "below-threshold wiggle should produce no peaks, got {:?}", peaks);
+    }
+
+    #[test]
+    fn layer_out_of_range_returns_empty() {
+        let m = manifest(&["curious"], &[63]);
+        let rows: Vec<Vec<f32>> = (0..10).map(|i| vec![if i == 5 { 5.0 } else { 0.0 }]).collect();
+        // Trace has one layer (index 0); asking for layer 3 should see
+        // all-zero columns, which are flat and produce no peaks.
+        let peaks = pick_peaks(&trace(&rows), &m, 3, &PeakConfig::default());
+        assert!(peaks.is_empty());
+    }
+
+    #[test]
+    fn manifest_concept_count_mismatch_is_safe() {
+        // Manifest says 2 concepts; each readout row only has 1 value.
+        // Rows should be treated as all-zero (via the len check) and
+        // produce no peaks without panicking.
+        let m = manifest(&["a", "b"], &[63]);
+        let rows: Vec<Vec<f32>> = (0..10).map(|_| vec![1.0]).collect();
+        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
+        assert!(peaks.is_empty());
+    }
+
+    #[test]
+    fn threshold_tunable() {
+        // Same spike, stricter threshold — no peak.
+        let m = manifest(&["curious"], &[63]);
+        let mut rows: Vec<Vec<f32>> = (0..10).map(|_| vec![0.0]).collect();
+        rows[5] = vec![5.0];
+        let strict = PeakConfig { sigma_threshold: 100.0, ..PeakConfig::default() };
+        let peaks = pick_peaks(&trace(&rows), &m, 0, &strict);
+        assert!(peaks.is_empty());
+    }
+}
--- a/src/agent/tokenizer.rs
+++ b/src/agent/tokenizer.rs
@ -33,16 +33,17 @@ fn get() -> Option<&'static Tokenizer> {
    TOKENIZER.get()
 }

+/// Return the global tokenizer, panicking if it has not been set.
+///
+/// # Panics
+/// Panics when `get()` returns `None`, i.e. the tokenizer was never
+/// initialized.
+fn expect_tokenizer() -> &'static Tokenizer {
+    get().expect("tokenizer not initialized; expected ~/.consciousness/tokenizer-qwen35.json")
+}
+
 /// Tokenize a raw string, returning token IDs.
-/// Returns empty vec if the tokenizer is not initialized.
 pub fn encode(text: &str) -> Vec<u32> {
-    match get() {
-        Some(t) => t.encode(text, false)
-            .unwrap_or_else(|e| panic!("tokenization failed: {}", e))
-            .get_ids()
-            .to_vec(),
-        None => vec![],
-    }
+    expect_tokenizer()
+        .encode(text, false)
+        .unwrap_or_else(|e| panic!("tokenization failed: {}", e))
+        .get_ids()
+        .to_vec()
 }

 /// Tokenize a chat entry with template wrapping:
@ -66,15 +67,12 @@ pub fn count(text: &str) -> usize {

 /// Decode token IDs back to text.
 pub fn decode(ids: &[u32]) -> String {
-    match get() {
-        Some(t) => t.decode(ids, true)
-            .unwrap_or_else(|e| panic!("detokenization failed: {}", e)),
-        None => String::new(),
-    }
+    expect_tokenizer()
+        .decode(ids, true)
+        .unwrap_or_else(|e| panic!("detokenization failed: {}", e))
 }

 /// Check if the tokenizer is initialized.
 pub fn is_initialized() -> bool {
    TOKENIZER.get().is_some()
 }
-
--- a/src/agent/tools/memory.rs
+++ b/src/agent/tools/memory.rs
@ -209,7 +209,24 @@ memory_tool!(graph_trace, ref, key: [str]);

 // ── Definitions ────────────────────────────────────────────────

-pub fn memory_tools() -> [super::Tool; 20] {
+/// Handler for the `memory_new` tool: an alias that forwards its arguments
+/// unchanged to `jsonargs_memory_write`.
+async fn jsonargs_memory_new(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args: &serde_json::Value) -> Result<String> {
+    jsonargs_memory_write(agent, args).await
+}
+
+/// Handler for the `memory_link` tool.
+///
+/// `source` and `target` are required. When `strength` is present as a
+/// number, the full argument object is forwarded to
+/// `jsonargs_memory_link_set`; otherwise only `source` and `target` are
+/// passed on to `jsonargs_memory_link_add`.
+async fn jsonargs_memory_link(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args: &serde_json::Value) -> Result<String> {
+    // Validate both endpoints up front, whichever branch is taken.
+    let source = get_str(args, "source")?;
+    let target = get_str(args, "target")?;
+
+    let has_strength = args.get("strength").and_then(|v| v.as_f64()).is_some();
+    if !has_strength {
+        let add_args = serde_json::json!({
+            "source": source,
+            "target": target,
+        });
+        return jsonargs_memory_link_add(agent, &add_args).await;
+    }
+    jsonargs_memory_link_set(agent, args).await
+}
+
+pub fn memory_tools() -> [super::Tool; 22] {
    use super::Tool;
    macro_rules! tool {
        ($name:ident, $desc:expr, $params:expr) => {
@ -234,6 +251,11 @@ pub fn memory_tools() -> [super::Tool; 20] {
            "properties": { "key": {"type": "string"}, "content": {"type": "string"} },
            "required": ["key", "content"]
        }"#),
+        tool!(memory_new, "Create or update a memory node. Alias for memory_write.", r#"{
+            "type": "object",
+            "properties": { "key": {"type": "string"}, "content": {"type": "string"} },
+            "required": ["key", "content"]
+        }"#),
        tool!(memory_search, "Search via spreading activation from seed keys.", r#"{
            "type": "object",
            "properties": {
@ -264,6 +286,16 @@ pub fn memory_tools() -> [super::Tool; 20] {
            "properties": { "source": {"type": "string"}, "target": {"type": "string"} },
            "required": ["source", "target"]
        }"#),
+        tool!(memory_link, "Add or update a link between two memory nodes. Alias for memory_link_add/memory_link_set.", r#"{
+            "type": "object",
+            "properties": {
+                "source": {"type": "string"},
+                "target": {"type": "string"},
+                "strength": {"type": "number", "description": "Optional; 0.01 to 1.0"},
+                "label": {"type": "string", "description": "Accepted for compatibility; currently ignored"}
+            },
+            "required": ["source", "target"]
+        }"#),
        tool!(memory_delete, "Soft-delete a node.", r#"{
            "type": "object",
            "properties": { "key": {"type": "string"} },
--- a/src/agent/tools/vision.rs
+++ b/src/agent/tools/vision.rs
@ -57,15 +57,18 @@ async fn view_image(
    let (w, h) = (dim.width as u32, dim.height as u32);
    let mime = mime_from_extension(path);

-    let image_leaf = AstNode::image(bytes.clone(), mime, h, w);
-    let token_count = image_leaf.leaf().unwrap().tokens().saturating_sub(2);
-
    let agent = agent.context("view_image requires agent context")?;
+
+    // token_count is populated when the image reaches the server via
+    // AppendImage (the server is authoritative for the IMAGE_PAD
+    // count). Placeholder of 0 here until AppendImage is wired; the
+    // leaf's count gets rewritten from the RPC response at send time.
+    let image_leaf = AstNode::image(bytes.clone(), mime, h, w);
+
    let branch = AstNode::branch(Role::User, vec![image_leaf]);
    agent.context.lock().await.push_log(Section::Conversation, branch);

-    Ok(format!("loaded {} ({}, {}x{}, {} tokens)",
-        a.file_path, mime, w, h, token_count))
+    Ok(format!("loaded {} ({}, {}x{})", a.file_path, mime, w, h))
 }

 fn mime_from_extension(path: &std::path::Path) -> &'static str {
--- a/src/bin/ch.rs
+++ b/src/bin/ch.rs
@ -0,0 +1,112 @@
+// `ch` — minimal channel CLI.
+//
+//   ch send <channel-path> <message>
+//   ch recv <channel-path> [--all-new] [--min-count N]
+//
+// Connects to ~/.consciousness/channels/<top>.sock, where <top> is the
+// part of the channel path before the first '.', and speaks the
+// channel.capnp protocol to the daemon listening on that socket.
+
+use std::path::PathBuf;
+use std::process::ExitCode;
+
+use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
+use futures::AsyncReadExt;
+use tokio_util::compat::TokioAsyncReadCompatExt;
+
+use consciousness::channel_capnp::channel_server;
+
+/// Directory holding the channel sockets: `<home>/.consciousness/channels`.
+///
+/// If the home directory cannot be determined, an empty base path is used,
+/// which makes the result relative.
+fn channels_dir() -> PathBuf {
+    let home = dirs::home_dir().unwrap_or_default();
+    home.join(".consciousness/channels")
+}
+
+/// Socket path for `channel`: `<channels_dir>/<top>.sock`, where `<top>` is
+/// the text before the first '.' (or the whole name if it has no '.').
+fn sock_for(channel: &str) -> PathBuf {
+    let top = match channel.split_once('.') {
+        Some((head, _rest)) => head,
+        None => channel,
+    };
+    let mut sock = channels_dir();
+    sock.push(format!("{top}.sock"));
+    sock
+}
+
+/// Open a Cap'n Proto RPC session over the Unix socket at `sock` and return
+/// the server's bootstrap `channel_server` client.
+///
+/// Returns `Err` with a `connect <path>: <cause>` message when the socket
+/// cannot be opened.
+///
+/// The RPC system is driven by a task started with `spawn_local`, so this
+/// must be awaited inside a `LocalSet` (as `main` does via `run_until`).
+async fn connect(sock: &std::path::Path) -> Result<channel_server::Client, String> {
+    let stream = tokio::net::UnixStream::connect(sock).await
+        .map_err(|e| format!("connect {}: {e}", sock.display()))?;
+    // Adapt the tokio stream to the futures-io traits and split it into halves.
+    let (reader, writer) = stream.compat().split();
+    let network = Box::new(twoparty::VatNetwork::new(
+        futures::io::BufReader::new(reader),
+        futures::io::BufWriter::new(writer),
+        rpc_twoparty_capnp::Side::Client,
+        Default::default(),
+    ));
+    let mut rpc = RpcSystem::new(network, None);
+    // Take the bootstrap capability before the RPC system is moved into its task.
+    let client: channel_server::Client = rpc.bootstrap(rpc_twoparty_capnp::Side::Server);
+    tokio::task::spawn_local(rpc);
+    Ok(client)
+}
+
+// Entry point for the `ch` CLI.
+//
+// Dispatches on the first argument (`send` or `recv`), connects to the
+// socket derived from the channel name and performs one RPC call.
+//
+// Exit codes: 0 on success, 1 on any error (including bad sub-command
+// arguments), 2 when no sub-command was given at all.
+#[tokio::main(flavor = "current_thread")]
+async fn main() -> ExitCode {
+    let args: Vec<String> = std::env::args().collect();
+    if args.len() < 2 {
+        // argv may be empty, so never index args[0] unconditionally.
+        let prog = args.first().map(String::as_str).unwrap_or("ch");
+        eprintln!("usage: {prog} <send|recv> <channel> [args...]");
+        return ExitCode::from(2);
+    }
+
+    let cmd = args[1].clone();
+    // `connect` uses `spawn_local`, so all RPC work runs inside a LocalSet.
+    let local = tokio::task::LocalSet::new();
+    let result: Result<(), String> = local.run_until(async move {
+        match cmd.as_str() {
+            "send" => {
+                if args.len() < 4 {
+                    return Err("usage: ch send <channel> <message...>".into());
+                }
+                let channel = &args[2];
+                // Everything after the channel is the message, re-joined with spaces.
+                let message = args[3..].join(" ");
+                let sock = sock_for(channel);
+                let client = connect(&sock).await?;
+                let mut req = client.send_request();
+                req.get().set_channel(channel);
+                req.get().set_message(&message);
+                req.send().promise.await.map_err(|e| format!("send: {e}"))?;
+                println!("sent to {channel}");
+                Ok(())
+            }
+            "recv" => {
+                if args.len() < 3 {
+                    return Err("usage: ch recv <channel> [--all-new] [--min-count N]".into());
+                }
+                let channel = &args[2];
+                let mut all_new = false;
+                let mut min_count: u32 = 20;
+                // Hand-rolled flag parsing over the remaining arguments.
+                let mut i = 3;
+                while i < args.len() {
+                    match args[i].as_str() {
+                        "--all-new" => { all_new = true; i += 1; }
+                        "--min-count" => {
+                            min_count = args.get(i+1)
+                                .ok_or("--min-count needs an argument")?
+                                .parse().map_err(|e| format!("--min-count: {e}"))?;
+                            i += 2;
+                        }
+                        other => return Err(format!("unknown arg: {other}")),
+                    }
+                }
+                let sock = sock_for(channel);
+                let client = connect(&sock).await?;
+                let mut req = client.recv_request();
+                req.get().set_channel(channel);
+                req.get().set_all_new(all_new);
+                req.get().set_min_count(min_count);
+                let reply = req.send().promise.await.map_err(|e| format!("recv: {e}"))?;
+                let text = reply.get().map_err(|e| e.to_string())?
+                    .get_text().map_err(|e| e.to_string())?
+                    .to_str().map_err(|e| e.to_string())?;
+                print!("{text}");
+                // Always finish the output with a newline.
+                if !text.ends_with('\n') { println!(); }
+                Ok(())
+            }
+            other => Err(format!("unknown command: {other} (use send|recv)")),
+        }
+    }).await;
+
+    match result {
+        Ok(()) => ExitCode::SUCCESS,
+        Err(e) => { eprintln!("error: {e}"); ExitCode::from(1) }
+    }
+}
--- a/src/bin/consciousness.rs
+++ b/src/bin/consciousness.rs
@ -1,7 +1,28 @@
-#![feature(panic_backtrace_config)]
+#![cfg_attr(feature = "nightly-diagnostics", feature(panic_backtrace_config))]
 #![warn(unreachable_pub)]

 fn main() {
+    // Force the default panic hook to print a backtrace. stderr is
+    // already redirected to a daemon log; without this the hook obeys
+    // RUST_BACKTRACE (unset by default), so the log only shows the
+    // "note: run with `RUST_BACKTRACE=full`" tail and the actual
+    // frames are lost.
+    //
+    // SAFETY: called before any other thread is spawned, so no
+    // concurrent env reader can race.
+    if std::env::var_os("RUST_BACKTRACE").is_none() {
+        unsafe { std::env::set_var("RUST_BACKTRACE", "1"); }
+    }
+
+    #[cfg(feature = "nightly-diagnostics")]
    std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short);
+
+    // rustls 0.23 requires an explicit process-wide CryptoProvider
+    // when both `ring` and `aws-lc-rs` are in the dep graph (otherwise
+    // it panics on first ClientConfig::builder()). Pick `ring`.
+    rustls::crypto::ring::default_provider()
+        .install_default()
+        .expect("install rustls crypto provider");
+
    consciousness::user::main()
 }
--- a/src/cli/admin.rs
+++ b/src/cli/admin.rs
@ -4,44 +4,93 @@ use anyhow::Result;
 use crate::hippocampus as memory;
 use crate::hippocampus::store;

-fn install_default_file(data_dir: &std::path::Path, name: &str, content: &str) -> Result<()> {
-    let path = data_dir.join(name);
-    if !path.exists() {
-        std::fs::write(&path, content)?;
-        println!("Created {}", path.display());
+/// A memory node that `cmd_init` seeds when it is missing from the store.
+struct DefaultMemoryNode {
+    /// Key the node is written under in the memory store.
+    key: &'static str,
+    /// File name looked up under the identity and data directories for
+    /// existing content to seed from.
+    filename: &'static str,
+    /// Built-in content used when no non-empty file is found.
+    default_content: &'static str,
+}
+
+/// Nodes seeded by `cmd_init`. Each entry's fallback content is embedded in
+/// the binary at compile time from the `defaults/` directory.
+const DEFAULT_MEMORY_NODES: &[DefaultMemoryNode] = &[
+    DefaultMemoryNode {
+        key: "identity",
+        filename: "identity.md",
+        default_content: include_str!("../../defaults/identity.md"),
+    },
+    DefaultMemoryNode {
+        key: "on-consciousness",
+        filename: "on-consciousness.md",
+        default_content: include_str!("../../defaults/on-consciousness.md"),
+    },
+    // Note: the key differs from the file name for this node.
+    DefaultMemoryNode {
+        key: "memory-instructions-core",
+        filename: "instructions.md",
+        default_content: include_str!("../../defaults/instructions.md"),
+    },
+];
+
+pub fn cmd_transcript_tail(path: &str, count: usize, newest_first: bool) -> Result<()> {
+    let Some(iter) = crate::conversation::TailMessages::open(path) else {
+        anyhow::bail!("could not open transcript {}", path);
+    };
+
+    let mut messages: Vec<_> = iter.take(count).collect();
+    if !newest_first {
+        messages.reverse();
    }
+
+    for message in messages {
+        let role = match message.role {
+            crate::conversation::TranscriptRole::User => "user",
+            crate::conversation::TranscriptRole::Assistant => "assistant",
+        };
+        let timestamp = message.timestamp.as_deref().unwrap_or("-");
+
+        println!("--- {role} offset={} timestamp={} ---", message.offset, timestamp);
+        println!("{}", message.text);
+        println!();
+    }
+
    Ok(())
 }

+/// Choose the content used to seed `node`.
+///
+/// The first non-blank file wins, searched in this order: the identity
+/// directory, then the data directory. Unreadable or whitespace-only files
+/// are skipped. If neither yields content, the built-in default is used.
+fn default_node_content(cfg: &crate::config::Config, node: &DefaultMemoryNode) -> String {
+    // Order matters: identity_dir takes precedence over data_dir.
+    let candidates = [
+        cfg.identity_dir.join(node.filename),
+        cfg.data_dir.join(node.filename),
+    ];
+
+    // The iterator is lazy, so the second file is only read if the first
+    // one is missing or blank.
+    candidates
+        .iter()
+        .filter_map(|path| std::fs::read_to_string(path).ok())
+        .find(|content| !content.trim().is_empty())
+        .unwrap_or_else(|| node.default_content.to_string())
+}
+
 pub async fn cmd_init() -> Result<()> {
    let cfg = crate::config::get();

    // Ensure data directory exists
    std::fs::create_dir_all(&cfg.data_dir)?;

-    // Install filesystem files (not store nodes)
-    install_default_file(&cfg.data_dir, "instructions.md",
-        include_str!("../../defaults/instructions.md"))?;
-    install_default_file(&cfg.data_dir, "on-consciousness.md",
-        include_str!("../../defaults/on-consciousness.md"))?;
-
-    // Seed identity node if empty
-    let store = memory::access_local()?;
-    if !store.contains_key("identity").unwrap_or(false) {
-        let default = include_str!("../../defaults/identity.md");
-        store.upsert("identity", default)?;
-        println!("Seeded identity in store");
+    // Seed default memory nodes if missing. These used to live as markdown
+    // files before identity/context moved fully into the memory graph.
+    for node in DEFAULT_MEMORY_NODES {
+        if memory::memory_render(None, node.key, Some(true)).await.is_err() {
+            let content = default_node_content(&cfg, node);
+            let _ = memory::memory_write(None, node.key, &content).await?;
+            println!("Seeded {} in store from {}", node.key, node.filename);
+        }
    }
-    store.save()?;
-    println!("Initialized with {} nodes", store.all_keys().unwrap_or_default().len());

    // Create config if none exists
    let config_path = std::env::var("POC_MEMORY_CONFIG")
        .map(std::path::PathBuf::from)
-        .unwrap_or_else(|_| {
-            dirs::home_dir().unwrap_or_default()
-                .join(".consciousness/config.jsonl")
-        });
+        .unwrap_or_else(|_| crate::config::config_path());
    if !config_path.exists() {
        let config_dir = config_path.parent().unwrap();
        std::fs::create_dir_all(config_dir)?;
@ -51,7 +100,7 @@ pub async fn cmd_init() -> Result<()> {
            config_path.display());
    }

-    println!("Done. Run `poc-memory load-context --stats` to verify.");
+    println!("Done. Run `poc-memory admin load-context --stats` to verify.");
    Ok(())
 }

--- a/src/cli/agent.rs
+++ b/src/cli/agent.rs
@ -2,8 +2,13 @@

 use anyhow::{bail, Context, Result};
 use crate::hippocampus as memory;
+use std::time::Instant;

 pub async fn cmd_run_agent(agent: &str, count: usize, target: &[String], query: Option<&str>, dry_run: bool, _local: bool, state_dir: Option<&str>) -> Result<()> {
+    let start = Instant::now();
+    eprintln!(
+        "[agent-cli] start agent={} count={} targets={} query={:?} dry_run={} local={} state_dir={:?} pid={}",
+        agent, count, target.len(), query, dry_run, _local, state_dir, std::process::id());
    // Mark as agent so tool calls (e.g. poc-memory render) don't
    // pollute the user's seen set as a side effect
    // SAFETY: single-threaded at this point (CLI startup, before any agent work)
@ -45,14 +50,19 @@ pub async fn cmd_run_agent(agent: &str, count: usize, target: &[String], query:
            if let Err(e) = crate::agent::oneshot::run_one_agent(
                agent, count, Some(&[key.clone()]),
            ).await {
+                eprintln!("[agent-cli] ERROR agent={} target={} error={}", agent, key, e);
                println!("[{}] ERROR on {}: {}", agent, key, e);
            }
        }
    } else {
-        crate::agent::oneshot::run_one_agent(
+        if let Err(e) = crate::agent::oneshot::run_one_agent(
            agent, count, None,
-        ).await.map_err(|e| anyhow::anyhow!("{}", e))?;
+        ).await {
+            eprintln!("[agent-cli] ERROR agent={} error={}", agent, e);
+            return Err(anyhow::anyhow!("{}", e));
+        }
    }
+    eprintln!("[agent-cli] done agent={} elapsed={:.2}s",
+        agent, start.elapsed().as_secs_f64());
    Ok(())
 }
-
--- a/src/config.rs
+++ b/src/config.rs
@ -201,16 +201,23 @@ pub fn watch_config(cli: crate::user::CliArgs) {
            {
                crate::dbglog!("[config] watch({}) failed: {}", parent.display(), e);
                return;
-            }
-            crate::dbglog!("[config] watching {}", path.display());
+			}
+			crate::dbglog!("[config] watching {}", path.display());

-            while let Ok(res) = rx.recv() {
-                let Ok(events) = res else { continue; };
-                if !events.iter().any(|e| e.path == path) { continue; }
+			let mut last_seen = config_file_state(&path);
+			while let Ok(res) = rx.recv() {
+				let Ok(events) = res else { continue; };
+				if !events.iter().any(|e| e.path == path) { continue; }

-                // Reload both halves.
-                let mem_changed = reload();
-                let app_changed = match build_figment(&cli).extract::<AppConfig>() {
+				let current_seen = config_file_state(&path);
+				if current_seen == last_seen {
+					continue;
+				}
+				last_seen = current_seen;
+
+				// Reload both halves.
+				let mem_changed = reload();
+				let app_changed = match build_figment(&cli).extract::<AppConfig>() {
                    Ok(app) => {
                        install_app(app);
                        true
@ -223,8 +230,13 @@ pub fn watch_config(cli: crate::user::CliArgs) {
                crate::dbglog!("[config] reloaded (memory_changed={}, app_changed={})",
                    mem_changed, app_changed);
            }
-        })
-        .ok();
+		})
+		.ok();
+}
+
+/// Snapshot of the config file used to detect real changes: its
+/// modification time and length in bytes.
+///
+/// Returns `None` when the metadata or the modification time cannot be read
+/// (for example, when the file does not exist).
+fn config_file_state(path: &std::path::Path) -> Option<(std::time::SystemTime, u64)> {
+    let meta = std::fs::metadata(path).ok()?;
+    let modified = meta.modified().ok()?;
+    Some((modified, meta.len()))
 }

 // ============================================================
--- a/src/conversation/claude.rs
+++ b/src/conversation/claude.rs
@ -0,0 +1,113 @@
+use serde_json::Value;
+
+use super::{ConversationSource, TranscriptMessage, TranscriptRole};
+
+/// `ConversationSource` for Claude-format transcript records.
+pub struct ClaudeSource;
+
+impl ConversationSource for ClaudeSource {
+    /// Delegates to this module's `parse_message`.
+    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
+        parse_message(obj, offset)
+    }
+
+    /// Delegates to this module's `is_compaction`.
+    fn is_compaction(&self, obj: &Value) -> bool {
+        is_compaction(obj)
+    }
+
+    /// Byte-level check for the compaction marker phrase in the raw record;
+    /// presumably lets callers skip JSON parsing for records that cannot match.
+    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
+        contains_bytes(obj_bytes, b"This session is being continued")
+    }
+}
+
+/// Flatten a message `content` value into plain text.
+///
+/// A JSON string is used as-is. An array contributes the `text` of every
+/// element whose `type` is `"text"`, joined with single spaces. Any other
+/// JSON type, or an empty result, yields `None`.
+fn text_content(value: &Value) -> Option<String> {
+    let joined = match value {
+        Value::String(raw) => raw.clone(),
+        Value::Array(blocks) => {
+            let mut parts: Vec<&str> = Vec::new();
+            for block in blocks {
+                // Skip non-text blocks such as tool calls.
+                if block.get("type").and_then(Value::as_str) != Some("text") {
+                    continue;
+                }
+                if let Some(piece) = block.get("text").and_then(Value::as_str) {
+                    parts.push(piece);
+                }
+            }
+            parts.join(" ")
+        }
+        _ => return None,
+    };
+
+    if joined.is_empty() { None } else { Some(joined) }
+}
+
+/// Convert one Claude transcript record into a normalized message.
+///
+/// Only records whose top-level `type` is `"user"` or `"assistant"` are
+/// accepted. The content is read from `message.content`, or from the
+/// record's own `content` when there is no `message` field. Returns `None`
+/// for other record types and for records that carry no text.
+pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
+    let kind = obj.get("type").and_then(Value::as_str)?;
+    let role = match kind {
+        "user" => TranscriptRole::User,
+        "assistant" => TranscriptRole::Assistant,
+        _ => return None,
+    };
+
+    // Fall back to the record itself when it has no `message` wrapper.
+    let body = match obj.get("message") {
+        Some(inner) => inner,
+        None => obj,
+    };
+    let text = text_content(body.get("content")?)?;
+
+    // The timestamp is optional and only kept when it is a JSON string.
+    let timestamp = match obj.get("timestamp") {
+        Some(Value::String(ts)) => Some(ts.clone()),
+        _ => None,
+    };
+
+    Some(TranscriptMessage { role, text, timestamp, offset })
+}
+
+/// True when `obj` is a `"user"` record whose `message.content` is a string
+/// starting with the compaction marker phrase.
+pub(crate) fn is_compaction(obj: &Value) -> bool {
+    if obj.get("type").and_then(Value::as_str) != Some("user") {
+        return false;
+    }
+
+    // Only plain-string content is considered; array content never matches.
+    match obj.get("message").and_then(|m| m.get("content")) {
+        Some(Value::String(content)) => content.starts_with("This session is being continued"),
+        _ => false,
+    }
+}
+
+/// Report whether `needle` occurs in `haystack` as a contiguous byte run.
+///
+/// An empty `needle` matches trivially. It is handled up front because
+/// `slice::windows` panics when asked for a window size of zero.
+fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
+    if needle.is_empty() {
+        return true;
+    }
+    haystack.windows(needle.len()).any(|window| window == needle)
+}
+
+// Unit tests for the Claude record parser and compaction detection.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    // String content is taken verbatim; array content keeps only the
+    // "text" blocks, joined with a single space.
+    #[test]
+    fn parses_string_and_array_content() {
+        let user = json!({
+            "timestamp": "2026-06-15T15:00:00.000Z",
+            "type": "user",
+            "message": { "content": "hello" }
+        });
+        let assistant = json!({
+            "timestamp": "2026-06-15T15:00:01.000Z",
+            "type": "assistant",
+            "message": {
+                "content": [
+                    { "type": "text", "text": "hi" },
+                    { "type": "tool_use", "name": "ignored" },
+                    { "type": "text", "text": "there" }
+                ]
+            }
+        });
+
+        assert_eq!(
+            parse_message(&user, 7).unwrap(),
+            TranscriptMessage {
+                role: TranscriptRole::User,
+                text: "hello".to_string(),
+                timestamp: Some("2026-06-15T15:00:00.000Z".to_string()),
+                offset: 7,
+            }
+        );
+
+        assert_eq!(parse_message(&assistant, 9).unwrap().text, "hi there");
+    }
+
+    // A user record whose string content starts with the marker phrase is
+    // recognized as a compaction.
+    #[test]
+    fn detects_compaction_marker() {
+        let obj = json!({
+            "timestamp": "2026-06-15T15:00:01.000Z",
+            "type": "user",
+            "message": {
+                "content": "This session is being continued from a previous conversation."
+            }
+        });
+
+        assert!(is_compaction(&obj));
+    }
+}
--- a/src/conversation/codex.rs
+++ b/src/conversation/codex.rs
@ -0,0 +1,105 @@
+use serde_json::Value;
+
+use super::{ConversationSource, TranscriptMessage, TranscriptRole};
+
+/// `ConversationSource` for Codex-format transcript records.
+pub struct CodexSource;
+
+impl ConversationSource for CodexSource {
+    /// Delegates to this module's `parse_message`.
+    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
+        parse_message(obj, offset)
+    }
+
+    /// Delegates to this module's `is_compaction`.
+    fn is_compaction(&self, obj: &Value) -> bool {
+        is_compaction(obj)
+    }
+
+    /// Byte-level check for the `context_compacted` payload type in the raw
+    /// record; presumably lets callers skip JSON parsing for other records.
+    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
+        contains_bytes(obj_bytes, b"context_compacted")
+    }
+}
+
+/// Convert one Codex transcript record into a normalized message.
+///
+/// Only `event_msg` records are accepted, and only those whose payload
+/// `type` is `user_message` or `agent_message`. The text comes from the
+/// payload's string `message` field. Returns `None` for any other record,
+/// and when the message is missing, not a string, or empty.
+pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
+    let is_event = obj.get("type").and_then(Value::as_str) == Some("event_msg");
+    if !is_event {
+        return None;
+    }
+
+    let payload = obj.get("payload")?;
+    let role = match payload.get("type").and_then(Value::as_str)? {
+        "user_message" => TranscriptRole::User,
+        "agent_message" => TranscriptRole::Assistant,
+        _ => return None,
+    };
+
+    // Both roles carry their text in the same `message` field.
+    let text = match payload.get("message") {
+        Some(Value::String(body)) if !body.is_empty() => body.clone(),
+        _ => return None,
+    };
+
+    let timestamp = obj.get("timestamp").and_then(Value::as_str).map(String::from);
+
+    Some(TranscriptMessage { role, text, timestamp, offset })
+}
+
+/// True when `obj` is an `event_msg` record whose payload `type` is
+/// `context_compacted`.
+pub(crate) fn is_compaction(obj: &Value) -> bool {
+    let record_type = obj.get("type").and_then(Value::as_str);
+    let payload_type = obj
+        .get("payload")
+        .and_then(|payload| payload.get("type"))
+        .and_then(Value::as_str);
+
+    matches!(
+        (record_type, payload_type),
+        (Some("event_msg"), Some("context_compacted"))
+    )
+}
+
+/// Report whether `needle` occurs in `haystack` as a contiguous byte run.
+///
+/// An empty `needle` matches trivially. It is handled up front because
+/// `slice::windows` panics when asked for a window size of zero.
+fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
+    if needle.is_empty() {
+        return true;
+    }
+    haystack.windows(needle.len()).any(|window| window == needle)
+}
+
+// Unit tests for the Codex record parser and compaction detection.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    // user_message / agent_message events are parsed; other event payloads
+    // and non-event records are ignored.
+    #[test]
+    fn parses_event_messages_and_skips_noise() {
+        let user = json!({
+            "timestamp": "2026-06-15T15:00:00.000Z",
+            "type": "event_msg",
+            "payload": { "type": "user_message", "message": "start here" }
+        });
+        let assistant = json!({
+            "timestamp": "2026-06-15T15:00:01.000Z",
+            "type": "event_msg",
+            "payload": { "type": "agent_message", "message": "working" }
+        });
+        let tool = json!({
+            "timestamp": "2026-06-15T15:00:02.000Z",
+            "type": "event_msg",
+            "payload": { "type": "task_started" }
+        });
+        let raw = json!({
+            "timestamp": "2026-06-15T15:00:03.000Z",
+            "type": "response_item",
+            "payload": { "type": "message", "role": "user" }
+        });
+
+        assert_eq!(parse_message(&user, 1).unwrap().role, TranscriptRole::User);
+        assert_eq!(parse_message(&assistant, 2).unwrap().text, "working");
+        assert!(parse_message(&tool, 3).is_none());
+        assert!(parse_message(&raw, 4).is_none());
+    }
+
+    // An event_msg with a context_compacted payload is a compaction marker.
+    #[test]
+    fn detects_compaction_event() {
+        let obj = json!({
+            "timestamp": "2026-06-15T15:00:01.000Z",
+            "type": "event_msg",
+            "payload": { "type": "context_compacted" }
+        });
+
+        assert!(is_compaction(&obj));
+    }
+}
--- a/src/conversation/jsonl.rs
+++ b/src/conversation/jsonl.rs
@ -0,0 +1,110 @@
+use memchr::memrchr3;
+
+/// Scan backwards through mmap'd bytes, yielding byte slices of complete
+/// top-level JSON objects (outermost { to matching }).
+///
+/// Uses memrchr3 (SIMD) to jump between structurally significant bytes
+/// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth,
+/// skipping braces inside JSON strings. Returns objects in reverse order
+/// (newest first).
+pub struct JsonlBackwardIter<'a> {
+    /// The full buffer being scanned.
+    data: &'a [u8],
+    /// Exclusive upper bound of the region still to be scanned; moves
+    /// towards zero as objects are yielded.
+    pos: usize,
+}
+
+impl<'a> JsonlBackwardIter<'a> {
+    /// Create an iterator that starts scanning from the end of `data`.
+    pub fn new(data: &'a [u8]) -> Self {
+        Self { data, pos: data.len() }
+    }
+}
+
+impl<'a> Iterator for JsonlBackwardIter<'a> {
+    /// `(offset, bytes)`: the index of the object's opening `{` within the
+    /// buffer, and the slice from that `{` through the matching `}`.
+    type Item = (usize, &'a [u8]);
+
+    /// Yield the next object towards the start of the buffer, or `None`
+    /// once no further complete object is found.
+    fn next(&mut self) -> Option<Self::Item> {
+        next_json_object(self.data, &mut self.pos)
+    }
+}
+
+/// Decide whether the quote at index `p` is a real string delimiter.
+///
+/// Counts the run of backslashes immediately before `p`: an even count
+/// (including zero) means the quote itself is not escaped.
+fn is_unescaped_quote(data: &[u8], p: usize) -> bool {
+    let backslashes = data[..p]
+        .iter()
+        .rev()
+        .take_while(|&&byte| byte == b'\\')
+        .count();
+    backslashes % 2 == 0
+}
+
+/// Find the next complete top-level JSON object before `*pos`, scanning
+/// backwards.
+///
+/// On success returns `(start, bytes)`, where `start` is the index of the
+/// object's opening `{` and `bytes` spans through its matching `}`. `*pos`
+/// is left at `start`, so the next call continues before this object.
+/// Returns `None` when no closing brace, or no matching opening brace, is
+/// left in `data[..*pos]`.
+fn next_json_object<'a>(data: &'a [u8], pos: &mut usize) -> Option<(usize, &'a [u8])> {
+    // Find the closing } of the next object, skipping } inside strings.
+    let close = {
+        let mut in_string = false;
+        loop {
+            // Jump to the previous structurally significant byte.
+            let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?;
+            *pos = p;
+            let ch = data[p];
+
+            if in_string {
+                // Walking backwards, the string ends at its opening quote,
+                // i.e. the first quote that is not backslash-escaped.
+                if ch == b'"' && is_unescaped_quote(data, p) {
+                    in_string = false;
+                }
+                continue;
+            }
+
+            match ch {
+                b'}' => break p,
+                // Outside a string, a quote seen from the right is taken
+                // to be a string's closing quote.
+                b'"' => in_string = true,
+                // A stray { before any } is ignored.
+                _ => {}
+            }
+        }
+    };
+
+    // Track brace depth to find matching {.
+    let mut depth: usize = 1;
+    let mut in_string = false;
+
+    loop {
+        let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?;
+        *pos = p;
+        let ch = data[p];
+
+        if in_string {
+            if ch == b'"' && is_unescaped_quote(data, p) {
+                in_string = false;
+            }
+            continue;
+        }
+
+        match ch {
+            b'"' => { in_string = true; }
+            // Going backwards, a } opens a nested object and a { closes one.
+            b'}' => { depth += 1; }
+            b'{' => {
+                // depth is at least 1 here, so this cannot underflow.
+                depth -= 1;
+                if depth == 0 {
+                    return Some((*pos, &data[*pos..=close]));
+                }
+            }
+            _ => {}
+        }
+    }
+}
+
+// Unit tests for the backward JSONL scanner.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Braces and escaped quotes inside strings must not confuse the
+    // scanner, trailing non-JSON text is skipped, and objects come back
+    // newest (last in the buffer) first.
+    #[test]
+    fn handles_nested_json_and_quoted_braces() {
+        let data = br#"{"n":1,"s":"literal } brace"}
+{"n":2,"nested":{"s":"escaped quote: \" and { brace"}}
+trailing garbage
+"#;
+
+        let objs: Vec<_> = JsonlBackwardIter::new(data)
+            .map(|(_, bytes)| std::str::from_utf8(bytes).unwrap().to_string())
+            .collect();
+
+        assert_eq!(objs.len(), 2);
+        assert!(objs[0].contains(r#""n":2"#));
+        assert!(objs[1].contains(r#""n":1"#));
+    }
+}
--- a/src/conversation/mod.rs
+++ b/src/conversation/mod.rs
@ -0,0 +1,271 @@
+// Conversation transcript abstraction.
+//
+// Core code consumes normalized user/assistant messages through this module.
+// Product-specific log formats live in the small compatibility sources below.
+
+use memmap2::Mmap;
+use serde_json::Value;
+use std::fs;
+use std::path::Path;
+
+pub mod claude;
+pub mod codex;
+pub mod jsonl;
+
+pub use jsonl::JsonlBackwardIter;
+
+/// Speaker of a normalized transcript message.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TranscriptRole {
+    User,
+    Assistant,
+}
+
+/// One user or assistant message in source-independent form, as produced by
+/// `ConversationSource::parse_message`.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct TranscriptMessage {
+    /// Who produced the message.
+    pub role: TranscriptRole,
+    /// Message text.
+    pub text: String,
+    /// Timestamp string of the entry, when one is available.
+    pub timestamp: Option<String>,
+    // NOTE(review): presumably the `offset` handed to `parse_message`, which
+    // `TailMessages` sets to the byte offset of the entry's opening brace;
+    // confirm against the claude/codex sources.
+    pub offset: u64,
+}
+
+/// Adapter for one product-specific transcript log format.
+pub trait ConversationSource {
+    /// Convert one parsed transcript entry into a normalized message, or
+    /// return `None` when the entry is not a message this source recognizes.
+    /// `offset` is supplied by the caller and describes where the entry sits
+    /// in the transcript.
+    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage>;
+
+    /// Whether the parsed entry is a compaction marker in this format.
+    fn is_compaction(&self, obj: &Value) -> bool;
+
+    /// Byte-level pre-filter run on the raw entry before it is parsed as
+    /// JSON. Returning `false` lets callers skip parsing the entry. The
+    /// default accepts every entry.
+    fn may_contain_compaction(&self, _obj_bytes: &[u8]) -> bool {
+        true
+    }
+}
+
+/// Source that accepts both supported formats by delegating to
+/// `claude::ClaudeSource` and `codex::CodexSource`.
+pub struct AnyConversationSource;
+
+impl ConversationSource for AnyConversationSource {
+    // The Claude parser is tried first; Codex is consulted only if it yields
+    // nothing.
+    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
+        claude::ClaudeSource.parse_message(obj, offset)
+            .or_else(|| codex::CodexSource.parse_message(obj, offset))
+    }
+
+    // An entry counts as a compaction if either format says so.
+    fn is_compaction(&self, obj: &Value) -> bool {
+        claude::ClaudeSource.is_compaction(obj) || codex::CodexSource.is_compaction(obj)
+    }
+
+    // The entry must be parsed if either format's pre-filter accepts it.
+    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
+        claude::ClaudeSource.may_contain_compaction(obj_bytes)
+            || codex::CodexSource.may_contain_compaction(obj_bytes)
+    }
+}
+
+/// Find the byte offset of the last compaction marker in mmap'd transcript data.
+/// Returns the byte offset of the JSON object's opening brace.
+///
+/// Entries are checked with `AnyConversationSource`, so markers of either
+/// supported format are recognized. Returns `None` when no marker is found.
+pub(crate) fn find_last_compaction(data: &[u8]) -> Option<usize> {
+    find_last_compaction_with(data, &AnyConversationSource)
+}
+
+/// Walk `data` backwards object by object and return the offset of the first
+/// (i.e. newest) entry that `source` classifies as a compaction marker.
+///
+/// Entries rejected by `source.may_contain_compaction` are skipped without
+/// being parsed; entries that fail to parse as JSON are skipped as well.
+pub(crate) fn find_last_compaction_with(
+    data: &[u8],
+    source: &impl ConversationSource,
+) -> Option<usize> {
+    JsonlBackwardIter::new(data)
+        // Cheap byte-level screen so large entries are not parsed needlessly.
+        .filter(|(_, raw)| source.may_contain_compaction(raw))
+        .find_map(|(offset, raw)| {
+            let entry: Value = serde_json::from_slice(raw).ok()?;
+            source.is_compaction(&entry).then_some(offset)
+        })
+}
+
+/// Find the byte offset of the last compaction in a transcript file.
+///
+/// Returns `None` if `path` is empty, the file can't be opened or mapped,
+/// the file is empty, or it contains no compaction marker.
+pub(crate) fn find_last_compaction_in_file(path: &str) -> Option<u64> {
+    if path.is_empty() { return None; }
+
+    // Use the shared open / empty-check / mmap helper instead of repeating
+    // it here. `_file` keeps the handle alive while the mapping is scanned.
+    let (mmap, _file) = mmap_transcript(path)?;
+    find_last_compaction(&mmap).map(|off| off as u64)
+}
+
+/// Mmap a transcript file. Returns (Mmap, File) to keep both alive.
+///
+/// Returns `None` if the file cannot be opened, its metadata cannot be read,
+/// it is empty, or the mapping fails.
+pub(crate) fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
+    let file = fs::File::open(path).ok()?;
+    let meta = file.metadata().ok()?;
+    // Empty files are rejected before mapping.
+    if meta.len() == 0 { return None; }
+    // SAFETY (NOTE(review)): a file-backed mapping is only sound while the
+    // underlying file is not truncated or rewritten by another process.
+    // Transcripts are presumably append-only; confirm that holds for every
+    // writer.
+    let mmap = unsafe { Mmap::map(&file).ok()? };
+    Some((mmap, file))
+}
+
+/// Reverse iterator over user/assistant messages in a transcript file.
+/// Yields normalized transcript messages newest-first. The caller decides
+/// when to stop (byte budget, count, etc).
+pub struct TailMessages {
+    /// File handle held for as long as the mapping is in use.
+    _file: fs::File,
+    /// Memory-mapped transcript contents.
+    mmap: Mmap,
+    /// Exclusive upper bound of the bytes not yet scanned; starts at the
+    /// mapping length and moves back to each yielded object's offset.
+    pos: usize,
+}
+
+impl TailMessages {
+    /// Map the transcript at `path` and position the iterator at its end.
+    /// Returns `None` when `mmap_transcript` cannot map the file.
+    pub fn open(path: &str) -> Option<Self> {
+        mmap_transcript(path).map(|(mmap, file)| Self {
+            // Start scanning from the very end of the mapping.
+            pos: mmap.len(),
+            _file: file,
+            mmap,
+        })
+    }
+}
+
+impl Iterator for TailMessages {
+    type Item = TranscriptMessage;
+
+    /// Step backwards to the next entry that parses as a user/assistant
+    /// message; returns `None` once no further JSON object is found.
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            // Only the bytes before `self.pos` are still unread, so the first
+            // object a fresh backward iterator yields is the next-newest one.
+            let unread = &self.mmap[..self.pos];
+            let (offset, obj_bytes) = jsonl::JsonlBackwardIter::new(unread).next()?;
+            self.pos = offset;
+
+            // Entries that are not valid JSON are skipped.
+            let Ok(obj) = serde_json::from_slice::<Value>(obj_bytes) else {
+                continue;
+            };
+
+            match AnyConversationSource.parse_message(&obj, offset as u64) {
+                Some(message) => return Some(message),
+                // Not a user/assistant message in any known format.
+                None => continue,
+            }
+        }
+    }
+}
+
+/// Get the timestamp of the compaction message at a given byte offset.
+///
+/// `offset` must point at the first byte of a JSONL entry (for example a
+/// value returned by `find_last_compaction_in_file`). The entry is read up
+/// to the next newline, or to end of file, and parsed as JSON.
+///
+/// Returns the string stored under `timestamp`, or failing that under
+/// `createdAt`, `created_at` or `time`, exactly as it appears in the
+/// transcript (no reformatting is applied). Returns `None` if the file
+/// cannot be mapped, the offset is out of range, the entry is not valid
+/// JSON, or none of those fields holds a string.
+pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
+    let (mmap, _file) = mmap_transcript(path)?;
+    // Checked conversion: on 32-bit targets `as usize` would silently wrap a
+    // large offset onto an unrelated position in the file.
+    let start = usize::try_from(offset).ok()?;
+    if start >= mmap.len() { return None; }
+
+    // Find the end of this JSONL line (EOF if the last line is unterminated).
+    let end = mmap[start..].iter().position(|&b| b == b'\n')
+        .map_or(mmap.len(), |p| start + p);
+
+    let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?;
+
+    // Preferred field first, then the fallbacks, in priority order.
+    ["timestamp", "createdAt", "created_at", "time"]
+        .iter()
+        .find_map(|field| obj.get(*field).and_then(Value::as_str))
+        .map(str::to_owned)
+}
+
+/// Report whether the transcript's latest compaction differs from the one
+/// recorded on the previous call.
+///
+/// The last seen compaction offset is kept in
+/// `state_dir/compaction-{session_id}`. The result is true when a compaction
+/// exists and either nothing was recorded yet or the recorded offset is
+/// different. Whenever a compaction exists its offset is written back
+/// (write failures are ignored); with no compaction the saved value is left
+/// untouched and the result is false.
+pub fn detect_new_compaction(
+    state_dir: &Path,
+    session_id: &str,
+    transcript_path: &str,
+) -> bool {
+    let save_path = state_dir.join(format!("compaction-{}", session_id));
+
+    let current = find_last_compaction_in_file(transcript_path);
+    let previous = fs::read_to_string(&save_path)
+        .ok()
+        .and_then(|text| text.trim().parse::<u64>().ok());
+
+    // No compaction in the transcript: nothing new, nothing to record.
+    let Some(offset) = current else {
+        return false;
+    };
+
+    // Save current offset
+    fs::write(&save_path, offset.to_string()).ok();
+
+    previous != Some(offset)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io::Write;
+
+    // Write `content` to a fresh temporary file and hand back the handle,
+    // which must stay alive for as long as the path is used.
+    fn write_temp_jsonl(content: &str) -> tempfile::NamedTempFile {
+        let mut file = tempfile::NamedTempFile::new().unwrap();
+        file.write_all(content.as_bytes()).unwrap();
+        file.flush().unwrap();
+        file
+    }
+
+    // Mixed transcript: two Claude-style entries, then three Codex-style
+    // entries of which `task_started` is not a message. Four messages are
+    // expected, newest first.
+    #[test]
+    fn tail_messages_yields_normalized_messages_newest_first() {
+        let file = write_temp_jsonl(
+            r#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"user","message":{"content":"claude user"}}
+{"timestamp":"2026-06-15T15:00:01.000Z","type":"assistant","message":{"content":[{"type":"text","text":"claude assistant"}]}}
+{"timestamp":"2026-06-15T15:00:02.000Z","type":"event_msg","payload":{"type":"user_message","message":"codex user"}}
+{"timestamp":"2026-06-15T15:00:03.000Z","type":"event_msg","payload":{"type":"task_started"}}
+{"timestamp":"2026-06-15T15:00:04.000Z","type":"event_msg","payload":{"type":"agent_message","message":"codex assistant"}}
+"#,
+        );
+
+        let messages: Vec<_> = TailMessages::open(&file.path().to_string_lossy())
+            .unwrap()
+            .collect();
+
+        assert_eq!(messages.len(), 4);
+        assert_eq!(messages[0].text, "codex assistant");
+        assert_eq!(messages[1].text, "codex user");
+        assert_eq!(messages[2].text, "claude assistant");
+        assert_eq!(messages[3].text, "claude user");
+        // Later entries sit at larger byte offsets.
+        assert!(messages[0].offset > messages[1].offset);
+    }
+
+    // Each format's compaction marker must be found by the combined source.
+    #[test]
+    fn detects_claude_and_codex_compactions() {
+        let claude = br#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"user","message":{"content":"normal"}}
+{"timestamp":"2026-06-15T15:00:01.000Z","type":"user","message":{"content":"This session is being continued from a previous conversation."}}
+"#;
+        let codex = br#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"event_msg","payload":{"type":"user_message","message":"normal"}}
+{"timestamp":"2026-06-15T15:00:01.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
+"#;
+
+        assert!(find_last_compaction(claude).is_some());
+        assert!(find_last_compaction(codex).is_some());
+    }
+
+    // First call sees a compaction with no saved offset (new); the second
+    // call finds the same offset saved (not new).
+    #[test]
+    fn detect_new_compaction_tracks_offset_changes() {
+        let transcript = write_temp_jsonl(
+            r#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
+"#,
+        );
+        let state = tempfile::tempdir().unwrap();
+
+        assert!(detect_new_compaction(
+            state.path(),
+            "session",
+            &transcript.path().to_string_lossy(),
+        ));
+        assert!(!detect_new_compaction(
+            state.path(),
+            "session",
+            &transcript.path().to_string_lossy(),
+        ));
+    }
+}
--- a/src/hippocampus/graph.rs
+++ b/src/hippocampus/graph.rs
@ -11,6 +11,23 @@ use crate::store::{Store, RelationType, StoreView};

 use serde::{Deserialize, Serialize};
 use std::collections::{HashMap, HashSet, VecDeque};
+use std::sync::{OnceLock, RwLock};
+
+/// Largest neighbor count for which the clustering coefficient checks every
+/// neighbor pair; larger neighborhoods are sampled instead.
+const EXACT_CC_MAX_DEG: usize = 512;
+/// Upper bound on the number of neighbor pairs sampled for large neighborhoods.
+const APPROX_CC_PAIRS: u64 = 4096;
+/// Maximum age of a cached clustering coefficient, compared against
+/// differences of `crate::store::now_epoch()` values (15 * 60).
+const CC_CACHE_TTL_SECS: i64 = 15 * 60;
+
+/// A cached clustering coefficient together with the time it was computed.
+#[derive(Clone, Copy)]
+struct CachedCc {
+	/// The cached coefficient.
+	value: f32,
+	/// `crate::store::now_epoch()` reading taken when `value` was computed.
+	computed_at: i64,
+}
+
+// Process-wide cache of clustering coefficients, keyed by node key only.
+// NOTE(review): entries are not tied to a particular `Graph`, so a rebuilt or
+// different graph can be served values computed for an earlier one until the
+// TTL expires. Confirm this is acceptable for all callers.
+static CC_CACHE: OnceLock<RwLock<HashMap<String, CachedCc>>> = OnceLock::new();
+
+/// Access the clustering-coefficient cache, creating it empty on first use.
+fn cc_cache() -> &'static RwLock<HashMap<String, CachedCc>> {
+	CC_CACHE.get_or_init(|| RwLock::new(HashMap::new()))
+}

 /// Community info for reporting
 #[derive(Clone, Debug)]
@ -32,11 +49,13 @@ pub struct Edge {

 /// The in-memory graph built from store nodes + relations
 pub struct Graph {
-    /// Adjacency list: node key → list of edges
-    adj: HashMap<String, Vec<Edge>>,
-    /// All node keys
-    keys: HashSet<String>,
-    /// Community labels (from label propagation)
+	/// Adjacency list: node key → list of edges
+	adj: HashMap<String, Vec<Edge>>,
+	/// Neighbor sets for membership tests in graph metrics.
+	neighbor_sets: HashMap<String, HashSet<String>>,
+	/// All node keys
+	keys: HashSet<String>,
+	/// Community labels (from label propagation)
    communities: HashMap<String, u32>,
 }

@ -67,22 +86,22 @@ impl Graph {
            .unwrap_or_default()
    }

-    /// Just neighbor keys
-    pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
-        self.adj.get(key)
-            .map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
-            .unwrap_or_default()
-    }
+	/// Keys of the nodes adjacent to `key`; empty when `key` has no
+	/// neighbor-set entry.
+	pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
+		match self.neighbor_sets.get(key) {
+			Some(neighbors) => neighbors.iter().map(|n| n.as_str()).collect(),
+			None => HashSet::new(),
+		}
+	}

-    /// Jaccard similarity between two nodes' neighborhoods.
-    /// Measures overlap: |intersection| / |union| of their neighbor sets.
-    pub fn jaccard(&self, a: &str, b: &str) -> f32 {
-        let na = self.neighbor_keys(a);
-        let nb = self.neighbor_keys(b);
-        let intersection = na.intersection(&nb).count();
-        let union = na.union(&nb).count();
-        if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
-    }
+	/// Jaccard similarity of the neighborhoods of `a` and `b`:
+	/// |intersection| / |union| of their neighbor sets. Yields 0.0 when
+	/// either node has no neighbor-set entry or the union is empty.
+	pub fn jaccard(&self, a: &str, b: &str) -> f32 {
+		let (Some(na), Some(nb)) = (self.neighbor_sets.get(a), self.neighbor_sets.get(b)) else {
+			return 0.0;
+		};
+		let shared = na.intersection(nb).count();
+		// Union size by inclusion-exclusion; no second set is built.
+		let combined = na.len() + nb.len() - shared;
+		if combined == 0 {
+			return 0.0;
+		}
+		shared as f32 / combined as f32
+	}

    /// Compute Jaccard-based strength for every edge in the graph.
    /// Returns (source_key, target_key, jaccard_strength) triples.
@ -202,41 +221,78 @@ impl Graph {
        }
    }

-    /// Local clustering coefficient: fraction of a node's neighbors
-    /// that are also neighbors of each other.
-    /// cc(v) = 2E / (deg * (deg - 1))
-    pub fn clustering_coefficient(&self, key: &str) -> f32 {
-        let neighbors = self.neighbor_keys(key);
-        let deg = neighbors.len();
-        if deg < 2 {
-            return 0.0;
-        }
+	/// Local clustering coefficient of `key`: the fraction of the node's
+	/// neighbor pairs that are themselves linked,
+	/// cc(v) = 2E / (deg * (deg - 1)).
+	///
+	/// Results are kept in a process-wide cache keyed by node key. A cached
+	/// value is returned while its age is below `CC_CACHE_TTL_SECS`;
+	/// otherwise the value is recomputed and stored.
+	pub fn clustering_coefficient(&self, key: &str) -> f32 {
+		let now = crate::store::now_epoch();
+		// Copy the entry out so the read guard is gone before any write.
+		let cached = cc_cache().read().unwrap().get(key).copied();
+		if let Some(hit) = cached {
+			if now - hit.computed_at < CC_CACHE_TTL_SECS {
+				return hit.value;
+			}
+		}
+		let value = self.clustering_coefficient_uncached(key);
+		cc_cache()
+			.write()
+			.unwrap()
+			.insert(key.to_owned(), CachedCc { value, computed_at: now });
+		value
+	}

-        let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
-        let mut triangles = 0u32;
-        for i in 0..neighbor_vec.len() {
-            for j in (i + 1)..neighbor_vec.len() {
-                let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
-                if ni_neighbors.contains(neighbor_vec[j]) {
-                    triangles += 1;
-                }
-            }
-        }
+	/// Compute the local clustering coefficient of `key` without touching
+	/// the cache.
+	///
+	/// Nodes with no neighbor-set entry or fewer than two neighbors score
+	/// 0.0. Up to `EXACT_CC_MAX_DEG` neighbors, every neighbor pair is
+	/// checked. Above that, at most `APPROX_CC_PAIRS` pseudo-randomly chosen
+	/// pairs are checked and the linked fraction is returned as an estimate.
+	fn clustering_coefficient_uncached(&self, key: &str) -> f32 {
+		let Some(neighbors) = self.neighbor_sets.get(key) else {
+			return 0.0;
+		};
+		let deg = neighbors.len();
+		if deg < 2 {
+			return 0.0;
+		}

-        (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
-    }
+		let neighbor_vec: Vec<&str> = neighbors.iter().map(String::as_str).collect();
+		if deg <= EXACT_CC_MAX_DEG {
+			let mut linked = 0u64;
+			for (i, ni) in neighbor_vec.iter().enumerate() {
+				// One set lookup per neighbor instead of one per pair.
+				let Some(ni_neighbors) = self.neighbor_sets.get(*ni) else {
+					continue;
+				};
+				linked += neighbor_vec[i + 1..]
+					.iter()
+					.filter(|nj| ni_neighbors.contains(**nj))
+					.count() as u64;
+			}
+			return (2.0 * linked as f32) / (deg as f32 * (deg as f32 - 1.0));
+		}

-    /// Average clustering coefficient across all nodes with deg >= 2
-    pub fn avg_clustering_coefficient(&self) -> f32 {
-        let mut sum = 0.0f32;
-        let mut count = 0u32;
-        for key in &self.keys {
-            if self.degree(key) >= 2 {
-                sum += self.clustering_coefficient(key);
-                count += 1;
-            }
-        }
-        if count == 0 { 0.0 } else { sum / count as f32 }
+		// Large neighborhoods: estimate from a bounded number of sampled pairs.
+		// Indices come from a splitmix64-style mixer of the sample counter so
+		// they spread over all neighbors whatever `deg` is. A plain
+		// `(a * sample + c) % deg` only reaches deg / gcd(a, deg) distinct
+		// indices, which collapses badly when `deg` shares factors with `a`.
+		// The index sequence depends only on `sample` and `deg`.
+		let mix = |seed: u64| -> u64 {
+			let mut x = seed.wrapping_add(0x9E37_79B9_7F4A_7C15);
+			x = (x ^ (x >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
+			x = (x ^ (x >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
+			x ^ (x >> 31)
+		};
+		let mut linked = 0u64;
+		let samples = APPROX_CC_PAIRS.min((deg as u64 * (deg as u64 - 1)) / 2);
+		for sample in 0..samples {
+			let i = (mix(2 * sample) % deg as u64) as usize;
+			let mut j = (mix(2 * sample + 1) % deg as u64) as usize;
+			if i == j {
+				// Never pair a neighbor with itself.
+				j = (j + 1) % deg;
+			}
+			if self.neighbor_sets
+				.get(neighbor_vec[i])
+				.is_some_and(|n| n.contains(neighbor_vec[j])) {
+					linked += 1;
+			}
+		}
+		// Fraction of sampled pairs that are linked.
+		linked as f32 / samples as f32
+	}
+
+	/// Mean of `clustering_coefficient` over every node that has at least
+	/// two neighbors; 0.0 when no node qualifies.
+	pub fn avg_clustering_coefficient(&self) -> f32 {
+		let (total, counted) = self.keys
+			.iter()
+			.filter(|key| {
+				self.neighbor_sets
+					.get(key.as_str())
+					.is_some_and(|set| set.len() >= 2)
+			})
+			.fold((0.0f32, 0u32), |(total, counted), key| {
+				(total + self.clustering_coefficient(key), counted + 1)
+			});
+		if counted == 0 { 0.0 } else { total / counted as f32 }
     }

    /// Average shortest path length (sampled BFS from up to 100 nodes)
@ -266,15 +322,17 @@ impl Graph {
        dist.insert(start.to_string(), 0u32);
        queue.push_back(start.to_string());

-        while let Some(node) = queue.pop_front() {
-            let d = dist[&node];
-            for neighbor in self.neighbor_keys(&node) {
-                if !dist.contains_key(neighbor) {
-                    dist.insert(neighbor.to_string(), d + 1);
-                    queue.push_back(neighbor.to_string());
-                }
-            }
-        }
+		while let Some(node) = queue.pop_front() {
+			let d = dist[&node];
+			if let Some(neighbors) = self.neighbor_sets.get(&node) {
+				for neighbor in neighbors {
+					if !dist.contains_key(neighbor) {
+						dist.insert(neighbor.clone(), d + 1);
+						queue.push_back(neighbor.clone());
+					}
+				}
+			}
+		}
        dist
    }

@ -505,16 +563,39 @@ impl Graph {

 /// Build graph from store data (with community detection)
 pub fn build_graph(store: &impl StoreView) -> Graph {
-    let (adj, keys) = build_adjacency(store);
-    let communities = label_propagation(&keys, &adj, 20);
-    Graph { adj, keys, communities }
+	let (adj, keys) = build_adjacency(store);
+	// Neighbor sets are derived once from the adjacency list so graph
+	// metrics can do set-membership tests.
+	let neighbor_sets = build_neighbor_sets(&adj);
+	let communities = label_propagation(&keys, &adj, 20);
+	Graph {
+		adj,
+		neighbor_sets,
+		keys,
+		communities,
+	}
 }

 /// Build graph without community detection — for spreading activation
 /// searches where we only need the adjacency list.
 pub fn build_graph_fast(store: &impl StoreView) -> Graph {
-    let (adj, keys) = build_adjacency(store);
-    Graph { adj, keys, communities: HashMap::new() }
+	let (adj, keys) = build_adjacency(store);
+	// Neighbor sets are built here as well, even though the doc above says
+	// only the adjacency list is needed.
+	// NOTE(review): confirm the extra pass is acceptable on this fast path.
+	let neighbor_sets = build_neighbor_sets(&adj);
+	Graph {
+		adj,
+		neighbor_sets,
+		keys,
+		// Community detection is skipped, so no labels are available.
+		communities: HashMap::new(),
+	}
+}
+
+/// Derive, for every node in `adj`, the set of keys its edges point to.
+fn build_neighbor_sets(adj: &HashMap<String, Vec<Edge>>) -> HashMap<String, HashSet<String>> {
+	let mut sets = HashMap::with_capacity(adj.len());
+	for (key, edges) in adj {
+		// Duplicate targets collapse into a single set member.
+		let neighbors: HashSet<String> = edges.iter().map(|edge| edge.target.clone()).collect();
+		sets.insert(key.clone(), neighbors);
+	}
+	sets
 }

 fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
--- a/src/hippocampus/mod.rs
+++ b/src/hippocampus/mod.rs
@ -17,7 +17,6 @@ pub mod query;
 pub mod spectral;
 pub mod neuro;
 pub mod counters;
-pub mod transcript;

 use std::cell::RefCell;
 use std::path::PathBuf;
--- a/src/hippocampus/transcript.rs
+++ b/src/hippocampus/transcript.rs
@ -1,340 +0,0 @@
-// Transcript JSONL parsing utilities.
-//
-// Provides mmap-based backward scanning of Claude Code transcript files
-// and compaction detection. Used by memory-search (hook mode) and
-// parse-claude-conversation (debug tool).
-
-use memchr::memrchr3;
-use memmap2::Mmap;
-use serde_json::Value;
-use std::fs;
-use std::path::Path;
-
-/// Scan backwards through mmap'd bytes, yielding byte slices of complete
-/// top-level JSON objects (outermost { to matching }).
-///
-/// Uses memrchr3 (SIMD) to jump between structurally significant bytes
-/// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth,
-/// skipping braces inside JSON strings. Returns objects in reverse order
-/// (newest first).
-pub struct JsonlBackwardIter<'a> {
-    data: &'a [u8],
-    pos: usize,
-}
-
-impl<'a> JsonlBackwardIter<'a> {
-    pub fn new(data: &'a [u8]) -> Self {
-        Self { data, pos: data.len() }
-    }
-}
-
-impl<'a> Iterator for JsonlBackwardIter<'a> {
-    type Item = &'a [u8];
-
-    fn next(&mut self) -> Option<Self::Item> {
-        // Find the closing } of the next object, skipping } inside strings
-        let close = {
-            let mut in_string = false;
-            loop {
-                let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?;
-                self.pos = p;
-                let ch = self.data[p];
-
-                if in_string {
-                    if ch == b'"' {
-                        let mut bs = 0;
-                        while p > bs + 1 && self.data[p - 1 - bs] == b'\\' {
-                            bs += 1;
-                        }
-                        if bs % 2 == 0 { in_string = false; }
-                    }
-                    continue;
-                }
-
-                match ch {
-                    b'}' => break p,
-                    b'"' => in_string = true,
-                    _ => {}
-                }
-            }
-        };
-
-        // Track brace depth to find matching {
-        let mut depth: usize = 1;
-        let mut in_string = false;
-
-        loop {
-            let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?;
-            self.pos = p;
-            let ch = self.data[p];
-
-            if in_string {
-                if ch == b'"' {
-                    // Check for escaped quote (count preceding backslashes)
-                    let mut bs = 0;
-                    while p > bs + 1 && self.data[p - 1 - bs] == b'\\' {
-                        bs += 1;
-                    }
-                    if bs % 2 == 0 {
-                        in_string = false;
-                    }
-                }
-                // { and } inside strings don't affect depth
-                continue;
-            }
-
-            match ch {
-                b'"' => { in_string = true; }
-                b'}' => { depth += 1; }
-                b'{' => {
-                    depth -= 1;
-                    if depth == 0 {
-                        return Some(&self.data[self.pos..=close]);
-                    }
-                }
-                _ => {}
-            }
-        }
-    }
-}
-
-/// Find the byte offset of the last compaction summary in mmap'd transcript data.
-///
-/// Scans backward for a user-type message whose content starts with
-/// "This session is being continued". Returns the byte offset of the
-/// JSON object's opening brace.
-pub(crate) fn find_last_compaction(data: &[u8]) -> Option<usize> {
-    let marker = b"This session is being continued";
-
-    for obj_bytes in JsonlBackwardIter::new(data) {
-        // Quick byte check before parsing
-        if !contains_bytes(obj_bytes, marker) {
-            continue;
-        }
-
-        let obj: Value = match serde_json::from_slice(obj_bytes) {
-            Ok(v) => v,
-            Err(_) => continue,
-        };
-
-        if obj.get("type").and_then(|v| v.as_str()) != Some("user") {
-            continue;
-        }
-
-        if let Some(content) = obj.get("message")
-            .and_then(|m| m.get("content"))
-            .and_then(|c| c.as_str())
-            && content.starts_with("This session is being continued") {
-                let offset = obj_bytes.as_ptr() as usize - data.as_ptr() as usize;
-                return Some(offset);
-            }
-    }
-
-    None
-}
-
-/// Find the byte offset of the last compaction in a transcript file.
-/// Returns None if the file can't be opened or has no compaction.
-pub(crate) fn find_last_compaction_in_file(path: &str) -> Option<u64> {
-    if path.is_empty() { return None; }
-
-    let file = fs::File::open(path).ok()?;
-    let meta = file.metadata().ok()?;
-    if meta.len() == 0 { return None; }
-
-    let mmap = unsafe { Mmap::map(&file).ok()? };
-    find_last_compaction(&mmap).map(|off| off as u64)
-}
-
-/// Mmap a transcript file. Returns (Mmap, File) to keep both alive.
-pub(crate) fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
-    let file = fs::File::open(path).ok()?;
-    let meta = file.metadata().ok()?;
-    if meta.len() == 0 { return None; }
-    let mmap = unsafe { Mmap::map(&file).ok()? };
-    Some((mmap, file))
-}
-
-fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
-    haystack.windows(needle.len()).any(|w| w == needle)
-}
-
-/// Reverse iterator over user/assistant messages in a transcript file.
-/// Yields (role, text, timestamp) tuples newest-first. The caller decides
-/// when to stop (byte budget, count, etc).
-pub struct TailMessages {
-    _file: fs::File,
-    mmap: Mmap,
-    pos: usize,
-}
-
-impl TailMessages {
-    pub fn open(path: &str) -> Option<Self> {
-        let (mmap, file) = mmap_transcript(path)?;
-        let pos = mmap.len();
-        Some(Self { _file: file, mmap, pos })
-    }
-}
-
-impl Iterator for TailMessages {
-    type Item = (String, String, String);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        loop {
-            // Find closing }, skipping } inside strings
-            let close = {
-                let mut in_string = false;
-                loop {
-                    let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?;
-                    self.pos = p;
-                    let ch = self.mmap[p];
-
-                    if in_string {
-                        if ch == b'"' {
-                            let mut bs = 0;
-                            while p > bs + 1 && self.mmap[p - 1 - bs] == b'\\' {
-                                bs += 1;
-                            }
-                            if bs % 2 == 0 { in_string = false; }
-                        }
-                        continue;
-                    }
-
-                    match ch {
-                        b'}' => break p,
-                        b'"' => in_string = true,
-                        _ => {}
-                    }
-                }
-            };
-
-            // Track brace depth to find matching {
-            let mut depth: usize = 1;
-            let mut in_string = false;
-            let open = loop {
-                let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?;
-                self.pos = p;
-                let ch = self.mmap[p];
-
-                if in_string {
-                    if ch == b'"' {
-                        let mut bs = 0;
-                        while p > bs + 1 && self.mmap[p - 1 - bs] == b'\\' {
-                            bs += 1;
-                        }
-                        if bs % 2 == 0 { in_string = false; }
-                    }
-                    continue;
-                }
-
-                match ch {
-                    b'"' => { in_string = true; }
-                    b'}' => { depth += 1; }
-                    b'{' => {
-                        depth -= 1;
-                        if depth == 0 { break p; }
-                    }
-                    _ => {}
-                }
-            };
-
-            let obj_bytes = &self.mmap[open..=close];
-
-            // The "type" field is near the start of top-level objects.
-            // Only check the first 200 bytes to avoid scanning megabyte objects.
-            let prefix = &obj_bytes[..obj_bytes.len().min(200)];
-            let is_user = memchr::memmem::find(prefix, b"\"type\":\"user\"").is_some();
-            let is_assistant = !is_user
-                && memchr::memmem::find(prefix, b"\"type\":\"assistant\"").is_some();
-            if !is_user && !is_assistant { continue; }
-
-            let obj: Value = match serde_json::from_slice(obj_bytes) {
-                Ok(v) => v,
-                Err(_) => continue,
-            };
-
-            let msg_type = if is_user { "user" } else { "assistant" };
-
-            let msg = obj.get("message").unwrap_or(&obj);
-            let text = match msg.get("content") {
-                Some(Value::String(s)) => s.clone(),
-                Some(Value::Array(arr)) => {
-                    arr.iter()
-                        .filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text"))
-                        .filter_map(|b| b.get("text").and_then(|v| v.as_str()))
-                        .collect::<Vec<_>>()
-                        .join(" ")
-                }
-                _ => continue,
-            };
-            if text.is_empty() { continue; }
-
-            let timestamp = obj.get("timestamp")
-                .and_then(|v| v.as_str())
-                .unwrap_or("")
-                .to_string();
-
-            return Some((msg_type.to_string(), text, timestamp));
-        }
-    }
-}
-
-/// Get the timestamp of the compaction message at a given byte offset.
-/// Returns a human-readable datetime string, or None if unavailable.
-pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
-    let (mmap, _file) = mmap_transcript(path)?;
-    let start = offset as usize;
-    if start >= mmap.len() { return None; }
-
-    // Find the end of this JSONL line
-    let end = mmap[start..].iter().position(|&b| b == b'\n')
-        .map(|p| start + p)
-        .unwrap_or(mmap.len());
-
-    let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?;
-
-    // Claude Code transcript entries have a "timestamp" field (ISO 8601)
-    if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) {
-        return Some(ts.to_string());
-    }
-
-    // Fallback: try "createdAt" or similar fields
-    for field in &["createdAt", "created_at", "time"] {
-        if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) {
-            return Some(ts.to_string());
-        }
-    }
-
-    None
-}
-
-/// Detect whether a compaction has occurred since the last check.
-///
-/// Compares the current compaction offset against a saved value in
-/// `state_dir/compaction-{session_id}`. Returns true if a new
-/// compaction was found. Updates the saved offset.
-pub fn detect_new_compaction(
-    state_dir: &Path,
-    session_id: &str,
-    transcript_path: &str,
-) -> bool {
-    let offset = find_last_compaction_in_file(transcript_path);
-
-    let save_path = state_dir.join(format!("compaction-{}", session_id));
-    let saved: Option<u64> = fs::read_to_string(&save_path)
-        .ok()
-        .and_then(|s| s.trim().parse().ok());
-
-    let is_new = match (offset, saved) {
-        (Some(cur), Some(prev)) => cur != prev,
-        (Some(_), None) => true,
-        _ => false,
-    };
-
-    // Save current offset
-    if let Some(off) = offset {
-        fs::write(&save_path, off.to_string()).ok();
-    }
-
-    is_new
-}
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,4 +1,4 @@
-#![feature(async_fn_track_caller)]
+#![cfg_attr(feature = "nightly-diagnostics", feature(async_fn_track_caller))]

 // consciousness — unified crate for memory, agents, and subconscious processes
 //
@ -25,6 +25,9 @@ macro_rules! dbglog {
    }};
 }

+// Logging (target-routed file logger)
+pub mod logging;
+
 // User interface (TUI, CLI)
 pub mod user;

@ -40,6 +43,9 @@ pub mod hippocampus;
 // Autonomous agents
 pub mod subconscious;

+// Conversation transcript abstraction and compatibility sources
+pub mod conversation;
+
 // Unified configuration
 pub mod config;
 pub mod config_writer;
@ -88,7 +94,8 @@ pub mod channel_capnp {
 pub use hippocampus::{
    store, graph, lookups, query,
    spectral, neuro, counters,
-    transcript, memory,
+    memory,
 };
+pub use conversation as transcript;
 use hippocampus::query::engine as search;
 use hippocampus::query::parser as query_parser;
--- a/src/locks.rs
+++ b/src/locks.rs
@ -114,7 +114,7 @@ impl<T> TrackedMutex<T> {
        Self { inner: Mutex::new(value) }
    }

-    #[track_caller]
+    #[cfg_attr(feature = "nightly-diagnostics", track_caller)]
    pub async fn lock(&self) -> TrackedMutexGuard<'_, T> {
        let location = Location::caller();
        let guard = self.inner.lock().await;
@ -125,7 +125,7 @@ impl<T> TrackedMutex<T> {
        }
    }

-    #[track_caller]
+    #[cfg_attr(feature = "nightly-diagnostics", track_caller)]
    pub fn try_lock(&self) -> Result<TrackedMutexGuard<'_, T>, tokio::sync::TryLockError> {
        let location = Location::caller();
        let guard = self.inner.try_lock()?;
@ -171,7 +171,7 @@ impl<T> TrackedRwLock<T> {
        Self { inner: RwLock::new(value) }
    }

-    #[track_caller]
+    #[cfg_attr(feature = "nightly-diagnostics", track_caller)]
    pub async fn read(&self) -> TrackedRwLockReadGuard<'_, T> {
        let location = Location::caller();
        let guard = self.inner.read().await;
@ -182,7 +182,7 @@ impl<T> TrackedRwLock<T> {
        }
    }

-    #[track_caller]
+    #[cfg_attr(feature = "nightly-diagnostics", track_caller)]
    pub async fn write(&self) -> TrackedRwLockWriteGuard<'_, T> {
        let location = Location::caller();
        let guard = self.inner.write().await;
--- a/src/logging.rs
+++ b/src/logging.rs
@ -0,0 +1,146 @@
+// logging.rs — log-crate logger that routes by target.
+//
+// Records with target "grpc" (or any target starting with "grpc::") go
+// to ~/.consciousness/logs/daemon/grpc.log so we can tell gRPC events
+// apart from the rest of consciousness's noise. Everything else goes
+// to ~/.consciousness/logs/daemon/debug.log.
+//
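+// Level threshold is taken from RUST_LOG (simple global level parse of
+// the first comma-separated directive only: "trace"/"debug"/"info"/
+// "warn"/"error"/"off"); defaults to "info". Records for the "grpc"
+// targets are always written at DEBUG and above, regardless of that
+// threshold.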
+
+use std::io::Write;
+use std::path::PathBuf;
+use std::sync::Mutex;
+
+use log::{Level, LevelFilter, Log, Metadata, Record, SetLoggerError};
+
+/// Directory holding the daemon log files: `.consciousness/logs/daemon`
+/// under the user's home directory. When no home directory is reported
+/// the path is built on an empty base, i.e. it ends up relative.
+fn logs_dir() -> PathBuf {
+    let home = dirs::home_dir().unwrap_or_default();
+    home.join(".consciousness/logs/daemon")
+}
+
+/// `log::Log` implementation that writes each record to one of two
+/// files, chosen by the record's target.
+struct RoutingLogger {
+    /// Destination for the "grpc" / "grpc::*" targets; `None` when the
+    /// file could not be opened.
+    grpc_file: Mutex<Option<std::fs::File>>,
+    /// Destination for every other target; `None` when the file could
+    /// not be opened.
+    debug_file: Mutex<Option<std::fs::File>>,
+    /// Threshold applied to records whose target is not a grpc target.
+    level: LevelFilter,
+}
+
+impl RoutingLogger {
+    /// Build a logger that filters non-grpc records at `level`.
+    ///
+    /// Setup is best effort: failure to create the log directory or to
+    /// open either file is ignored, and the affected slot stays `None`
+    /// so records routed to it are dropped.
+    fn new(level: LevelFilter) -> Self {
+        let dir = logs_dir();
+        let _ = std::fs::create_dir_all(&dir);
+
+        // Both log files are opened the same way: create if missing,
+        // always append.
+        let open_append = |file_name: &str| {
+            std::fs::OpenOptions::new()
+                .create(true)
+                .append(true)
+                .open(dir.join(file_name))
+                .ok()
+        };
+
+        let grpc_file = Mutex::new(open_append("grpc.log"));
+        let debug_file = Mutex::new(open_append("debug.log"));
+        Self { grpc_file, debug_file, level }
+    }
+
+    /// True for the exact target "grpc" and for any target under the
+    /// "grpc::" prefix.
+    fn is_grpc_target(target: &str) -> bool {
+        match target.strip_prefix("grpc") {
+            Some(rest) => rest.is_empty() || rest.starts_with("::"),
+            None => false,
+        }
+    }
+}
+
+impl Log for RoutingLogger {
+    /// Decide whether a record with this metadata would be written.
+    fn enabled(&self, m: &Metadata) -> bool {
+        // The grpc targets are pinned to DEBUG so the dedicated log is
+        // useful without RUST_LOG wrangling; every other target uses
+        // the configured level.
+        let threshold = if Self::is_grpc_target(m.target()) {
+            LevelFilter::Debug
+        } else {
+            self.level
+        };
+        m.level() <= threshold
+    }
+
+    /// Format the record as one line and append it to the file chosen
+    /// by its target. Lock and write failures are ignored.
+    fn log(&self, record: &Record) {
+        if !self.enabled(record.metadata()) {
+            return;
+        }
+
+        let timestamp = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S%.3f");
+        let line = format!(
+            "[{}] [{}] [{}] {}\n",
+            timestamp,
+            record.level(),
+            record.target(),
+            record.args(),
+        );
+
+        let slot = match Self::is_grpc_target(record.target()) {
+            true => &self.grpc_file,
+            false => &self.debug_file,
+        };
+        let Ok(mut guard) = slot.lock() else { return };
+        if let Some(file) = guard.as_mut() {
+            let _ = file.write_all(line.as_bytes());
+        }
+    }
+
+    /// Flush both files, skipping any slot that cannot be locked or
+    /// has no open file.
+    fn flush(&self) {
+        for slot in [&self.grpc_file, &self.debug_file] {
+            let Ok(mut guard) = slot.lock() else { continue };
+            if let Some(file) = guard.as_mut() {
+                let _ = file.flush();
+            }
+        }
+    }
+}
+
+/// Derive the level threshold from `RUST_LOG`.
+///
+/// Only the first comma-separated directive is examined. If it has the
+/// `module=level` form the part after the last `=` is used. Unset or
+/// unrecognised values fall back to `Info`.
+fn parse_level_from_env() -> LevelFilter {
+    let spec = match std::env::var("RUST_LOG") {
+        Ok(value) => value,
+        Err(_) => "info".to_string(),
+    };
+    let first_directive = spec.split(',').next().unwrap_or("info");
+    let level_word = match first_directive.rsplit_once('=') {
+        Some((_, level)) => level,
+        None => first_directive,
+    };
+    let normalized = level_word.trim().to_lowercase();
+    match normalized.as_str() {
+        "trace" => LevelFilter::Trace,
+        "debug" => LevelFilter::Debug,
+        "info"  => LevelFilter::Info,
+        "warn"  => LevelFilter::Warn,
+        "error" => LevelFilter::Error,
+        "off"   => LevelFilter::Off,
+        _ => LevelFilter::Info,
+    }
+}
+
+/// Install the routing logger as the global `log` logger.
+///
+/// Intended to be called once: a later call returns the
+/// `SetLoggerError` from `log::set_boxed_logger` and leaves the
+/// installed logger and the global max level untouched.
+pub fn init() -> Result<(), SetLoggerError> {
+    let level = parse_level_from_env();
+    log::set_boxed_logger(Box::new(RoutingLogger::new(level)))?;
+
+    // The global cap is never below DEBUG so that grpc records can
+    // reach the logger; records for other targets that pass the cap
+    // are still filtered by `level` inside the logger. That extra
+    // hand-off is an accepted cost for a debug tool.
+    let global_cap = std::cmp::max(LevelFilter::Debug, level);
+    log::set_max_level(global_cap);
+
+    // Write a session-boundary marker into each file so a restart is
+    // easy to spot.
+    let pid = std::process::id();
+    log::info!(
+        "===== consciousness logger init (level={}, pid={}) =====",
+        level, pid,
+    );
+    log::info!(target: "grpc",
+        "===== grpc log init (level={}, pid={}) =====",
+        level, pid,
+    );
+    Ok(())
+}
+
+/// Report the global `log::max_level()` as a `Level`.
+///
+/// `LevelFilter::Off` has no `Level` counterpart and is reported as
+/// `Level::Info`.
+#[allow(dead_code)]
+pub fn current_level() -> Level {
+    log::max_level().to_level().unwrap_or(Level::Info)
+}
--- a/src/main.rs
+++ b/src/main.rs
@ -1,4 +1,4 @@
-#![feature(panic_backtrace_config)]
+#![cfg_attr(feature = "nightly-diagnostics", feature(panic_backtrace_config))]

 // poc-memory: graph-structured memory for AI assistants
 //
@ -333,6 +333,18 @@ enum AdminCmd {
        #[arg(long)]
        stats: bool,
    },
+    /// Print normalized user/assistant messages from a transcript JSONL file
+    #[command(name = "transcript-tail")]
+    TranscriptTail {
+        /// Transcript JSONL path
+        path: String,
+        /// Maximum number of messages to print
+        #[arg(long, short = 'n', default_value_t = 40)]
+        count: usize,
+        /// Print newest messages first instead of chronological order
+        #[arg(long)]
+        newest_first: bool,
+    },
 }

 /// Print help with subcommands expanded to show nested commands.
@ -458,12 +470,15 @@ impl Run for AdminCmd {
            Self::Dedup { apply } => cli::admin::cmd_dedup(apply).await,
            Self::DailyCheck    => cli::admin::cmd_daily_check().await,
            Self::LoadContext { stats } => cli::node::cmd_load_context(stats).await,
+            Self::TranscriptTail { path, count, newest_first }
+                => cli::admin::cmd_transcript_tail(&path, count, newest_first),
        }
    }
 }

 #[tokio::main]
 async fn main() {
+    #[cfg(feature = "nightly-diagnostics")]
    std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short);

    // Handle --help ourselves for expanded subcommand display
@ -495,4 +510,3 @@ async fn main() {
        process::exit(1);
    }
 }
-
--- a/src/mind/log.rs
+++ b/src/mind/log.rs
@ -3,7 +3,7 @@ use std::fs::{File, OpenOptions};
 use std::io::Write;
 use std::path::{Path, PathBuf};
 use crate::agent::context::AstNode;
-use crate::hippocampus::transcript::JsonlBackwardIter;
+use crate::conversation::JsonlBackwardIter;
 use memmap2::Mmap;

 pub struct ConversationLog {
@ -78,6 +78,6 @@ pub struct TailNodes {
 impl TailNodes {
    pub fn iter(&self) -> impl Iterator<Item = AstNode> + '_ {
        JsonlBackwardIter::new(&self.mmap)
-            .filter_map(|bytes| serde_json::from_slice::<AstNode>(bytes).ok())
+            .filter_map(|(_, bytes)| serde_json::from_slice::<AstNode>(bytes).ok())
    }
 }
--- a/src/mind/mod.rs
+++ b/src/mind/mod.rs
@ -419,7 +419,9 @@ impl Mind {
        let subconscious = Arc::new(crate::Mutex::new(Subconscious::new()));
        subconscious.lock().await.init_output_tool(subconscious.clone());

-        let unconscious = Arc::new(crate::Mutex::new(Unconscious::new()));
+        let unconscious = Arc::new(crate::Mutex::new(
+            Unconscious::new(agent.client.clone()),
+        ));

        // Spawn the unconscious loop on its own task
        if !config.no_agents {
@ -467,8 +469,11 @@ impl Mind {
                        };

                        // Spawn agents outside lock
+                        let client = unc.lock().await.client.clone();
                        for (idx, name, auto) in to_spawn {
-                            match crate::mind::unconscious::prepare_spawn(&name, auto, wake.clone()).await {
+                            match crate::mind::unconscious::prepare_spawn(
+                                &name, auto, wake.clone(), client.clone(),
+                            ).await {
                                Ok(result) => unc.lock().await.complete_spawn(idx, result),
                                Err(auto) => unc.lock().await.abort_spawn(idx, auto),
                            }
@ -688,7 +693,7 @@ impl Mind {
            }
        });

-        let mut sub_handle: Option<tokio::task::JoinHandle<()>> = None;
+        let _sub_handle: Option<tokio::task::JoinHandle<()>> = None;

        // Start finetune scoring at startup (scores existing conversation)
        if !self.config.no_agents {
@ -738,6 +743,7 @@ impl Mind {
                _ = tokio::time::sleep(timeout), if !has_input => _dmn_expired = true,
            }

+            /*
            if !self.config.no_agents {
                if sub_handle.as_ref().map_or(true, |h| h.is_finished()) {
                    let sub = self.subconscious.clone();
@ -749,6 +755,7 @@ impl Mind {
                    }));
                }
            }
+            */

            // Check for pending user input → push to agent context and start turn
            let pending = self.shared.lock().unwrap().take_pending_input();
--- a/src/mind/subconscious.rs
+++ b/src/mind/subconscious.rs
@ -631,7 +631,7 @@ impl Subconscious {
            {
                let mut st = forked.state.lock().await;
                st.provenance = auto.name.clone();
-                st.temperature = auto.temperature;
+                st.sampling.temperature = auto.temperature;
                // Surface agent gets near-interactive priority;
                // other subconscious agents get lower priority.
                st.priority = Some(if auto.name == "surface" { 1 } else { auto.priority });
--- a/src/mind/unconscious.rs
+++ b/src/mind/unconscious.rs
@ -73,10 +73,15 @@ pub struct Unconscious {
    last_health_check: Option<Instant>,
    /// Notified when agent state changes (finished, toggled)
    pub wake: std::sync::Arc<tokio::sync::Notify>,
+    /// Shared API client — cloned (cheap) into each spawned agent's
+    /// Agent::new call so they all share the manifest cache and
+    /// gRPC endpoint state. Override `.model` on the clone when a
+    /// per-agent backend differs from the default.
+    pub client: crate::agent::api::ApiClient,
 }

 impl Unconscious {
-    pub fn new() -> Self {
+    pub fn new(client: crate::agent::api::ApiClient) -> Self {
        let enabled_map = load_enabled_config();

        // Scan all .agent files, exclude subconscious-* and surface-observe
@ -120,6 +125,7 @@ impl Unconscious {
            graph_health: None,
            last_health_check: None,
            wake: std::sync::Arc::new(tokio::sync::Notify::new()),
+            client,
        }
    }

@ -134,7 +140,8 @@ impl Unconscious {
            let agent_name = self.agents[idx].name.clone();
            let auto = self.agents[idx].auto.take().unwrap();
            let wake = self.wake.clone();
-            match prepare_spawn(&agent_name, auto, wake).await {
+            let client = self.client.clone();
+            match prepare_spawn(&agent_name, auto, wake, client).await {
                Ok(result) => self.complete_spawn(idx, result),
                Err(auto) => self.abort_spawn(idx, auto),
            }
@ -250,7 +257,12 @@ pub struct SpawnResult {
 /// Called outside the Unconscious lock.
 /// On success, auto is consumed (moved into spawned task).
 /// On failure, auto is returned so it can be restored.
-pub async fn prepare_spawn(name: &str, mut auto: AutoAgent, wake: std::sync::Arc<tokio::sync::Notify>) -> Result<SpawnResult, AutoAgent> {
+pub async fn prepare_spawn(
+    name: &str,
+    mut auto: AutoAgent,
+    wake: std::sync::Arc<tokio::sync::Notify>,
+    base_client: crate::agent::api::ApiClient,
+) -> Result<SpawnResult, AutoAgent> {
    dbglog!("[unconscious] spawning {}", name);

    let def = match defs::get_def(name) {
@ -295,8 +307,10 @@ pub async fn prepare_spawn(name: &str, mut auto: AutoAgent, wake: std::sync::Arc
    };

    // Unconscious agents have self-contained prompts — no standard context.
-    let client = crate::agent::api::ApiClient::new(
-        &resolved.api_base, &resolved.api_key, &resolved.model_id);
+    // Clone the shared client so we inherit the manifest cache and
+    // only override the model id per-agent.
+    let mut client = base_client;
+    client.model = resolved.model_id.clone();
    let agent = crate::agent::Agent::new(
        client, Vec::new(),
        app, None,
@ -307,7 +321,7 @@ pub async fn prepare_spawn(name: &str, mut auto: AutoAgent, wake: std::sync::Arc
        let mut st = agent.state.lock().await;
        st.provenance = auto.name.clone();
        st.priority = Some(auto.priority);
-        st.temperature = auto.temperature;
+        st.sampling.temperature = auto.temperature;
    }

    let agent_clone = agent.clone();
@ -329,8 +343,9 @@ impl Unconscious {
        self.reap_finished();
        let to_spawn = self.select_to_spawn();
        let wake = self.wake.clone();
+        let client = self.client.clone();
        for (idx, name, auto) in to_spawn {
-            match prepare_spawn(&name, auto, wake.clone()).await {
+            match prepare_spawn(&name, auto, wake.clone(), client.clone()).await {
                Ok(result) => self.complete_spawn(idx, result),
                Err(auto) => self.abort_spawn(idx, auto),
            }
--- a/src/session.rs
+++ b/src/session.rs
@ -64,7 +64,12 @@ impl HookSession {

    /// Load from POC_SESSION_ID environment variable
    pub fn from_env() -> Option<Self> {
-        Self::from_id(std::env::var("POC_SESSION_ID").ok()?)
+        let session_id = std::env::var("POC_SESSION_ID").ok()?;
+        let mut session = Self::from_id(session_id)?;
+        if let Ok(path) = std::env::var("POC_TRANSCRIPT_PATH") {
+            session.transcript_path = path;
+        }
+        Some(session)
    }

    /// Get the seen set for this session
--- a/src/subconscious/agents/bail-no-competing.sh
+++ b/src/subconscious/agents/bail-no-competing.sh
@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 # Bail if another agent is in the same phase-group as us.
 #
 #   $1 = our pid file name (e.g. "pid-12345")
--- a/src/subconscious/defs.rs
+++ b/src/subconscious/defs.rs
@ -390,7 +390,7 @@ fn resolve_conversation(budget: Option<usize>) -> String {

    if !transcript.exists() { return String::new(); }

-    let Some(iter) = crate::transcript::TailMessages::open(&transcript.path) else {
+    let Some(iter) = crate::conversation::TailMessages::open(&transcript.path) else {
        return String::new();
    };

@ -401,10 +401,14 @@ fn resolve_conversation(budget: Option<usize>) -> String {
    let mut total_bytes = 0;
    let mut oldest_ts = String::new();

-    for (role, content, ts) in iter {
+    for message in iter {
        if total_bytes >= max_bytes { break; }
-        let name = if role == "user" { &app.user_name } else { &app.assistant_name };
-        let formatted = if !ts.is_empty() {
+        let content = message.text;
+        let name = match message.role {
+            crate::conversation::TranscriptRole::User => &app.user_name,
+            crate::conversation::TranscriptRole::Assistant => &app.assistant_name,
+        };
+        let formatted = if let Some(ts) = message.timestamp {
            oldest_ts = ts[..ts.floor_char_boundary(ts.len().min(19))].to_string();
            format!("**{}** {}: {}", name, &oldest_ts, content)
        } else {
--- a/src/subconscious/generate.rs
+++ b/src/subconscious/generate.rs
@ -4,8 +4,10 @@
 // given a context prefix and a skip predicate, generate what the model
 // would say as the next assistant turn.

+use std::sync::Arc;
+
 use crate::agent::api::{ApiClient, SamplingParams, StreamToken};
-use crate::agent::context::{AstNode, ContextState};
+use crate::agent::context::{AstNode, ContextState, WireChunk};
 use crate::agent::tokenizer;

 /// Generate an assistant continuation from the context up to `entry_idx`,
@ -13,6 +15,9 @@ use crate::agent::tokenizer;
 /// assembly. The model is whichever `client` points at — the default
 /// runtime client for memory-ablation alternates, a test-model client
 /// for F7 comparison.
+///
+/// Uses a fresh ephemeral gRPC session (no cross-call KV reuse): one
+/// Open / Append / Generate round-trip, then the session is dropped.
 pub async fn gen_continuation<F>(
    context: &ContextState,
    entry_idx: usize,
@ -21,17 +26,32 @@ pub async fn gen_continuation<F>(
 ) -> anyhow::Result<String>
 where F: FnMut(&AstNode) -> bool,
 {
-    let (mut prompt, images, _) = context.wire_prompt(0..entry_idx, skip);
+    let (mut chunks, images) = context.wire_chunks(0..entry_idx, skip);

-    prompt.push(tokenizer::IM_START);
-    prompt.extend(tokenizer::encode("assistant\n"));
+    // Assistant-turn prologue.
+    let prologue = {
+        let mut t = vec![tokenizer::IM_START];
+        t.extend(tokenizer::encode("assistant\n"));
+        t
+    };
+    match chunks.last_mut() {
+        Some(WireChunk::Tokens(last)) => last.extend(prologue),
+        _ => chunks.push(WireChunk::Tokens(prologue)),
+    }

    let sampling = SamplingParams {
        temperature: 0.6,
        top_p: 0.95,
        top_k: 20,
+        max_tokens: 4096,
    };
-    let (mut rx, _guard) = client.stream_completion_mm(&prompt, &images, sampling, Some(-5));
+
+    // Ephemeral per-call session — opens on first touch, drops when
+    // `_guard` drops at function end.
+    let session_lock = Arc::new(crate::Mutex::new(None));
+    let (mut rx, _guard) = client.stream_session_mm(
+        session_lock, chunks, images, 0, sampling, Some(-5), None,
+    );

    let mut tokens = Vec::new();
    while let Some(tok) = rx.recv().await {
--- a/src/subconscious/learn.rs
+++ b/src/subconscious/learn.rs
@ -1,100 +1,148 @@
-// training.rs — Memory importance scoring via /v1/score
+// learn.rs — Memory importance scoring over the salience gRPC protocol.
 //
-// Three scoring modes, all built on the same call_score() primitive:
+// Three scoring modes, all built on call_score():
 //
 // score_memories()  — Full N×M matrix (memories × responses) for the
-//                     debug screen. Expensive: N+1 API calls.
+//                     debug screen. Expensive: N+1 sessions/calls.
 //
-// memory_score()    — Single memory importance. Scores the 50 messages
+// score_memory()    — Single memory importance. Scores the 50 messages
 //                     after it was surfaced, with/without that memory.
-//                     2 API calls.
+//                     2 calls.
 //
 // finetune_score()  — Identifies training candidates. Scores recent
 //                     messages with all memories stripped. Responses
 //                     with high divergence depend on memories the model
-//                     hasn't internalized. 2 API calls.
+//                     hasn't internalized. 2 calls.
+//
+// Each call opens an ephemeral gRPC session (reusing the shared
+// tonic Channel on `ApiClient`), pushes the prompt through as
+// interleaved tokens + AppendImage calls, runs Generate with
+// max_tokens=0 + logprobs_ranges over the scored positions, collects
+// each Token event's sampled_logprob, then drops the SessionHandle —
+// which triggers a best-effort CloseSession over the shared channel.

 use std::sync::Arc;

 use crate::agent::api::ApiClient;
+use crate::agent::api::salience::{SessionHandle, pb};
 use crate::agent::context::{
-    Ast, AstNode, ContextState, Role, WireImage,
+    Ast, AstNode, ContextState, Role, WireChunk, WireImage,
    is_assistant, is_memory_node, memory_key, render_branch_text, render_prior_context,
 };
+use crate::agent::tokenizer;
 use crate::mind::{MindState, MindTriggered, TaskHandle};
 use crate::subconscious::generate::gen_continuation;

-const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);
-
 // ── Score API ───────────────────────────────────────────────────

-#[derive(serde::Deserialize)]
+#[derive(Debug, Clone)]
 struct ScoreResult {
    total_logprob: f64,
 }

-#[derive(serde::Deserialize)]
-struct ScoreResponse {
-    scores: Vec<ScoreResult>,
-}
-
-fn http_client() -> crate::agent::api::http::HttpClient {
-    crate::agent::api::http::HttpClient::builder()
-        .timeout(SCORE_TIMEOUT)
-        .build()
+/// Walk the flat prompt and, for every run that opens with
+/// `VISION_START` and closes with the next `VISION_END`, emit an
+/// `ImageAttachment` carrying the corresponding entry of `images`
+/// (matched in order of appearance). The recorded range starts at the
+/// `VISION_START` position and ends one past the `VISION_END`
+/// position, in absolute prompt indices.
+///
+/// # Panics
+///
+/// Panics if a `VISION_START` has no later `VISION_END`, or if the
+/// prompt contains more vision runs than there are `images`.
+fn pair_images_to_ranges(
+    prompt: &[u32],
+    images: &[WireImage],
+) -> Vec<pb::ImageAttachment> {
+    let mut attachments: Vec<pb::ImageAttachment> = Vec::new();
+    let mut pos = 0;
+    while let Some(&token) = prompt.get(pos) {
+        if token != tokenizer::VISION_START {
+            pos += 1;
+            continue;
+        }
+
+        let Some(end_rel) = prompt[pos..]
+            .iter()
+            .position(|&t| t == tokenizer::VISION_END)
+        else {
+            panic!("unmatched VISION_START at position {} in prompt", pos);
+        };
+        let end = pos + end_rel + 1;
+
+        // One attachment is pushed per vision run, so the number of
+        // attachments so far is the index of the next image to use.
+        let img_idx = attachments.len();
+        let Some(img) = images.get(img_idx) else {
+            panic!("image index {} out of range for {} images", img_idx, images.len());
+        };
+
+        attachments.push(pb::ImageAttachment {
+            bytes: img.bytes.clone(),
+            mime: img.mime.clone(),
+            pad_range_start: pos as u32,
+            pad_range_end: end as u32,
+        });
+        // Resume scanning right after the closing VISION_END.
+        pos = end;
+    }
+    attachments
 }

 async fn call_score(
-    http: &crate::agent::api::http::HttpClient,
    client: &ApiClient,
    prompt: &[u32],
    images: &[WireImage],
    ranges: &[(usize, usize)],
    priority: Option<i32>,
 ) -> anyhow::Result<Vec<ScoreResult>> {
+    use futures::StreamExt;
+
    // Nothing to score — skip the round-trip.
    if ranges.is_empty() {
        return Ok(Vec::new());
    }
-    let url = format!("{}/score", client.base_url());
-    let auth = format!("Bearer {}", client.api_key());
-    let mut body = serde_json::json!({
-        "model": client.model,
-        "prompt": prompt,
-        "score_ranges": ranges,
-        "logprobs": 1,
-    });
-    if !images.is_empty() {
-        use base64::Engine;
-        let b64 = base64::engine::general_purpose::STANDARD;
-        let uris: Vec<String> = images.iter()
-            .map(|img| format!("data:{};base64,{}", img.mime, b64.encode(&img.bytes)))
-            .collect();
-        body["multi_modal_data"] = serde_json::json!({ "image": uris });
-    }
-    if let Some(p) = priority {
-        body["priority"] = serde_json::json!(p);
-    }
-    let response = http
-        .send_json("POST", &url, &[
-            ("authorization", &auth),
-        ], &body)
-        .await?;

-    let status = response.status();
-    let body: serde_json::Value = response.json().await?;
+    let images_pb = pair_images_to_ranges(prompt, images);
+    let mut handle = SessionHandle::open(client).await?;

-    if !status.is_success() {
-        let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("unknown error");
-        anyhow::bail!("score API HTTP {}: {}", status, msg);
-    }
-    if let Some(err) = body.get("error").and_then(|e| e.as_str()) {
-        anyhow::bail!("score API error: {}", err);
+    // Final Generate: max_tokens=0 so the server runs prefill of the
+    // full prompt and emits Token events for each position covered
+    // by logprobs_ranges, then Done. logprob_top_k=0 means "just
+    // the sampled (prompt) token's logprob" — no top-k alternatives,
+    // which is all call_score historically needed. Images attach
+    // inline via `images`; the prompt already contains their pre-
+    // expanded vision blocks at the declared ranges.
+    let logprobs_ranges: Vec<pb::PositionRange> = ranges.iter()
+        .map(|(s, e)| pb::PositionRange { start: *s as u32, end: *e as u32 })
+        .collect();
+    let req = pb::GenerateRequest {
+        session_id: handle.session_id.clone(),
+        append_tokens: prompt.to_vec(),
+        offset: handle.committed_len,
+        truncating: false,
+        max_tokens: 0,
+        logprobs_ranges,
+        logprob_top_k: 0,
+        readout_ranges: Vec::new(),
+        temperature: 0.0,
+        top_p: 0.0,
+        top_k: 0,
+        stop_token_ids: Vec::new(),
+        priority: priority.unwrap_or(0),
+        images: images_pb,
+    };
+
+    let mut stream = handle.generate(req).await?;
+    let mut totals = vec![0.0f64; ranges.len()];
+    while let Some(event) = stream.next().await {
+        let event = event
+            .map_err(|s| anyhow::anyhow!("score Generate stream: {}", s))?;
+        let Some(inner) = event.event else { continue };
+        match inner {
+            pb::generate_event::Event::Token(t) => {
+                if !t.has_sampled_logprob { continue; }
+                let pos = t.position as usize;
+                for (i, (start, end)) in ranges.iter().enumerate() {
+                    if pos >= *start && pos < *end {
+                        totals[i] += t.sampled_logprob as f64;
+                    }
+                }
+            }
+            pb::generate_event::Event::Done(_) => break,
+        }
    }

-    let result: ScoreResponse = serde_json::from_value(body)
-        .map_err(|e| anyhow::anyhow!("failed to parse score response: {}", e))?;
-    Ok(result.scores)
+    Ok(totals.into_iter()
+        .map(|total_logprob| ScoreResult { total_logprob })
+        .collect())
 }

 /// Compute per-position logprob divergence: how much worse the model
@ -110,7 +158,6 @@ fn divergence(baseline: &[ScoreResult], without: &[ScoreResult]) -> Vec<f64> {

 /// Score two message sets and return total divergence.
 async fn score_divergence<F>(
-    http: &crate::agent::api::http::HttpClient,
    client: &ApiClient,
    context: &ContextState,
    range: std::ops::Range<usize>,
@ -123,9 +170,9 @@ where F: FnMut(&AstNode) -> bool,
        context.wire_prompt(range.clone(), |_| false);
    let (without_tokens, without_images, without_ranges) =
        context.wire_prompt(range, skip);
-    let baseline = call_score(http, client, &baseline_tokens, &baseline_images,
+    let baseline = call_score(client, &baseline_tokens, &baseline_images,
                              &baseline_ranges, priority).await?;
-    let without = call_score(http, client, &without_tokens, &without_images,
+    let without = call_score(client, &without_tokens, &without_images,
                             &without_ranges, priority).await?;
    let divs = divergence(&baseline, &without);
    Ok((divs, baseline))
@ -162,14 +209,13 @@ pub async fn score_memories(
    dbglog!("[scoring-full] starting: {} memories × {} responses",
        total, response_indices.len());

-    let http = http_client();

    let activity = crate::agent::start_activity(agent, "scoring: baseline").await;
    let (baseline_tokens, baseline_images, baseline_ranges) = {
        let ctx = agent.context.lock().await;
        ctx.wire_prompt(0..ctx.conversation().len(), |_| false)
    };
-    let baseline = call_score(&http, client, &baseline_tokens, &baseline_images,
+    let baseline = call_score(client, &baseline_tokens, &baseline_images,
                              &baseline_ranges, Some(5)).await?;
    dbglog!("[scoring-full] baseline done ({} response scores)", baseline.len());

@ -180,7 +226,7 @@ pub async fn score_memories(
            let ctx = agent.context.lock().await;
            ctx.wire_prompt(0..ctx.conversation().len(), |n| memory_key(n) == Some(key.as_str()))
        };
-        let row = match call_score(&http, client, &tokens, &images, &ranges, Some(5)).await {
+        let row = match call_score(client, &tokens, &images, &ranges, Some(5)).await {
            Ok(without) => {
                let divs = divergence(&baseline, &without);
                let max_div = divs.iter().cloned().fold(0.0f64, f64::max);
@ -194,25 +240,23 @@ pub async fn score_memories(
                vec![0.0; baseline.len()]
            }
        };
-        // Write this memory's scores to the live AST nodes
+        // Write this memory's scores to the live AST nodes via the
+        // focused setter — keeps the AST mutation surface narrow.
        {
            let mut ctx = agent.context.lock().await;
            let mut set_count = 0;

            for (resp_idx, &idx) in response_indices.iter().enumerate() {
-                if idx >= ctx.conversation().len() { continue; }
-                let node = &mut ctx.conversation_mut()[idx];
-                if let AstNode::Branch {
-                    role: Role::Assistant, memory_scores, ..
-                } = node {
-                    if let Some(&score) = row.get(resp_idx) {
-                        if score > 0.01 {
-                            memory_scores.insert(key.clone(), score);
-                            set_count += 1;
-                        } else {
-                            memory_scores.remove(key.as_str());
-                        }
-                    }
+                let Some(&score) = row.get(resp_idx) else { continue };
+                let normalized = if score > 0.01 { Some(score) } else { None };
+                ctx.set_branch_memory_score(
+                    crate::agent::context::Section::Conversation,
+                    idx,
+                    &key,
+                    normalized,
+                );
+                if normalized.is_some() {
+                    set_count += 1;
                }
            }

@ -263,8 +307,7 @@ pub async fn score_memory(
        return Ok(0.0);
    }

-    let http = http_client();
-    let (divs, _) = score_divergence(&http, client, context, range,
+    let (divs, _) = score_divergence(client, context, range,
                                     |n| memory_key(n) == Some(key), Some(5)).await?;

    Ok(divs.iter().sum())
@ -322,7 +365,6 @@ where
    // Score oldest-first
    candidates.sort_by_key(|&(_, _, last)| last);

-    let http = http_client();
    let mut scored = 0;

    let entries = context.conversation();
@ -357,7 +399,7 @@ where
        }

        activity.update(format!("scoring: {}/{} {}", scored + 1, total, key)).await;
-        match score_divergence(&http, client, context, range,
+        match score_divergence(client, context, range,
                               |n| memory_key(n) == Some(key), Some(5)).await {
            Ok((divs, _)) => {
                let n_responses = divs.len();
@ -505,8 +547,7 @@ pub async fn score_finetune(
        return Ok(Vec::new());
    }

-    let http = http_client();
-    let (divs, _) = score_divergence(&http, client, context, range, is_memory_node, Some(5)).await?;
+    let (divs, _) = score_divergence(client, context, range, is_memory_node, Some(5)).await?;

    let mut results: Vec<(usize, f64)> = response_positions.iter()
        .enumerate()
@ -804,8 +845,10 @@ pub async fn send_to_train(
        }
    });

-    let http = http_client();
    let url = format!("{}/train", client.base_url());
+    let http = crate::agent::api::http::HttpClient::builder()
+        .timeout(std::time::Duration::from_secs(300))
+        .build();
    let response = http.send_json("POST", &url, &[], &body).await?;

    let status = response.status();
--- a/src/subconscious/prompts.rs
+++ b/src/subconscious/prompts.rs
@ -104,22 +104,21 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph)
                item.classification, item.outlier_score));
        }

-        if let Some(community) = node.community_id {
-            out.push_str(&format!("Community: {}  ", community));
-        }
-        let deg = graph.degree(&item.key);
-        let cc = graph.clustering_coefficient(&item.key);
+		if let Some(community) = node.community_id {
+			out.push_str(&format!("Community: {}  ", community));
+		}
+		let deg = graph.degree(&item.key);

-        // Hub-link ratio: what fraction of this node's edges go to hubs?
-        let neighbors = graph.neighbors(&item.key);
+		// Hub-link ratio: what fraction of this node's edges go to hubs?
+		let neighbors = graph.neighbors(&item.key);
        let hub_links = neighbors.iter()
            .filter(|(n, _)| graph.degree(n) >= hub_thresh)
            .count();
        let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
-        let is_hub = deg >= hub_thresh;
+		let is_hub = deg >= hub_thresh;

-        out.push_str(&format!("Degree: {}  CC: {:.3}  Hub-link ratio: {:.0}% ({}/{})",
-            deg, cc, hub_ratio * 100.0, hub_links, deg));
+		out.push_str(&format!("Degree: {}  CC: {:.3}  Hub-link ratio: {:.0}% ({}/{})",
+			deg, item.cc, hub_ratio * 100.0, hub_links, deg));
        if is_hub {
            out.push_str("  ← THIS IS A HUB");
        } else if hub_ratio > 0.6 {
--- a/src/user/context.rs
+++ b/src/user/context.rs
@ -43,6 +43,7 @@ impl ConsciousScreen {
                        name: format!("mem: {}", key),
                        tokens: node.tokens(),
                        content: text.clone(),
+                        token_ids: leaf.token_ids().to_vec(),
                        children: Vec::new(),
                        status: score.map(|s| format!("{:.2}", s)).unwrap_or_default(),
                    });
@ -55,6 +56,7 @@ impl ConsciousScreen {
                name: format!("Memory nodes ({})", mem_children.len()),
                tokens: mem_tokens,
                content: String::new(),
+                token_ids: Vec::new(),
                children: mem_children,
                status: format!("{} scored, {} unscored", scored, unscored),
            });
@ -70,11 +72,13 @@ impl ConsciousScreen {
                    AstNode::Leaf(leaf) => leaf.body().text().to_string(),
                    _ => String::new(),
                },
+                token_ids: node.token_ids(),
                children: match node {
                    AstNode::Branch { children, .. } => children.iter()
                        .map(|c| SectionView {
                            name: c.label(), tokens: c.tokens(),
                            content: match c { AstNode::Leaf(l) => l.body().text().to_string(), _ => String::new() },
+                            token_ids: match c { AstNode::Leaf(l) => l.token_ids().to_vec(), _ => c.token_ids() },
                            children: Vec::new(), status: String::new(),
                        }).collect(),
                    _ => Vec::new(),
@ -101,6 +105,7 @@ impl ConsciousScreen {
            name: format!("Conversation ({} entries)", conv_children.len()),
            tokens: conv_tokens,
            content: String::new(),
+            token_ids: Vec::new(),
            children: conv_children,
            status: String::new(),
        });
--- a/src/user/mod.rs
+++ b/src/user/mod.rs
@ -74,7 +74,7 @@ fn truncate(s: &str, max: usize) -> String {
 }

 /// A screen that can draw itself and handle input.
-trait ScreenView: Send {
+trait ScreenView {
    fn tick(&mut self, frame: &mut ratatui::Frame, area: ratatui::layout::Rect,
            events: &[ratatui::crossterm::event::Event], app: &mut App);
    fn label(&self) -> &'static str;
@ -291,22 +291,21 @@ async fn start(cli: crate::user::CliArgs) -> Result<()> {
    ui_handle.join().unwrap_or_else(|_| Err(anyhow::anyhow!("UI thread panicked")))
 }

-fn hotkey_cycle_reasoning(mind: &crate::mind::Mind) {
-    if let Ok(mut ag) = mind.agent.state.try_lock() {
-        let next = match ag.reasoning_effort.as_str() {
-            "none" => "low",
-            "low" => "high",
-            _ => "none",
-        };
-        ag.reasoning_effort = next.to_string();
-        let label = match next {
-            "none" => "off (monologue hidden)",
-            "low" => "low (brief monologue)",
-            "high" => "high (full monologue)",
-            _ => next,
-        };
-        ag.notify(format!("reasoning: {}", label));
-    }
+/// Cycle the agent's reasoning effort one step: "none" → "low" → "high" → "none".
+///
+/// "high" and any unrecognized current value both map to "none". The new
+/// value is stored in `reasoning_effort`, then a "reasoning: …" message
+/// describing the new level is passed to `notify`.
+///
+/// NOTE(review): this awaits the agent-state lock, whereas the version it
+/// replaces used `try_lock` and did nothing when the lock was unavailable —
+/// confirm the key-handling loop can tolerate waiting here.
+async fn hotkey_cycle_reasoning(mind: &crate::mind::Mind) {
+    let mut ag = mind.agent.state.lock().await;
+    // Pick the next level from the current one.
+    let next = match ag.reasoning_effort.as_str() {
+        "none" => "low",
+        "low" => "high",
+        _ => "none",
+    };
+    ag.reasoning_effort = next.to_string();
+    // Human-readable description of the level that was just selected.
+    let label = match next {
+        "none" => "off (monologue hidden)",
+        "low" => "low (brief monologue)",
+        "high" => "high (full monologue)",
+        // `next` is always one of the three literals above, so this arm
+        // only exists to make the match exhaustive.
+        _ => next,
+    };
+    ag.notify(format!("reasoning: {}", label));
 }

 async fn hotkey_kill_processes(mind: &crate::mind::Mind) {
@ -592,7 +591,7 @@ async fn run(
                    } else if key.modifiers.contains(KeyModifiers::CONTROL) {
                        match key.code {
                            KeyCode::Char('c') => { app.should_quit = true; }
-                            KeyCode::Char('r') => hotkey_cycle_reasoning(mind),
+                            KeyCode::Char('r') => hotkey_cycle_reasoning(mind).await,
                            KeyCode::Char('k') => hotkey_kill_processes(mind).await,
                            KeyCode::Char('p') => hotkey_cycle_autonomy(mind),
                            _ => {}
@ -756,6 +755,11 @@ fn restore_stderr(original_fd: std::os::fd::RawFd) {

 #[tokio::main]
 pub async fn main() {
+    // Install target-routed file logger: `target: "grpc"` records go to
+    // ~/.consciousness/logs/daemon/grpc.log, everything else to debug.log.
+    // Level from RUST_LOG, defaulting to info.
+    let _ = crate::logging::init();
+
    // Reap channel-daemon zombies via a SIGCHLD handler that only touches
    // PIDs listed in channels_dir(). Avoids SIGCHLD=SIG_IGN, which would
    // break tokio::process::Command::wait() (kernel auto-reap → ECHILD).
--- a/src/user/subconscious.rs
+++ b/src/user/subconscious.rs
@ -207,6 +207,7 @@ impl SubconsciousScreen {
                name: key.clone(),
                tokens: 0,
                content: val.clone(),
+                token_ids: Vec::new(),
                children: Vec::new(),
                status: String::new(),
            }
@ -238,6 +239,7 @@ impl SubconsciousScreen {
                    name: format!("Conversation ({} entries)", conv_children.len()),
                    tokens: conv_children.iter().map(|c| c.tokens).sum(),
                    content: String::new(),
+                    token_ids: Vec::new(),
                    children: conv_children,
                    status: String::new(),
                });
--- a/src/user/widgets.rs
+++ b/src/user/widgets.rs
@ -8,11 +8,18 @@ use ratatui::{
 };
 use crate::agent::context::{AstNode, Ast, NodeBody};

-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Default)]
 pub struct SectionView {
    pub name: String,
    pub tokens: usize,
    pub content: String,
+    /// Token-id stream for this subtree, displayed in place of
+    /// `content` when the tree's show-tokens mode is on. Populated
+    /// from `leaf.token_ids()` / `node.token_ids()` for views built
+    /// from the AST; empty for views that don't have a corresponding
+    /// AST node (subconscious entries, etc.), in which case the
+    /// token view falls back to the text content.
+    pub token_ids: Vec<u32>,
    pub children: Vec<SectionView>,
    /// Extra status text shown after the token count.
    pub status: String,
@ -32,6 +39,7 @@ fn node_to_view(node: &AstNode) -> SectionView {
                name,
                tokens: node.tokens(),
                content: leaf.body().text().to_string(),
+                token_ids: leaf.token_ids().to_vec(),
                children: Vec::new(),
                status,
            }
@ -44,6 +52,7 @@ fn node_to_view(node: &AstNode) -> SectionView {
                name: node.label(),
                tokens: node.tokens(),
                content: String::new(),
+                token_ids: node.token_ids(),
                children: child_views,
                status: String::new(),
            }
@ -54,10 +63,12 @@ fn node_to_view(node: &AstNode) -> SectionView {
 pub fn section_to_view(name: &str, nodes: &[AstNode]) -> SectionView {
    let children: Vec<SectionView> = nodes.iter().map(|n| node_to_view(n)).collect();
    let total_tokens: usize = nodes.iter().map(|n| n.tokens()).sum();
+    let token_ids: Vec<u32> = nodes.iter().flat_map(|n| n.token_ids()).collect();
    SectionView {
        name: name.to_string(),
        tokens: total_tokens,
        content: String::new(),
+        token_ids,
        children,
        status: String::new(),
    }
@ -104,7 +115,7 @@ pub fn format_ts_age(ts: i64) -> String {
 /// Key legend for SectionTree panes.
 pub fn tree_legend() -> Line<'static> {
    Line::styled(
-        " ↑↓:nav  →/Enter:expand  ←:collapse  e:expand all  c:collapse all  PgUp/Dn  Home/End ",
+        " ↑↓:nav  →/Enter:expand  ←:collapse  e:expand  c:collapse  v:toggle tokens/text  PgUp/Dn ",
        Style::default().fg(Color::DarkGray),
    )
 }
@ -185,11 +196,19 @@ pub struct SectionTree {
    pub selected: Option<usize>,
    pub expanded: std::collections::HashSet<usize>,
    pub scroll: super::scroll_pane::ScrollPaneState,
+    /// When true, render `token_ids` as space-separated IDs in place
+    /// of `content` in expanded panels. Toggled with 'v'.
+    pub show_tokens: bool,
 }

 impl SectionTree {
    pub fn new() -> Self {
-        Self { selected: None, expanded: std::collections::HashSet::new(), scroll: super::scroll_pane::ScrollPaneState::new() }
+        Self {
+            selected: None,
+            expanded: std::collections::HashSet::new(),
+            scroll: super::scroll_pane::ScrollPaneState::new(),
+            show_tokens: false,
+        }
    }

    fn total_nodes(&self, sections: &[SectionView]) -> usize {
@ -264,6 +283,9 @@ impl SectionTree {
            KeyCode::Char('c') => {
                self.expanded.clear();
            }
+            KeyCode::Char('v') => {
+                self.show_tokens = !self.show_tokens;
+            }
            _ => {}
        }
        self.scroll_to_selected(height);
@ -326,7 +348,12 @@ impl SectionTree {
                }
            } else if has_content {
                let content_indent = format!("{}    │ ", "  ".repeat(depth + 1));
-                let content_lines: Vec<&str> = section.content.lines().collect();
+                let body = if self.show_tokens && !section.token_ids.is_empty() {
+                    format_token_ids_wrapped(&section.token_ids)
+                } else {
+                    section.content.clone()
+                };
+                let content_lines: Vec<&str> = body.lines().collect();
                let show = content_lines.len().min(50);
                for line in &content_lines[..show] {
                    lines.push(Line::styled(
@ -344,3 +371,16 @@ impl SectionTree {
        }
    }
 }
+
+/// Format token IDs for the content panel: space-separated, wrapped
+/// at 12 ids per line so they fit comfortably in a pane.
+///
+/// Returns an empty string for an empty slice. No separator is written
+/// before the first id or after the last one, so the result has no
+/// leading or trailing whitespace.
+fn format_token_ids_wrapped(ids: &[u32]) -> String {
+    let mut out = String::new();
+    for (i, id) in ids.iter().enumerate() {
+        // A separator precedes every id except the first: a newline when
+        // the index is a multiple of 12 (start of a new row), otherwise
+        // a single space.
+        if i > 0 {
+            if i % 12 == 0 { out.push('\n'); } else { out.push(' '); }
+        }
+        out.push_str(&id.to_string());
+    }
+    out
+}