49 changed files with 1180 additions and 4680 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -165,39 +165,6 @@ dependencies = [
 "tree-sitter-yaml",
 ]

-[[package]]
-name = "async-stream"
-version = "0.3.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476"
-dependencies = [
- "async-stream-impl",
- "futures-core",
- "pin-project-lite",
-]
-
-[[package]]
-name = "async-stream-impl"
-version = "0.3.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.117",
-]
-
-[[package]]
-name = "async-trait"
-version = "0.1.89"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.117",
-]
-
 [[package]]
 name = "atomic"
 version = "0.6.1"
@ -241,53 +208,6 @@ dependencies = [
 "fs_extra",
 ]

-[[package]]
-name = "axum"
-version = "0.7.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
-dependencies = [
- "async-trait",
- "axum-core",
- "bytes",
- "futures-util",
- "http",
- "http-body",
- "http-body-util",
- "itoa",
- "matchit",
- "memchr",
- "mime",
- "percent-encoding",
- "pin-project-lite",
- "rustversion",
- "serde",
- "sync_wrapper",
- "tower 0.5.3",
- "tower-layer",
- "tower-service",
-]
-
-[[package]]
-name = "axum-core"
-version = "0.4.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
-dependencies = [
- "async-trait",
- "bytes",
- "futures-util",
- "http",
- "http-body",
- "http-body-util",
- "mime",
- "pin-project-lite",
- "rustversion",
- "sync_wrapper",
- "tower-layer",
- "tower-service",
-]
-
 [[package]]
 name = "base64"
 version = "0.13.1"
@ -571,7 +491,6 @@ dependencies = [
 "anyhow",
 "ast-grep-core",
 "ast-grep-language",
- "async-stream",
 "base64 0.22.1",
 "bytes",
 "capnp",
@ -599,14 +518,11 @@ dependencies = [
 "notify-debouncer-mini",
 "paste",
 "peg",
- "prost",
- "protoc-bin-vendored",
 "ratatui",
 "redb",
 "regex",
 "rustls",
 "rustls-native-certs",
- "rustls-pemfile",
 "serde",
 "serde_json",
 "serde_urlencoded",
@ -615,10 +531,7 @@ dependencies = [
 "tokenizers",
 "tokio",
 "tokio-rustls",
- "tokio-stream",
 "tokio-util",
- "tonic",
- "tonic-build",
 "tui-markdown",
 "tui-textarea-2",
 "uuid",
@ -1151,12 +1064,6 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"

-[[package]]
-name = "fixedbitset"
-version = "0.5.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
-
 [[package]]
 name = "flate2"
 version = "1.1.9"
@ -1381,31 +1288,6 @@ dependencies = [
 "regex-syntax",
 ]

-[[package]]
-name = "h2"
-version = "0.4.13"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
-dependencies = [
- "atomic-waker",
- "bytes",
- "fnv",
- "futures-core",
- "futures-sink",
- "http",
- "indexmap 2.14.0",
- "slab",
- "tokio",
- "tokio-util",
- "tracing",
-]
-
-[[package]]
-name = "hashbrown"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
-
 [[package]]
 name = "hashbrown"
 version = "0.15.5"
@ -1511,12 +1393,6 @@ version = "1.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"

-[[package]]
-name = "httpdate"
-version = "1.0.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
-
 [[package]]
 name = "hyper"
 version = "1.9.0"
@ -1527,11 +1403,9 @@ dependencies = [
 "bytes",
 "futures-channel",
 "futures-core",
- "h2",
 "http",
 "http-body",
 "httparse",
- "httpdate",
 "itoa",
 "pin-project-lite",
 "smallvec",
@ -1539,19 +1413,6 @@ dependencies = [
 "want",
 ]

-[[package]]
-name = "hyper-timeout"
-version = "0.5.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0"
-dependencies = [
- "hyper",
- "hyper-util",
- "pin-project-lite",
- "tokio",
- "tower-service",
-]
-
 [[package]]
 name = "hyper-util"
 version = "0.1.20"
@ -1559,17 +1420,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
 dependencies = [
 "bytes",
- "futures-channel",
- "futures-util",
 "http",
 "http-body",
 "hyper",
- "libc",
 "pin-project-lite",
- "socket2 0.6.3",
 "tokio",
- "tower-service",
- "tracing",
 ]

 [[package]]
@ -1630,16 +1485,6 @@ version = "0.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09e54e57b4c48b40f7aec75635392b12b3421fa26fe8b4332e63138ed278459c"

-[[package]]
-name = "indexmap"
-version = "1.9.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
-dependencies = [
- "autocfg",
- "hashbrown 0.12.3",
-]
-
 [[package]]
 name = "indexmap"
 version = "2.14.0"
@ -2013,12 +1858,6 @@ dependencies = [
 "xml5ever",
 ]

-[[package]]
-name = "matchit"
-version = "0.7.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
-
 [[package]]
 name = "memchr"
 version = "2.8.0"
@ -2049,12 +1888,6 @@ dependencies = [
 "autocfg",
 ]

-[[package]]
-name = "mime"
-version = "0.3.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
-
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"
@ -2105,12 +1938,6 @@ dependencies = [
 "syn 2.0.117",
 ]

-[[package]]
-name = "multimap"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
-
 [[package]]
 name = "new_debug_unreachable"
 version = "1.0.6"
@ -2406,16 +2233,6 @@ dependencies = [
 "sha2",
 ]

-[[package]]
-name = "petgraph"
-version = "0.7.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
-dependencies = [
- "fixedbitset 0.5.7",
- "indexmap 2.14.0",
-]
-
 [[package]]
 name = "phf"
 version = "0.11.3"
@ -2468,26 +2285,6 @@ dependencies = [
 "siphasher",
 ]

-[[package]]
-name = "pin-project"
-version = "1.1.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517"
-dependencies = [
- "pin-project-internal",
-]
-
-[[package]]
-name = "pin-project-internal"
-version = "1.1.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.117",
-]
-
 [[package]]
 name = "pin-project-lite"
 version = "0.2.17"
@ -2507,7 +2304,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07"
 dependencies = [
 "base64 0.22.1",
- "indexmap 2.14.0",
+ "indexmap",
 "quick-xml",
 "serde",
 "time",
@ -2581,122 +2378,6 @@ dependencies = [
 "yansi",
 ]

-[[package]]
-name = "prost"
-version = "0.13.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
-dependencies = [
- "bytes",
- "prost-derive",
-]
-
-[[package]]
-name = "prost-build"
-version = "0.13.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
-dependencies = [
- "heck",
- "itertools",
- "log",
- "multimap",
- "once_cell",
- "petgraph",
- "prettyplease",
- "prost",
- "prost-types",
- "regex",
- "syn 2.0.117",
- "tempfile",
-]
-
-[[package]]
-name = "prost-derive"
-version = "0.13.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
-dependencies = [
- "anyhow",
- "itertools",
- "proc-macro2",
- "quote",
- "syn 2.0.117",
-]
-
-[[package]]
-name = "prost-types"
-version = "0.13.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16"
-dependencies = [
- "prost",
-]
-
-[[package]]
-name = "protoc-bin-vendored"
-version = "3.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d1c381df33c98266b5f08186583660090a4ffa0889e76c7e9a5e175f645a67fa"
-dependencies = [
- "protoc-bin-vendored-linux-aarch_64",
- "protoc-bin-vendored-linux-ppcle_64",
- "protoc-bin-vendored-linux-s390_64",
- "protoc-bin-vendored-linux-x86_32",
- "protoc-bin-vendored-linux-x86_64",
- "protoc-bin-vendored-macos-aarch_64",
- "protoc-bin-vendored-macos-x86_64",
- "protoc-bin-vendored-win32",
-]
-
-[[package]]
-name = "protoc-bin-vendored-linux-aarch_64"
-version = "3.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c350df4d49b5b9e3ca79f7e646fde2377b199e13cfa87320308397e1f37e1a4c"
-
-[[package]]
-name = "protoc-bin-vendored-linux-ppcle_64"
-version = "3.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a55a63e6c7244f19b5c6393f025017eb5d793fd5467823a099740a7a4222440c"
-
-[[package]]
-name = "protoc-bin-vendored-linux-s390_64"
-version = "3.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1dba5565db4288e935d5330a07c264a4ee8e4a5b4a4e6f4e83fad824cc32f3b0"
-
-[[package]]
-name = "protoc-bin-vendored-linux-x86_32"
-version = "3.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8854774b24ee28b7868cd71dccaae8e02a2365e67a4a87a6cd11ee6cdbdf9cf5"
-
-[[package]]
-name = "protoc-bin-vendored-linux-x86_64"
-version = "3.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b38b07546580df720fa464ce124c4b03630a6fb83e05c336fea2a241df7e5d78"
-
-[[package]]
-name = "protoc-bin-vendored-macos-aarch_64"
-version = "3.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89278a9926ce312e51f1d999fee8825d324d603213344a9a706daa009f1d8092"
-
-[[package]]
-name = "protoc-bin-vendored-macos-x86_64"
-version = "3.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "81745feda7ccfb9471d7a4de888f0652e806d5795b61480605d4943176299756"
-
-[[package]]
-name = "protoc-bin-vendored-win32"
-version = "3.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95067976aca6421a523e491fce939a3e65249bac4b977adee0ee9771568e8aa3"
-
 [[package]]
 name = "pulldown-cmark"
 version = "0.13.3"
@ -2752,8 +2433,6 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
 dependencies = [
- "libc",
- "rand_chacha 0.3.1",
 "rand_core 0.6.4",
 ]

@ -2763,20 +2442,10 @@ version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
 dependencies = [
- "rand_chacha 0.9.0",
+ "rand_chacha",
 "rand_core 0.9.5",
 ]

-[[package]]
-name = "rand_chacha"
-version = "0.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
-dependencies = [
- "ppv-lite86",
- "rand_core 0.6.4",
-]
-
 [[package]]
 name = "rand_chacha"
 version = "0.9.0"
@ -2792,9 +2461,6 @@ name = "rand_core"
 version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
-dependencies = [
- "getrandom 0.2.17",
-]

 [[package]]
 name = "rand_core"
@ -3043,15 +2709,6 @@ dependencies = [
 "security-framework",
 ]

-[[package]]
-name = "rustls-pemfile"
-version = "2.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50"
-dependencies = [
- "rustls-pki-types",
-]
-
 [[package]]
 name = "rustls-pki-types"
 version = "1.14.0"
@ -3174,7 +2831,7 @@ version = "1.0.149"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
 dependencies = [
- "indexmap 2.14.0",
+ "indexmap",
 "itoa",
 "memchr",
 "serde",
@ -3278,16 +2935,6 @@ version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"

-[[package]]
-name = "socket2"
-version = "0.5.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
-dependencies = [
- "libc",
- "windows-sys 0.52.0",
-]
-
 [[package]]
 name = "socket2"
 version = "0.6.3"
@ -3402,12 +3049,6 @@ dependencies = [
 "unicode-ident",
 ]

-[[package]]
-name = "sync_wrapper"
-version = "1.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
-
 [[package]]
 name = "syntect"
 version = "5.3.0"
@ -3486,7 +3127,7 @@ dependencies = [
 "fancy-regex",
 "filedescriptor",
 "finl_unicode",
- "fixedbitset 0.4.2",
+ "fixedbitset",
 "hex",
 "lazy_static",
 "libc",
@ -3646,7 +3287,7 @@ dependencies = [
 "parking_lot",
 "pin-project-lite",
 "signal-hook-registry",
- "socket2 0.6.3",
+ "socket2",
 "tokio-macros",
 "windows-sys 0.61.2",
 ]
@ -3672,17 +3313,6 @@ dependencies = [
 "tokio",
 ]

-[[package]]
-name = "tokio-stream"
-version = "0.1.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70"
-dependencies = [
- "futures-core",
- "pin-project-lite",
- "tokio",
-]
-
 [[package]]
 name = "tokio-util"
 version = "0.7.18"
@ -3697,130 +3327,6 @@ dependencies = [
 "tokio",
 ]

-[[package]]
-name = "tonic"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
-dependencies = [
- "async-stream",
- "async-trait",
- "axum",
- "base64 0.22.1",
- "bytes",
- "h2",
- "http",
- "http-body",
- "http-body-util",
- "hyper",
- "hyper-timeout",
- "hyper-util",
- "percent-encoding",
- "pin-project",
- "prost",
- "rustls-native-certs",
- "rustls-pemfile",
- "socket2 0.5.10",
- "tokio",
- "tokio-rustls",
- "tokio-stream",
- "tower 0.4.13",
- "tower-layer",
- "tower-service",
- "tracing",
-]
-
-[[package]]
-name = "tonic-build"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11"
-dependencies = [
- "prettyplease",
- "proc-macro2",
- "prost-build",
- "prost-types",
- "quote",
- "syn 2.0.117",
-]
-
-[[package]]
-name = "tower"
-version = "0.4.13"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
-dependencies = [
- "futures-core",
- "futures-util",
- "indexmap 1.9.3",
- "pin-project",
- "pin-project-lite",
- "rand 0.8.5",
- "slab",
- "tokio",
- "tokio-util",
- "tower-layer",
- "tower-service",
- "tracing",
-]
-
-[[package]]
-name = "tower"
-version = "0.5.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
-dependencies = [
- "futures-core",
- "futures-util",
- "pin-project-lite",
- "sync_wrapper",
- "tower-layer",
- "tower-service",
-]
-
-[[package]]
-name = "tower-layer"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
-
-[[package]]
-name = "tower-service"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
-
-[[package]]
-name = "tracing"
-version = "0.1.44"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
-dependencies = [
- "pin-project-lite",
- "tracing-attributes",
- "tracing-core",
-]
-
-[[package]]
-name = "tracing-attributes"
-version = "0.1.31"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.117",
-]
-
-[[package]]
-name = "tracing-core"
-version = "0.1.36"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
-dependencies = [
- "once_cell",
-]
-
 [[package]]
 name = "tree-sitter"
 version = "0.26.8"
@ -4379,7 +3885,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
 dependencies = [
 "anyhow",
- "indexmap 2.14.0",
+ "indexmap",
 "wasm-encoder",
 "wasmparser",
 ]
@ -4392,7 +3898,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
 dependencies = [
 "bitflags 2.11.0",
 "hashbrown 0.15.5",
- "indexmap 2.14.0",
+ "indexmap",
 "semver",
 ]

@ -4761,7 +4267,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
 dependencies = [
 "anyhow",
 "heck",
- "indexmap 2.14.0",
+ "indexmap",
 "prettyplease",
 "syn 2.0.117",
 "wasm-metadata",
@ -4792,7 +4298,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
 dependencies = [
 "anyhow",
 "bitflags 2.11.0",
- "indexmap 2.14.0",
+ "indexmap",
 "log",
 "serde",
 "serde_derive",
@ -4811,7 +4317,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
 dependencies = [
 "anyhow",
 "id-arena",
- "indexmap 2.14.0",
+ "indexmap",
 "log",
 "semver",
 "serde",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -18,9 +18,6 @@ name = "consciousness"
 version.workspace = true
 edition.workspace = true

-[features]
-nightly-diagnostics = []
-
 [dependencies]
 anyhow = "1"
 html2md = "0.2"
@ -64,11 +61,6 @@ futures = "0.3"
 capnp = "0.25"
 capnp-rpc = "0.25"

-tonic = { version = "0.12", features = ["tls", "tls-roots"] }
-prost = "0.13"
-async-stream = "0.3"
-tokio-stream = "0.1"
-
 tokenizers = "0.22"

 http = "1"
@ -82,13 +74,10 @@ imagesize = "0.14"
 rustls = "0.23"
 tokio-rustls = "0.26"
 rustls-native-certs = "0.8"
-rustls-pemfile = "2"
 serde_urlencoded = "0.7"

 [build-dependencies]
 capnpc = "0.25"
-tonic-build = { version = "0.12", default-features = false, features = ["prost", "transport"] }
-protoc-bin-vendored = "3"

 [lib]
 name = "consciousness"
--- a/build.rs
+++ b/build.rs
@ -13,21 +13,4 @@ fn main() {
        .file("schema/channel.capnp")
        .run()
        .expect("capnp compile failed (channel.capnp)");
-
-    // Generate salience.v1 gRPC client + message types from proto.
-    // Server side (python) is generated separately via grpcio-tools.
-    // Use vendored protoc so we don't require a system install.
-    let protoc = protoc_bin_vendored::protoc_bin_path()
-        .expect("vendored protoc not available for this platform");
-    // SAFETY: build script is single-threaded at this point; setting env
-    // before invoking tonic_build is the documented way to point it at a
-    // non-PATH protoc.
-    unsafe { std::env::set_var("PROTOC", protoc); }
-    tonic_build::configure()
-        .build_server(false)
-        .build_client(true)
-        .compile_protos(&["proto/salience.proto"], &["proto"])
-        .expect("tonic_build compile failed (salience.proto)");
-
-    println!("cargo:rerun-if-changed=proto/salience.proto");
 }
--- a/channels/telegram/src/main.rs
+++ b/channels/telegram/src/main.rs
@ -181,8 +181,6 @@ struct TelegramMessage {
    chat_id: i64,
    sender: String,
    text: String,
-    /// Absolute path to a downloaded media file (photo, etc.), if any.
-    media_path: Option<String>,
 }

 /// Fetch and parse pending updates from Telegram via long polling.
@ -208,115 +206,19 @@ async fn get_updates(
            let sender = msg["from"]["first_name"].as_str().unwrap_or("unknown").to_string();
            let chat_id = msg["chat"]["id"].as_i64().unwrap_or(0);

-            // Photo: array of PhotoSize, largest is last. Download largest,
-            // surface message with [image: <path>] marker so the multimodal
-            // model can Read the image.
-            let (text, media_path) = if let Some(sizes) = msg["photo"].as_array() {
-                let caption = msg["caption"].as_str().unwrap_or("").to_string();
-                let largest = sizes.last();
-                let file_id = largest
-                    .and_then(|s| s["file_id"].as_str())
-                    .unwrap_or("");
-                if file_id.is_empty() {
-                    error!("telegram photo: missing file_id in update {update_id}");
-                    (caption, None)
-                } else {
-                    // Bound the download — HttpClient::request_timeout only covers
-                    // send_request, not body collect, so an indefinitely-slow body
-                    // would otherwise stall every subsequent poll.
-                    let dl = tokio::time::timeout(
-                        std::time::Duration::from_secs(60),
-                        download_telegram_file(client, token, file_id),
-                    ).await
-                        .unwrap_or_else(|_| Err("download timed out after 60s".into()));
-                    match dl {
-                        Ok(path) => (caption, Some(path)),
-                        Err(e) => {
-                            error!("telegram photo download failed (file_id={file_id}): {e}");
-                            // Surface what we have: caption plus a marker that
-                            // a photo was sent but couldn't be fetched.
-                            let marker = format!("[image: download failed: {e}]");
-                            let combined = if caption.is_empty() {
-                                marker
-                            } else {
-                                format!("{marker}\n{caption}")
-                            };
-                            (combined, None)
-                        }
-                    }
-                }
-            } else if let Some(text) = msg["text"].as_str() {
-                (text.to_string(), None)
-            } else {
-                // Other media types (voice, video, sticker, etc.) — skip for now,
-                // but log so we can extend later.
-                let kind = ["voice", "video", "sticker", "document", "audio", "animation"]
-                    .iter()
-                    .find(|k| !msg[**k].is_null())
-                    .copied()
-                    .unwrap_or("unknown");
-                info!("telegram: skipping non-text/photo message (kind={kind}, update_id={update_id})");
-                continue;
-            };
-
+            if let Some(text) = msg["text"].as_str() {
                messages.push(TelegramMessage {
                    update_id,
                    chat_id,
                    sender,
-                text,
-                media_path,
+                    text: text.to_string(),
                });
            }
        }
+    }
    Ok(messages)
 }

-/// Resolve a Telegram file_id to a downloadable URL path via getFile.
-async fn get_file_path(
-    client: &HttpClient,
-    token: &str,
-    file_id: &str,
-) -> Result<String, Box<dyn std::error::Error>> {
-    let url = format!(
-        "https://api.telegram.org/bot{}/getFile?file_id={}",
-        token, file_id,
-    );
-    let response = client.get(&url).await?;
-    let body = response.text().await?;
-    let resp: serde_json::Value = serde_json::from_str(&body)
-        .map_err(|e| format!("getFile JSON parse error: {e}"))?;
-    if !resp["ok"].as_bool().unwrap_or(false) {
-        return Err(format!("getFile failed: {}", resp["description"].as_str().unwrap_or("?")).into());
-    }
-    let file_path = resp["result"]["file_path"].as_str()
-        .ok_or("getFile: missing result.file_path")?;
-    Ok(file_path.to_string())
-}
-
-/// Download a Telegram file by file_id into the channel media dir.
-/// Returns the absolute local path on success.
-async fn download_telegram_file(
-    client: &HttpClient,
-    token: &str,
-    file_id: &str,
-) -> Result<String, Box<dyn std::error::Error>> {
-    let file_path = get_file_path(client, token, file_id).await?;
-    let url = format!("https://api.telegram.org/file/bot{}/{}", token, file_path);
-    let response = client.get(&url).await?;
-    let status = response.status();
-    if !status.is_success() {
-        return Err(format!("file download failed: {status}").into());
-    }
-    let bytes = response.bytes().await?;
-
-    let ext = file_path.rsplit('.').next().filter(|e| !e.contains('/')).unwrap_or("dat");
-    let media_dir = log_dir().join("media");
-    std::fs::create_dir_all(&media_dir)?;
-    let dest = media_dir.join(format!("{file_id}.{ext}"));
-    std::fs::write(&dest, &bytes)?;
-    Ok(dest.to_string_lossy().to_string())
-}
-
 /// Send a text message to a Telegram chat.
 async fn send_message(
    client: &HttpClient,
@ -467,19 +369,11 @@ async fn poll_once(
        let sender_lower = msg.sender.to_lowercase();
        let channel = format!("telegram.{}", sender_lower);

-        // If the message has media, prepend an [image: <abs_path>] marker
-        // so the multimodal model can Read the file directly.
-        let body = match &msg.media_path {
-            Some(path) if msg.text.is_empty() => format!("[image: {path}]"),
-            Some(path) => format!("[image: {path}]\n{}", msg.text),
-            None => msg.text.clone(),
-        };
-
-        channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &body);
+        channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &msg.text);

        let mut s = state.borrow_mut();
        s.config.chat_ids.insert(sender_lower, msg.chat_id);
-        let line = format!("[{}] {}", msg.sender, body);
+        let line = format!("[{}] {}", msg.sender, msg.text);
        s.push_message(line, 2, &channel);
    }

--- a/channels/tmux/src/main.rs
+++ b/channels/tmux/src/main.rs
@ -26,12 +26,10 @@ use consciousness::thalamus::channel_log::ChannelLog;

 #[derive(Clone, serde::Serialize, serde::Deserialize)]
 struct PaneConfig {
-    /// Human-readable label: becomes the channel name "tmux.<label>",
-    /// and the tmux pane title / window name the live pane id is
-    /// resolved from. The pane id is deliberately not stored — it is
-    /// ephemeral (recycled across pane and tmux-server restarts), so it
-    /// is looked up fresh on every connect attempt.
+    /// Human-readable label, becomes the channel name "tmux.<label>"
    label: String,
+    /// Tmux pane ID, e.g. "%5"
+    pane_id: String,
 }

 #[derive(Clone, serde::Serialize, serde::Deserialize)]
@ -88,9 +86,11 @@ impl State {
        }
    }

-    /// Whether a pane with this label is registered.
-    fn has_pane(&self, label: &str) -> bool {
-        self.config.panes.iter().any(|p| p.label == label)
+    /// Get pane_id for a label
+    fn get_pane(&self, label: &str) -> Option<&str> {
+        self.config.panes.iter()
+            .find(|p| p.label == label)
+            .map(|p| p.pane_id.as_str())
    }

    /// Check if a pane is connected
@ -103,124 +103,98 @@ impl State {
        self.connected.insert(label.to_string(), connected);
    }

-    /// Register a pane and persist.
-    fn add_pane(&mut self, label: String) {
+    /// Add a pane and persist
+    fn add_pane(&mut self, label: String, pane_id: String) {
        if !self.config.panes.iter().any(|p| p.label == label) {
-            self.config.panes.push(PaneConfig { label });
+            self.config.panes.push(PaneConfig { label, pane_id });
            save_config(&self.config);
        }
    }

-    /// Unregister a pane and persist. Returns whether it was registered.
-    fn remove_pane(&mut self, label: &str) -> bool {
+    /// Remove a pane and persist
+    fn remove_pane(&mut self, label: &str) -> Option<String> {
        if let Some(idx) = self.config.panes.iter().position(|p| p.label == label) {
-            self.config.panes.remove(idx);
+            let pane = self.config.panes.remove(idx);
            self.connected.remove(label);
            save_config(&self.config);
-            true
+            Some(pane.pane_id)
        } else {
-            false
+            None
        }
    }
 }

 // ── Pipe-Pane Reader ──────────────────────────────────────────

-/// Wait between connect attempts for a pane that is not yet reachable.
-const RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_secs(2);
-
-/// Keep a pane streamed into its channel log for as long as it stays
-/// registered. The pane id is resolved fresh by label on every connect
-/// attempt — tmux pane ids are ephemeral, so the label (pane title /
-/// window name) is the durable identity. Retries until the pane exists
-/// and pipe-pane succeeds, and reconnects the same way if the pipe
-/// later drops. Returns once close() unregisters the pane.
-async fn pipe_pane_reader(state: SharedState, label: String) {
+/// Set up pipe-pane for a single pane, reading output into the channel log.
+async fn pipe_pane_reader(state: SharedState, pane: PaneConfig) {
    let pipe_dir = dirs::home_dir()
        .unwrap_or_default()
        .join(".consciousness/channels/tmux-pipes");
    std::fs::create_dir_all(&pipe_dir).ok();
-    let pipe_path = pipe_dir.join(format!("{}.pipe", label));
-    let channel_key = format!("tmux.{}", label);

-    loop {
-        if !state.borrow().has_pane(&label) {
-            return;
-        }
+    let pipe_path = pipe_dir.join(format!("{}.pipe", pane.label));
+    let _ = std::fs::remove_file(&pipe_path);

-        connect_and_stream(&state, &label, &pipe_path, &channel_key).await;
-        state.borrow_mut().set_connected(&label, false);
-
-        if !state.borrow().has_pane(&label) {
-            return;
-        }
-        tokio::time::sleep(RETRY_INTERVAL).await;
-    }
-}
-
-/// One connect attempt: resolve the pane's live id by label, point its
-/// output at the FIFO with pipe-pane, and stream lines into the channel
-/// log. Returns on the first failure, or when the stream ends.
-async fn connect_and_stream(
-    state: &SharedState,
-    label: &str,
-    pipe_path: &std::path::Path,
-    channel_key: &str,
-) {
-    let pane_id = match find_pane_by_name(label) {
-        Some(id) => id,
-        None => return,
-    };
-
-    // Fresh FIFO for this attempt.
-    let _ = std::fs::remove_file(pipe_path);
+    // Create a named pipe (FIFO)
    unsafe {
        let c_path = std::ffi::CString::new(pipe_path.to_str().unwrap()).unwrap();
        libc::mkfifo(c_path.as_ptr(), 0o644);
    }

-    // Point the pane's output at our FIFO.
-    let pipe_cmd = format!("cat >> {}", pipe_path.to_string_lossy());
-    match std::process::Command::new("tmux")
-        .args(["pipe-pane", "-t", &pane_id, &pipe_cmd])
-        .output()
-    {
-        Ok(o) if o.status.success() => {}
-        Ok(o) => {
-            warn!("pipe-pane failed for {} ({}): {}", label, pane_id,
-                  String::from_utf8_lossy(&o.stderr));
+    // Tell tmux to pipe this pane's output to our FIFO
+    let pipe_path_str = pipe_path.to_string_lossy().to_string();
+    let result = std::process::Command::new("tmux")
+        .args(["pipe-pane", "-t", &pane.pane_id, &format!("cat >> {}", pipe_path_str)])
+        .output();
+
+    match result {
+        Ok(output) if output.status.success() => {
+            info!("pipe-pane set up for {} ({})", pane.label, pane.pane_id);
+        }
+        Ok(output) => {
+            error!("pipe-pane failed for {}: {}", pane.label,
+                   String::from_utf8_lossy(&output.stderr));
+            state.borrow_mut().set_connected(&pane.label, false);
            return;
        }
        Err(e) => {
-            error!("running tmux pipe-pane for {}: {}", label, e);
+            error!("failed to run tmux pipe-pane for {}: {}", pane.label, e);
+            state.borrow_mut().set_connected(&pane.label, false);
            return;
        }
    }

-    let file = match tokio::fs::File::open(pipe_path).await {
+    // Open the FIFO and read lines
+    let file = match tokio::fs::File::open(&pipe_path).await {
        Ok(f) => f,
        Err(e) => {
-            warn!("opening pipe for {}: {}", label, e);
+            error!("failed to open pipe for {}: {}", pane.label, e);
+            state.borrow_mut().set_connected(&pane.label, false);
            return;
        }
    };

-    info!("connected channel tmux.{} (pane {})", label, pane_id);
-    state.borrow_mut().set_connected(label, true);
+    // Mark as connected once pipe is open
+    state.borrow_mut().set_connected(&pane.label, true);
+
+    let reader = tokio::io::BufReader::new(file);
+    let mut lines = reader.lines();
+    let channel_key = format!("tmux.{}", pane.label);

-    let mut lines = tokio::io::BufReader::new(file).lines();
    while let Ok(Some(line)) = lines.next_line().await {
        if line.trim().is_empty() {
            continue;
        }
        let mut s = state.borrow_mut();
-        s.channel_logs
-            .entry(channel_key.to_string())
-            .or_insert_with(ChannelLog::new)
-            .push(line);
+        let log = s.channel_logs
+            .entry(channel_key.clone())
+            .or_insert_with(ChannelLog::new);
+        log.push(line);
    }

-    warn!("pipe-pane stream ended for {}", label);
+    warn!("pipe-pane reader ended for {}", pane.label);
+    state.borrow_mut().set_connected(&pane.label, false);
 }

 // ── ChannelServer Implementation ───────────────────────────────
@ -270,10 +244,10 @@ impl channel_server::Server for ChannelServerImpl {
        let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
        let message = pry!(pry!(params.get_message()).to_str()).to_string();

-        // Send to tmux pane via send-keys — resolve the live pane id by
-        // label (it is not stored).
+        // Send to tmux pane via send-keys
        let label = channel.strip_prefix("tmux.").unwrap_or(&channel);
-        if let Some(pane_id) = find_pane_by_name(label) {
+        let pane_id = self.state.borrow().get_pane(label).map(String::from);
+        if let Some(pane_id) = pane_id {
            let _ = std::process::Command::new("tmux")
                .args(["send-keys", "-t", &pane_id, &message, "Enter"])
                .output();
@ -328,22 +302,28 @@ impl channel_server::Server for ChannelServerImpl {
        let params = pry!(params.get());
        let label = pry!(pry!(params.get_label()).to_str()).to_string();

-        // Already registered — nothing to do.
-        if self.state.borrow().has_pane(&label) {
+        // Check if already open
+        if self.state.borrow().get_pane(&label).is_some() {
            return std::future::ready(Ok(()));
        }

-        info!("opening channel tmux.{}", label);
+        // Find the tmux pane by name (window or pane title)
+        let pane_id = match find_pane_by_name(&label) {
+            Some(id) => id,
+            None => return std::future::ready(Err(capnp::Error::failed(
+                format!("no tmux pane named '{}'", label)))),
+        };

-        // Register the label and persist. The pane id is not stored —
-        // the reader resolves it by label on every connect attempt, so
-        // this succeeds even if the pane does not exist yet; the reader
-        // connects once it appears.
-        self.state.borrow_mut().add_pane(label.clone());
+        info!("opening channel tmux.{} (pane {})", label, pane_id);

+        // Register in state and persist
+        self.state.borrow_mut().add_pane(label.clone(), pane_id.clone());
+
+        // Start pipe-pane reader
+        let pane = PaneConfig { label, pane_id };
        let reader_state = self.state.clone();
        tokio::task::spawn_local(async move {
-            pipe_pane_reader(reader_state, label).await;
+            pipe_pane_reader(reader_state, pane).await;
        });

        std::future::ready(Ok(()))
@ -359,19 +339,15 @@ impl channel_server::Server for ChannelServerImpl {
        let label = channel.strip_prefix("tmux.").unwrap_or(&channel).to_string();

        let mut s = self.state.borrow_mut();
-        if s.remove_pane(&label) {
+        if let Some(pane_id) = s.remove_pane(&label) {
            info!("closing channel tmux.{}", label);
            s.channel_logs.remove(&format!("tmux.{}", label));

-            // Stop piping if the pane is still around (if it is gone the
-            // pipe is already dead). The reader then sees the pane
-            // unregistered and exits.
-            if let Some(pane_id) = find_pane_by_name(&label) {
+            // Disconnect pipe-pane
            let _ = std::process::Command::new("tmux")
                .args(["pipe-pane", "-t", &pane_id])
                .output();
        }
-        }

        std::future::ready(Ok(()))
    }
@ -421,13 +397,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {

    tokio::task::LocalSet::new()
        .run_until(async move {
-            // Start a pipe-pane reader for each configured pane; each
-            // resolves its live pane id by label and retries until
-            // connected.
+            // Start a pipe-pane reader for each configured pane
            for pane in state.borrow().config.panes.clone() {
                let reader_state = state.clone();
                tokio::task::spawn_local(async move {
-                    pipe_pane_reader(reader_state, pane.label).await;
+                    pipe_pane_reader(reader_state, pane).await;
                });
            }

--- a/flake.lock
+++ b/flake.lock
@ -1,27 +0,0 @@
-{
-  "nodes": {
-    "nixpkgs": {
-      "locked": {
-        "lastModified": 1781074563,
-        "narHash": "sha256-md8WlXOlfnIeHeOScMTTHFyf2d6iaTwPl2apR5EQ3P4=",
-        "owner": "NixOS",
-        "repo": "nixpkgs",
-        "rev": "9ae611a455b90cf061d8f332b977e387bda8e1ca",
-        "type": "github"
-      },
-      "original": {
-        "owner": "NixOS",
-        "ref": "nixos-unstable",
-        "repo": "nixpkgs",
-        "type": "github"
-      }
-    },
-    "root": {
-      "inputs": {
-        "nixpkgs": "nixpkgs"
-      }
-    }
-  },
-  "root": "root",
-  "version": 7
-}
--- a/flake.nix
+++ b/flake.nix
@ -1,42 +0,0 @@
-{
-  description = "Development shell for consciousness";
-
-  inputs = {
-    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
-  };
-
-  outputs = { nixpkgs, ... }:
-    let
-      systems = [
-        "x86_64-linux"
-        "aarch64-linux"
-      ];
-      forAllSystems = nixpkgs.lib.genAttrs systems;
-    in
-    {
-      devShells = forAllSystems (system:
-        let
-          pkgs = import nixpkgs { inherit system; };
-        in
-        {
-          default = pkgs.mkShell {
-            packages = with pkgs; [
-              cargo
-              rustc
-              rustfmt
-              clippy
-              rust-analyzer
-
-              capnproto
-              pkg-config
-
-              jq
-              sqlite
-              python3
-            ];
-
-            RUST_BACKTRACE = "1";
-          };
-        });
-    };
-}
--- a/proto/salience.proto
+++ b/proto/salience.proto
@ -1,276 +0,0 @@
-// salience.proto — stateful generation + per-token concept readout over gRPC.
-//
-// Shape:
-//   - One server-streaming RPC (Generate) for inference. Every other
-//     operation is unary. This is the minimum streaming we need —
-//     tokens arrive one at a time with optional readouts / logprobs —
-//     and keeping everything else unary makes the client dramatically
-//     simpler than a single bidi state machine did.
-//
-//   - Server-side sessions hold the token list and image binaries.
-//     Sessions exist for bandwidth: at 200K tokens we'd otherwise
-//     re-ship ~800KB every turn, which hurts badly over a WAN link.
-//     vLLM's prefix cache holds the KV; the session just gives the
-//     client a handle so it can send deltas.
-//
-//   - The client is the source of truth for prompt content. The server
-//     is the source of truth for image token expansion (how many
-//     IMAGE_PAD tokens an image becomes under this model). The client
-//     never writes vision tokens itself — AppendImage appends the whole
-//     <|vision_start|> + IMAGE_PAD×N + <|vision_end|> block server-side.
-//
-//   - Every mutation carries (offset, truncating): the client's view of
-//     the server's current length, plus whether the client is deliberately
-//     rewriting history. Server validates on each call and rejects drift.
-//     No silent divergence, no migration bugs.
-//
-//   - Errors use gRPC status codes. NOT_FOUND for missing sessions,
-//     FAILED_PRECONDITION for offset drift or image-block splits,
-//     RESOURCE_EXHAUSTED for context overflow, ABORTED for "session busy".
-//
-// Not in v1:
-//   - Authentication beyond a shared bearer token in gRPC metadata.
-//   - Multi-tenant session namespacing.
-//   - Sampling traces beyond top-k logprobs.
-
-syntax = "proto3";
-
-package salience.v1;
-
-// ============================================================
-//  Service
-// ============================================================
-
-service Salience {
-  // Create a fresh session. Client uses session_id on every subsequent
-  // RPC until CloseSession or TTL eviction (default 30 min idle). To
-  // refresh TTL across a long pause, issue a no-op Generate (empty
-  // append_tokens, max_tokens=0, no ranges).
-  rpc OpenSession(OpenSessionRequest) returns (OpenSessionResponse);
-
-  // Release the session's tokens + images. Idempotent.
-  rpc CloseSession(CloseSessionRequest) returns (CloseSessionResponse);
-
-  // Branch a session at a given token position. The new session
-  // inherits tokens [0, at_position) and any images whose vision
-  // block lies fully in that range. Rejected with FAILED_PRECONDITION
-  // if at_position falls inside an image block (client picks a clean
-  // boundary).
-  rpc ForkSession(ForkSessionRequest) returns (ForkSessionResponse);
-
-  // Prefill + optionally decode. Images are attached inline via
-  // `GenerateRequest.images`; the client writes its own pre-expanded
-  // <|vision_start|> + N*<|image_pad|> + <|vision_end|> runs into
-  // `append_tokens` and declares each run's range in `images[i]`.
-  // Server validates run length against the actual vision-encoder
-  // feature count and returns INVALID_ARGUMENT on mismatch. Stream
-  // yields Token events (with optional readouts / logprobs per
-  // position) followed by a terminating Done.
-  rpc Generate(GenerateRequest) returns (stream GenerateEvent);
-
-  // Readout manifest for the currently-loaded model — concept names,
-  // layer indices, tensor dtype. Stateless; fetch once at client
-  // startup and cache.
-  rpc GetReadoutManifest(GetReadoutManifestRequest) returns (ReadoutManifest);
-
-  // Dump the full token stream of a session. Debug-only: used by the
-  // client to verify its local accounting against the server's
-  // session.tokens byte-for-byte when divergence is suspected. Not
-  // cheap — copies the whole sequence across the wire.
-  rpc DumpSession(DumpSessionRequest) returns (DumpSessionResponse);
-}
-
-// ============================================================
-//  Lifecycle
-// ============================================================
-
-message OpenSessionRequest {
-  // Model identifier, must match vLLM's served model. The server
-  // only has one model loaded; this is a safety check on what the
-  // client thinks it's talking to.
-  string model = 1;
-}
-
-message OpenSessionResponse {
-  string session_id = 1;
-  uint32 max_model_len = 2;
-}
-
-message CloseSessionRequest {
-  string session_id = 1;
-}
-
-message CloseSessionResponse {}
-
-message ForkSessionRequest {
-  string session_id = 1;    // source session
-  uint32 at_position = 2;   // new session inherits tokens [0, at_position)
-}
-
-message ForkSessionResponse {
-  string session_id = 1;    // new session
-}
-
-// ============================================================
-//  Inference
-// ============================================================
-
-// One image attached to a Generate call. The client is responsible
-// for writing the expanded placeholder run (VISION_START +
-// N*IMAGE_PAD + VISION_END) into `GenerateRequest.append_tokens` at
-// positions [pad_range_start, pad_range_end) and pairing it with
-// the corresponding `ImageAttachment` entry. Server validates that
-// the declared range's pad count matches what the vision encoder
-// produces, and returns INVALID_ARGUMENT if they disagree.
-message ImageAttachment {
-  // Image bytes (PNG / JPEG / WebP / …).
-  bytes  bytes = 1;
-
-  // MIME type, e.g. "image/png".
-  string mime = 2;
-
-  // Absolute token positions (in `session.tokens` AFTER `append_tokens`
-  // is applied) spanning the full vision block —  `[vision_start,
-  // pad*N, vision_end]`. end is exclusive, so end - start == N + 2.
-  uint32 pad_range_start = 3;
-  uint32 pad_range_end = 4;
-}
-
-message GenerateRequest {
-  string session_id = 1;
-
-  // Tokens to append before prefill. May be empty. Client writes the
-  // full vision block (VISION_START + N*IMAGE_PAD + VISION_END) for
-  // any newly-attached image directly into this stream; each such
-  // block must be paired with a matching entry in `images`. The
-  // server validates that the declared ranges all point at IMAGE_PAD
-  // runs and that each run's length matches what the vision encoder
-  // produces for the corresponding image.
-  repeated uint32 append_tokens = 2;
-
-  // Client's view of session.tokens length at the time of the call.
-  // Must equal server's actual length, OR be strictly less when
-  // truncating=true (server rewinds before appending). Any other
-  // mismatch is FAILED_PRECONDITION.
-  uint32 offset = 3;
-  bool   truncating = 4;
-
-  // Decode budget. 0 = prefill only (no decode, emit Token events
-  // for positions covered by logprobs_ranges / readout_ranges, then
-  // Done; replaces the old /score endpoint). >0 = decode up to this
-  // many tokens, stopping early on EOS / stop_token_ids.
-  uint32 max_tokens = 5;
-
-  // Position ranges (absolute, within the session's post-append
-  // token list) at which to emit logprobs on Token events. Empty =
-  // no logprobs. `logprob_top_k > 0` returns the top-k alternative
-  // tokens at each covered position; `logprob_top_k == 0` returns
-  // only the sampled-token's logprob.
-  repeated PositionRange logprobs_ranges = 6;
-  uint32                 logprob_top_k = 7;
-
-  // Position ranges at which to emit concept-readout vectors. Empty
-  // = no readouts. Logical shape per position is
-  // [n_layers][n_concepts] — see GetReadoutManifest.
-  repeated PositionRange readout_ranges = 8;
-
-  // Sampling parameters. Meaningful only when max_tokens > 0.
-  float           temperature = 9;      // default 1.0 when zero
-  float           top_p = 10;           // default 1.0 when zero
-  uint32          top_k = 11;           // default 0 (disabled)
-  repeated uint32 stop_token_ids = 12;
-
-  // vLLM scheduler priority (0 = interactive, 10 = batch).
-  int32 priority = 13;
-
-  // Images newly attached on this call. Each entry describes one
-  // image's binary bytes, its mime type, and the exact token-position
-  // range of its pre-expanded placeholder run inside `session.tokens`
-  // after `append_tokens` is applied. See `ImageAttachment`.
-  repeated ImageAttachment images = 14;
-}
-
-message PositionRange {
-  uint32 start = 1;   // inclusive
-  uint32 end = 2;     // exclusive
-}
-
-message GenerateEvent {
-  oneof event {
-    Token        token = 1;
-    GenerateDone done = 2;
-  }
-}
-
-message Token {
-  // Token id at this position. For prefill this is the prompt token;
-  // for decode it's the sampled token.
-  uint32 id = 1;
-
-  // Absolute position in the session's token list.
-  uint32 position = 2;
-
-  // True for prefill positions, false for decode.
-  bool   is_prefill = 3;
-
-  // Concept readout at this position. Empty if the position wasn't
-  // covered by readout_ranges.
-  repeated float readout = 4 [packed = true];
-
-  // Top-k alternative tokens' logprobs at this position — populated
-  // when the position is covered by logprobs_ranges and
-  // logprob_top_k > 0.
-  repeated TokenLogprob logprobs = 5;
-
-  // Logprob of the token at `position` (the prompt token for
-  // prefill, the sampled token for decode). Populated when the
-  // position is covered by logprobs_ranges.
-  float sampled_logprob = 6;
-  bool  has_sampled_logprob = 7;
-}
-
-message TokenLogprob {
-  uint32 id = 1;
-  float  logprob = 2;
-}
-
-message GenerateDone {
-  uint32 prompt_tokens = 1;
-  uint32 completion_tokens = 2;
-  uint32 total_tokens = 3;
-
-  enum FinishReason {
-    FINISH_REASON_UNSPECIFIED = 0;
-    FINISH_REASON_EOS = 1;              // emitted EOS / stop token
-    FINISH_REASON_LENGTH = 2;           // hit max_tokens
-    FINISH_REASON_CANCELLED = 3;        // client cancelled
-    FINISH_REASON_STOP_STRING = 4;      // matched a stop string
-  }
-  FinishReason finish_reason = 4;
-}
-
-// ============================================================
-//  Readout manifest
-// ============================================================
-
-message GetReadoutManifestRequest {}
-
-message ReadoutManifest {
-  repeated string concepts = 1;
-  repeated uint32 layers = 2;
-  uint32          hidden_size = 3;
-  string          dtype = 4;
-}
-
-// ============================================================
-//  Debug
-// ============================================================
-
-message DumpSessionRequest {
-  string session_id = 1;
-}
-
-message DumpSessionResponse {
-  // The full session.tokens sequence, verbatim.
-  repeated uint32 tokens = 1 [packed = true];
-}
--- a/scripts/quantize_qwen3_6_mm.py
+++ b/scripts/quantize_qwen3_6_mm.py
@ -1,327 +0,0 @@
-"""Quantize Qwen3.6-27B (multimodal) to FP8 for vLLM serving.
-
-Why this exists
---------------
-The earlier `quantize_qwen3_6.py` (in shell history, never committed)
-loaded the model with `AutoModelForCausalLM`, which silently strips
-the multimodal arch. Result: an FP8 checkpoint with no vision tower
-weights at all. vLLM happily instantiated the vision tower from the
-config and ran it with default/uninitialized weights, producing
-gibberish image features and `!!!!!!`-style output. We chased that
-through the protocol layer for a long time before tracing it back
-to the quant. This script avoids that trap by loading via the
-config-declared class explicitly.
-
-Recipe
------
-FP8_DYNAMIC (per-channel weight scales, per-token dynamic activation
-scales, both E4M3) for Linear weights, with an `ignore` list derived
-from Unsloth's UD-Q8_K_XL (`unsloth/Qwen3.6-27B-GGUF`). Their
-sensitivity sweep flagged specific layers as quantization-fragile;
-we honor those layer indices even though their algorithm is
-GGUF-native Q8_K and ours is FP8 — sensitivity is a layer property,
-not an algorithm property.
-
-vLLM fusion constraint
-~~~~~~~~~~~~~~~~~~~~~~
-vLLM's Qwen3.5/3.6 model code fuses sub-modules at load time:
-  qkv_proj      ← q_proj, k_proj, v_proj
-  gate_up_proj  ← gate_proj, up_proj
-  in_proj_qkvz  ← in_proj_qkv, in_proj_z
-  in_proj_ba    ← in_proj_b, in_proj_a
-compressed_tensors rejects checkpoints where sub-modules of a fused
-layer have different quantization schemes. Our ignore list is shaped
-around this — within any fused layer, all components share a scheme.
-That's the reason `in_proj_qkv` is ignored even though Unsloth's
-sweep doesn't single it out, and the reason late-stack attn override
-covers q/k/v rather than just q/k.
-
-MTP merge
---------
-`Qwen3_5ForConditionalGeneration` doesn't expose the MTP submodule,
-so `oneshot()` produces a checkpoint with the 15 `mtp.*` tensors
-silently dropped. After quantization we read the MTP weights back
-out of the upstream cached snapshot and splice them into the saved
-safetensors at BF16. They're small (~850 MB) so quantizing them
-isn't worth the calibration risk; speculative-decoding code paths
-in vLLM expect the MTP head present.
-
-Output
------
-`OUTPUT_DIR` gets the FP8 model.safetensors + config + processor +
-recipe.yaml. Vision tower stays BF16 (in `ignore`); LM Linears go
-to FP8; norms, SSM internals (not Linear), and MTP tensors stay
-BF16 untouched.
-
-Verification at end: re-opens the saved safetensors and asserts
- vision .weight tensors present (>= 150; full count is 167)
- lm_head + embed_tokens at fp16/bf16 (NOT FP8)
- a sampled FP8'd Linear actually has float8 dtype
- 15 mtp.* tensors present
-
-Run
---
-    ~/vllm-venv/bin/python quantize_qwen3_6_mm.py
-"""
-from __future__ import annotations
-
-import glob
-import json
-import sys
-from pathlib import Path
-
-import torch
-from huggingface_hub import snapshot_download
-from llmcompressor import oneshot
-from llmcompressor.modifiers.quantization import QuantizationModifier
-from safetensors import safe_open
-from safetensors.torch import save_file
-from transformers import AutoProcessor
-from transformers.models.qwen3_5.modeling_qwen3_5 import (
-    Qwen3_5ForConditionalGeneration,
-)
-
-
-MODEL = "Qwen/Qwen3.6-27B"
-OUTPUT_DIR = "/home/ubuntu/amygdala-training/Qwen3.6-27B-FP8-mm"
-
-
-# Layers Unsloth's UD-Q8_K_XL keeps at F16 (perplexity-sensitive
-# in their sweep). Late-stack clustering is consistent with the
-# general finding that errors near the output propagate directly
-# to logits.
-LATE_FFN_LAYERS = (50, 51, 59, 62, 63)
-LATE_ATTN_LAYERS = (51, 59, 63)
-
-
-# Build the ignore regex list. Note: llmcompressor matches these
-# patterns against MODULE names (no `.weight` suffix) when walking
-# `named_modules()` for `targets=["Linear"]`. The first pass of
-# this script used `\.weight$` patterns and silently quantized
-# lm_head + every linear_attn projection — verified post-hoc by
-# inspecting the saved safetensors. Patterns now anchor on `$`
-# at the module name.
-IGNORE_PATTERNS: list[str] = [
-    # Original recipe: lm_head and embeddings always full-precision.
-    # (embed_tokens is an Embedding, not a Linear, so it's already
-    # ignored by `targets=["Linear"]`. Pattern kept as belt-and-
-    # suspenders in case future llmcompressor versions widen the
-    # target set.)
-    "re:lm_head$",
-    "re:.*embed_tokens$",
-
-    # Vision tower — entire `model.visual.*` subtree (vision
-    # transformer blocks + merger + patch_embed + pos_embed).
-    # Unsloth ships the vision tower as a separate `mmproj-BF16.gguf`
-    # for GGUF consumers; in our single-file FP8 setup we just leave
-    # them at BF16.
-    "re:model\\.visual\\..*",
-
-    # MTP (multi-token prediction) module — Unsloth's GGUF doesn't
-    # carry MTP weights so we have no precision signal from them;
-    # safest to keep BF16.
-    "re:mtp\\..*",
-
-    # Linear-attention block — keep ENTIRELY at BF16. vLLM fuses
-    # `in_proj_qkv` and `in_proj_z` into a single `in_proj_qkvz`
-    # layer, and compressed_tensors rejects mixed schemes within a
-    # fused layer. Unsloth's recipe keeps z, a, b, out at F16/F32
-    # (gate/SSM internals are quantization-fragile in the GatedDeltaNet
-    # update), so the principled choice is to also keep `in_proj_qkv`
-    # at BF16 rather than FP8'ing the gate to match. We give up ~1 GB
-    # of FP8 coverage; in exchange we follow Unsloth's quality intent
-    # and load cleanly under vLLM. (`in_proj_a` + `in_proj_b` are
-    # likewise fused as `in_proj_ba` — both ignored, consistent.)
-    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_qkv$",
-    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_z$",
-    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_a$",
-    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_b$",
-    "re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.out_proj$",
-
-    # Per-layer high-precision MLP (Unsloth flagged exactly these
-    # late-stack indices in their UD-Q8_K_XL sensitivity sweep, all
-    # three of {gate, up, down} per layer). vLLM fuses gate+up into
-    # `gate_up_proj`; ignoring both keeps the fused layer consistent.
-    # `down_proj` is its own (non-fused) layer.
-    "re:model\\.language_model\\.layers\\.("
-    + "|".join(str(n) for n in LATE_FFN_LAYERS)
-    + ")\\.mlp\\.(down|gate|up)_proj$",
-
-    # Per-layer high-precision attention q/k/v (Unsloth's sweep upgrades
-    # only q and k; we extend to v because vLLM fuses q/k/v into
-    # `qkv_proj` and rejects mixed schemes. `o_proj` is its own
-    # non-fused layer and stays at FP8.
-    "re:model\\.language_model\\.layers\\.("
-    + "|".join(str(n) for n in LATE_ATTN_LAYERS)
-    + ")\\.self_attn\\.(q|k|v)_proj$",
-]
-
-
-def main() -> None:
-    print(f"Loading {MODEL} as multimodal "
-          f"(Qwen3_5ForConditionalGeneration)...", flush=True)
-    model = Qwen3_5ForConditionalGeneration.from_pretrained(
-        MODEL,
-        dtype=torch.bfloat16,
-        device_map="auto",
-        trust_remote_code=True,
-    )
-    print(f"  loaded: {model.__class__.__name__}", flush=True)
-
-    print(f"Loading processor (text + image preprocessing)...", flush=True)
-    processor = AutoProcessor.from_pretrained(MODEL, trust_remote_code=True)
-
-    print("Running FP8_DYNAMIC oneshot quantization...", flush=True)
-    print(f"  ignore list: {len(IGNORE_PATTERNS)} patterns",
-          flush=True)
-    recipe = QuantizationModifier(
-        targets=["Linear"],
-        scheme="FP8_DYNAMIC",
-        ignore=IGNORE_PATTERNS,
-    )
-    oneshot(model=model, recipe=recipe, output_dir=OUTPUT_DIR)
-    processor.save_pretrained(OUTPUT_DIR)
-    print(f"  wrote model + processor to {OUTPUT_DIR}", flush=True)
-
-    merge_mtp(OUTPUT_DIR)
-    verify_output(OUTPUT_DIR)
-
-
-def merge_mtp(out_dir: str) -> None:
-    """Splice upstream MTP tensors into the saved FP8 safetensors.
-
-    `Qwen3_5ForConditionalGeneration` skips the MTP submodule on load,
-    so oneshot's output is missing the 15 `mtp.*` tensors. We resolve
-    the upstream snapshot via the HF cache (already populated by
-    from_pretrained), pull just the MTP tensors out at BF16, and
-    rewrite the safetensors with them merged in. The compressed_tensors
-    metadata header (which carries the FP8 format identifier vLLM
-    needs to dequantize) is preserved verbatim.
-
-    Atomic-rename is used so a crash mid-write doesn't corrupt the
-    33+ GB checkpoint we just spent minutes producing.
-    """
-    print("\nMerging upstream MTP tensors...", flush=True)
-    upstream_dir = Path(snapshot_download(
-        MODEL,
-        allow_patterns=["model.safetensors.index.json",
-                        "model-*-of-*.safetensors"],
-    ))
-
-    with open(upstream_dir / "model.safetensors.index.json") as f:
-        idx = json.load(f)
-    mtp_shards = sorted({v for k, v in idx["weight_map"].items()
-                         if k.startswith("mtp.")})
-    print(f"  MTP tensors live in shards: {mtp_shards}", flush=True)
-
-    mtp_tensors: dict[str, torch.Tensor] = {}
-    for shard in mtp_shards:
-        with safe_open(upstream_dir / shard, framework="pt") as f:
-            for k in f.keys():
-                if k.startswith("mtp."):
-                    mtp_tensors[k] = f.get_tensor(k).contiguous()
-    mtp_bytes = sum(t.numel() * t.element_size()
-                    for t in mtp_tensors.values())
-    print(f"  loaded {len(mtp_tensors)} mtp tensors "
-          f"({mtp_bytes/1e6:.1f} MB)", flush=True)
-
-    fp8_files = sorted(Path(out_dir).glob("*.safetensors"))
-    if len(fp8_files) != 1:
-        sys.exit(f"FAIL: expected single safetensors shard, "
-                 f"got {fp8_files}")
-    existing_path = fp8_files[0]
-
-    with safe_open(existing_path, framework="pt") as f:
-        metadata = f.metadata() or {}
-        all_tensors = {k: f.get_tensor(k) for k in f.keys()}
-
-    overlap = set(all_tensors) & set(mtp_tensors)
-    if overlap:
-        sys.exit(f"FAIL: MTP key collision with FP8 output: "
-                 f"{sorted(overlap)[:5]}")
-    all_tensors.update(mtp_tensors)
-
-    tmp_path = existing_path.with_name(existing_path.name + ".new")
-    print(f"  rewriting {existing_path.name} "
-          f"({len(all_tensors)} tensors)...", flush=True)
-    save_file(all_tensors, str(tmp_path), metadata=metadata)
-    tmp_path.replace(existing_path)
-    print("  done", flush=True)
-
-
-def verify_output(out_dir: str) -> None:
-    """Open the saved safetensors and assert the recipe actually
-    landed: vision tower present at BF16, FP8 dtype on at least one
-    quantized Linear, lm_head not FP8."""
-    print(f"\nVerifying {out_dir}...", flush=True)
-
-    files = sorted(glob.glob(f"{out_dir}/*.safetensors"))
-    if not files:
-        sys.exit(f"FAIL: no safetensors in {out_dir}")
-
-    vision_keys: list[tuple[str, str]] = []
-    fp8_sample: tuple[str, str] | None = None
-    lm_head_dtype: str | None = None
-    mtp_keys: list[str] = []
-
-    for fp in files:
-        with safe_open(fp, framework="pt") as f:
-            for k in f.keys():
-                if k.startswith("mtp."):
-                    mtp_keys.append(k)
-                # Some FP8 quants write a sibling `_scale` / `_zero_point`;
-                # we just care about the .weight tensors.
-                if not k.endswith(".weight"):
-                    continue
-                t = f.get_tensor(k)
-                dtype = str(t.dtype).replace("torch.", "")
-                if "model.visual." in k:
-                    vision_keys.append((k, dtype))
-                if k == "lm_head.weight":
-                    lm_head_dtype = dtype
-                if (fp8_sample is None
-                        and "float8" in dtype
-                        and "language_model.layers" in k):
-                    fp8_sample = (k, dtype)
-
-    # Qwen3.6-27B has 167 vision `.weight` tensors (333 vision tensors
-    # total, the rest are `.bias` and per-block norms). 150 is a
-    # sanity floor that catches "vision tower didn't make it through"
-    # without being brittle to minor arch revisions.
-    if len(vision_keys) < 150:
-        sys.exit(f"FAIL: only {len(vision_keys)} vision tensors found "
-                 f"(expected >= 150). Vision tower didn't make it "
-                 f"through the quant.")
-
-    bad_vision = [(k, d) for k, d in vision_keys if "float8" in d]
-    if bad_vision:
-        sys.exit(f"FAIL: vision weights got quantized to FP8: "
-                 f"{bad_vision[:3]}...")
-
-    if lm_head_dtype is None:
-        sys.exit("FAIL: lm_head.weight not found in output.")
-    if "float8" in lm_head_dtype:
-        sys.exit(f"FAIL: lm_head.weight is FP8 ({lm_head_dtype}); "
-                 f"should be BF16/FP16.")
-
-    if fp8_sample is None:
-        sys.exit("FAIL: no FP8 weights found in language_model.layers — "
-                 "the recipe didn't quantize anything.")
-
-    # Upstream Qwen3.6-27B has exactly 15 mtp.* tensors (1 fused
-    # transformer block + projection + norms). merge_mtp() should
-    # have spliced all of them in.
-    if len(mtp_keys) != 15:
-        sys.exit(f"FAIL: expected 15 mtp.* tensors, found "
-                 f"{len(mtp_keys)}. merge_mtp() missed some.")
-
-    print(f"  ✓ {len(vision_keys)} vision tensors at "
-          f"{vision_keys[0][1]} (not FP8)")
-    print(f"  ✓ lm_head.weight at {lm_head_dtype} (not FP8)")
-    print(f"  ✓ FP8 sample: {fp8_sample[0]} = {fp8_sample[1]}")
-    print(f"  ✓ {len(mtp_keys)} mtp.* tensors present")
-    print("DONE")
-
-
-if __name__ == "__main__":
-    main()
--- a/src/agent/api/http.rs
+++ b/src/agent/api/http.rs
@ -100,7 +100,7 @@ impl HttpClient {
                .map_err(|e| anyhow::anyhow!("invalid server name: {e}"))?;
            let connector = tokio_rustls::TlsConnector::from(self.tls.clone());
            let tls = connector.connect(server_name.to_owned(), tcp).await
-                .map_err(|e| anyhow::anyhow!("TLS handshake to {host}: {e}"))?;
+                .context("TLS handshake")?;
            TokioIo::new(Box::new(tls) as Box<dyn IoStream>)
        } else {
            TokioIo::new(Box::new(tcp) as Box<dyn IoStream>)
@ -154,14 +154,6 @@ impl HttpResponse {
        Ok(String::from_utf8_lossy(&bytes).into_owned())
    }

-    /// Read the entire body as raw bytes (for binary downloads).
-    pub async fn bytes(self) -> Result<Bytes> {
-        let bytes = self.body.collect().await
-            .context("reading response body")?
-            .to_bytes();
-        Ok(bytes)
-    }
-
    /// Read the entire body and deserialize as JSON.
    pub async fn json<T: serde::de::DeserializeOwned>(self) -> Result<T> {
        let bytes = self.body.collect().await
@ -198,7 +190,6 @@ impl HttpClientBuilder {
    }

    pub fn build(self) -> HttpClient {
-        install_rustls_crypto_provider();
        let certs = rustls_native_certs::load_native_certs()
            .certs.into_iter()
            .collect::<Vec<_>>();
@ -206,13 +197,6 @@ impl HttpClientBuilder {
        for cert in certs {
            root_store.add(cert).ok();
        }
-        // Also trust any `.pem` files under `~/.consciousness/certs/` —
-        // self-signed server certs for our own vllm hosts live there.
-        // Drop a new `<host>.pem` in the dir to trust a new server; no
-        // code change needed.
-        for cert in load_user_certs() {
-            root_store.add(cert).ok();
-        }
        let tls = Arc::new(
            ClientConfig::builder()
                .with_root_certificates(root_store)
@ -226,65 +210,6 @@ impl HttpClientBuilder {
    }
 }

-/// Install rustls' default crypto provider exactly once per process.
-/// rustls 0.23 doesn't pick one automatically when multiple features
-/// could provide it (e.g. when tonic pulls in both ring and aws-lc-rs
-/// via transitive deps). Idempotent via OnceLock; safe to call from
-/// multiple callers.
-fn install_rustls_crypto_provider() {
-    static ONCE: std::sync::OnceLock<()> = std::sync::OnceLock::new();
-    ONCE.get_or_init(|| {
-        let _ = rustls::crypto::ring::default_provider().install_default();
-    });
-}
-
-/// Load every `.pem` file under `~/.consciousness/certs/` as a DER
-/// certificate and return them. Silent on missing dir, missing files,
-/// or parse errors — those are "no extra certs trusted" rather than
-/// hard failures, to keep startup robust.
-/// Load the concatenated PEM bytes of every `.pem` file under
-/// `~/.consciousness/certs/` — suitable for passing to a tonic
-/// `ClientTlsConfig::ca_certificate(Certificate::from_pem(...))` call
-/// so gRPC connections trust the same self-signed servers the HTTP
-/// path does.
-pub(crate) fn load_user_certs_pem_bytes() -> Vec<u8> {
-    let mut out = Vec::new();
-    let Some(home) = dirs::home_dir() else { return out };
-    let dir = home.join(".consciousness").join("certs");
-    let Ok(entries) = std::fs::read_dir(&dir) else { return out };
-    for entry in entries.flatten() {
-        let path = entry.path();
-        if path.extension().and_then(|e| e.to_str()) != Some("pem") {
-            continue;
-        }
-        if let Ok(bytes) = std::fs::read(&path) {
-            out.extend_from_slice(&bytes);
-            if !bytes.ends_with(b"\n") {
-                out.push(b'\n');
-            }
-        }
-    }
-    out
-}
-
-fn load_user_certs() -> Vec<rustls::pki_types::CertificateDer<'static>> {
-    let mut out = Vec::new();
-    let Some(home) = dirs::home_dir() else { return out };
-    let dir = home.join(".consciousness").join("certs");
-    let Ok(entries) = std::fs::read_dir(&dir) else { return out };
-    for entry in entries.flatten() {
-        let path = entry.path();
-        if path.extension().and_then(|e| e.to_str()) != Some("pem") {
-            continue;
-        }
-        let Ok(bytes) = std::fs::read(&path) else { continue };
-        for cert in rustls_pemfile::certs(&mut bytes.as_slice()).flatten() {
-            out.push(cert);
-        }
-    }
-    out
-}
-
 /// Trait alias for streams that work with hyper's IO adapter.
 trait IoStream: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static {}
 impl<T: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static> IoStream for T {}
--- a/src/agent/api/mod.rs
+++ b/src/agent/api/mod.rs
@ -7,14 +7,13 @@
 // Set POC_DEBUG=1 for verbose per-turn logging.

 pub mod http;
-pub mod salience;

-use std::time::Duration;
+use std::time::{Duration, Instant};
 use anyhow::Result;
 use tokio::sync::mpsc;
 use serde::Deserialize;

-use http::HttpClient;
+use http::{HttpClient, HttpResponse};

 #[derive(Debug, Clone, Deserialize)]
 pub struct Usage {
@ -38,21 +37,6 @@ pub struct ReadoutManifest {
 /// from pairing with the manifest fetched at startup.
 pub type TokenReadout = Vec<Vec<f32>>;

-/// Client-side sampling state. Mirrors the wire-level fields in
-/// `GenerateRequest` (proto flattened its `SamplingParams` submessage
-/// in so the server handler reads them directly), but stays as a
-/// grouped struct on the client because UI / config / tests pass
-/// these around together.
-#[derive(Clone, Copy)]
-pub struct SamplingParams {
-    pub temperature: f32,
-    pub top_p: f32,
-    pub top_k: u32,
-    /// Decode budget. 0 = prefill only; >0 = decode up to this many
-    /// tokens, stopping early on EOS / stop_token_ids.
-    pub max_tokens: u32,
-}
-
 /// A JoinHandle that aborts its task when dropped.
 pub(crate) struct AbortOnDrop(tokio::task::JoinHandle<()>);

@ -62,6 +46,13 @@ impl Drop for AbortOnDrop {
    }
 }

+/// Sampling parameters for model generation.
+#[derive(Clone, Copy)]
+pub(crate) struct SamplingParams {
+    pub temperature: f32,
+    pub top_p: f32,
+    pub top_k: u32,
+}

 // ─────────────────────────────────────────────────────────────
 //  Stream events — yielded by backends, consumed by the runner
@ -83,17 +74,6 @@ pub struct ApiClient {
    api_key: String,
    pub model: String,
    base_url: String,
-    /// Cached readout manifest — fetched once per process and shared
-    /// across ApiClient clones (every Agent/fork gets the same cell).
-    /// `None` after fetch means the server has readout disabled (404).
-    manifest: std::sync::Arc<tokio::sync::OnceCell<Option<ReadoutManifest>>>,
-    /// Shared tonic Channel to the salience gRPC endpoint. Opened on
-    /// first use and reused across every SessionHandle / RPC call
-    /// derived from this ApiClient. tonic multiplexes concurrent
-    /// requests over the HTTP/2 connection automatically.
-    salience_channel: std::sync::Arc<
-        tokio::sync::OnceCell<tonic::transport::Channel>
-    >,
 }

 impl ApiClient {
@ -108,69 +88,33 @@ impl ApiClient {
            api_key: api_key.to_string(),
            model: model.to_string(),
            base_url: base_url.trim_end_matches('/').to_string(),
-            manifest: std::sync::Arc::new(tokio::sync::OnceCell::new()),
-            salience_channel: std::sync::Arc::new(tokio::sync::OnceCell::new()),
        }
    }

-    /// Return a `SalienceClient` on the shared gRPC channel — opens
-    /// the channel on first call and reuses it thereafter across
-    /// every ApiClient clone. All scoring / inference / session
-    /// RPCs flow through this single multiplexed HTTP/2 connection.
-    ///
-    /// Bumps tonic's default 4 MiB encode/decode caps to 64 MiB on
-    /// every client. Multimodal Generate requests carry pre-encoded
-    /// image bytes inline (Qwen3.6's 768×768 patches at high res
-    /// land around 5–8 MiB per turn), and Done events with full
-    /// per-token readout vectors can also exceed 4 MiB on long runs.
-    pub async fn salience_client(&self) -> Result<
-        salience::pb::salience_client::SalienceClient<tonic::transport::Channel>
-    > {
-        let ch = self.salience_channel.get_or_try_init(|| async {
-            let grpc_url = salience::derive_grpc_url(&self.base_url);
-            log::debug!(target: "grpc",
-                "opening shared salience channel: http_base={} -> grpc_url={}",
-                self.base_url, grpc_url);
-            salience::connect_channel(&grpc_url).await
-        }).await?;
-        const MAX_GRPC_MESSAGE_BYTES: usize = 64 * 1024 * 1024;
-        Ok(salience::pb::salience_client::SalienceClient::new(ch.clone())
-            .max_decoding_message_size(MAX_GRPC_MESSAGE_BYTES)
-            .max_encoding_message_size(MAX_GRPC_MESSAGE_BYTES))
-    }
-
-    /// Stream generation via a gRPC session. Walks the prompt chunks
-    /// comparing against the session's `committed_len`, sends the
-    /// delta as interleaved `AppendImage` + intermediate
-    /// `Generate(max_tokens=0)` (for text runs separating images) +
-    /// a final `Generate(max_tokens=sampling.max_tokens, ...)` whose
-    /// Token events stream back through the channel.
-    ///
-    /// On any gRPC error the session is dropped; the next call
-    /// reopens fresh. Happy-path ordering: Token* Done. Error paths
-    /// emit `StreamToken::Error` and close.
-    pub(crate) fn stream_session_mm(
+    pub(crate) fn stream_completion_mm(
        &self,
-        session_lock: std::sync::Arc<crate::Mutex<Option<salience::SessionHandle>>>,
-        chunks: Vec<super::context::WireChunk>,
-        images: Vec<super::context::WireImage>,
-        match_upto: u32,
+        prompt_tokens: &[u32],
+        images: &[super::context::WireImage],
        sampling: SamplingParams,
        priority: Option<i32>,
-        readout_shape: Option<(u32, u32)>,
    ) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) {
        let (tx, rx) = mpsc::unbounded_channel();
-        let client = self.clone();
+        let client = self.client.clone();
+        let api_key = self.api_key.clone();
+        let model = self.model.clone();
+        let prompt_tokens = prompt_tokens.to_vec();
+        let images: Vec<(Vec<u8>, String)> = images.iter()
+            .map(|i| (i.bytes.clone(), i.mime.clone()))
+            .collect();
+        let base_url = self.base_url.clone();

        let handle = tokio::spawn(async move {
-            let result = run_session_generate(
-                session_lock, &client, chunks, images, match_upto, sampling,
-                priority, readout_shape, &tx,
+            let result = stream_completions(
+                &client, &base_url, &api_key, &model,
+                &prompt_tokens, &images, &tx, sampling, priority,
            ).await;
            if let Err(e) = result {
-                log::warn!(target: "grpc",
-                    "stream_session_mm error, forwarding to UI: {:#}", e);
-                let _ = tx.send(StreamToken::Error(format!("{:#}", e)));
+                let _ = tx.send(StreamToken::Error(e.to_string()));
            }
        });

@ -184,13 +128,9 @@ impl ApiClient {
    /// readout is enabled on the server, `Ok(None)` on 404 (disabled),
    /// or an error on any other failure.
    ///
-    /// First call performs the HTTP fetch; subsequent calls (including
-    /// across ApiClient clones sharing the same cell) return the
-    /// cached result. The manifest doesn't change during a server run.
-    pub fn model_str(&self) -> &str { &self.model }
-
+    /// Call once at startup and cache the result; the manifest doesn't
+    /// change during a server run.
    pub async fn fetch_readout_manifest(&self) -> Result<Option<ReadoutManifest>> {
-        let manifest = self.manifest.get_or_try_init(|| async {
        let url = format!("{}/readout/manifest", self.base_url);
        let auth = format!("Bearer {}", self.api_key);
        let response = self
@ -200,7 +140,7 @@ impl ApiClient {
            .map_err(|e| anyhow::anyhow!("readout manifest fetch ({}): {}", url, e))?;
        let status = response.status();
        if status.as_u16() == 404 {
-                return Ok::<_, anyhow::Error>(None);
+            return Ok(None);
        }
        if !status.is_success() {
            let body = response.text().await.unwrap_or_default();
@ -208,219 +148,366 @@ impl ApiClient {
            anyhow::bail!("readout manifest HTTP {} ({}): {}", status, url, &body[..n]);
        }
        Ok(Some(response.json().await?))
-        }).await?;
-        Ok(manifest.clone())
    }

 }

-/// Body of the gRPC-path streaming task. Walks the wire chunks
-/// against the session's `committed_len`, sends the delta via
-/// AppendImage / intermediate prefill-only Generates / final decode
-/// Generate, and translates the final Generate's Token events into
-/// StreamTokens on `tx`. On success the session handle is returned
-/// to `session_lock` with an updated `committed_len`; on error the
-/// handle is dropped so the next call reopens.
-async fn run_session_generate(
-    session_lock: std::sync::Arc<crate::Mutex<Option<salience::SessionHandle>>>,
-    client: &ApiClient,
-    chunks: Vec<super::context::WireChunk>,
-    images: Vec<super::context::WireImage>,
-    match_upto: u32,
+async fn stream_completions(
+    client: &HttpClient,
+    base_url: &str,
+    api_key: &str,
+    model: &str,
+    prompt_tokens: &[u32],
+    images: &[(Vec<u8>, String)],
+    tx: &mpsc::UnboundedSender<StreamToken>,
    sampling: SamplingParams,
    priority: Option<i32>,
-    readout_shape: Option<(u32, u32)>,
-    tx: &mpsc::UnboundedSender<StreamToken>,
-) -> Result<()> {
-    use std::time::Instant;
-    use futures::StreamExt;
-    use super::context::WireChunk;
-    use salience::pb;
+) -> anyhow::Result<()> {
+    let mut request = serde_json::json!({
+        "model": model,
+        "prompt": prompt_tokens,
+        "max_tokens": 16384,
+        "temperature": sampling.temperature,
+        "top_p": sampling.top_p,
+        "top_k": sampling.top_k,
+        "stream": true,
+        "return_token_ids": true,
+        "skip_special_tokens": false,
+        "stop_token_ids": [super::tokenizer::IM_END],
+    });
+    if !images.is_empty() {
+        use base64::Engine;
+        let b64 = base64::engine::general_purpose::STANDARD;
+        let uris: Vec<String> = images.iter()
+            .map(|(bytes, mime)| format!("data:{};base64,{}", mime, b64.encode(bytes)))
+            .collect();
+        request["multi_modal_data"] = serde_json::json!({ "image": uris });
+    }
+    if let Some(p) = priority {
+        request["priority"] = serde_json::json!(p);
+    }

-    let mut handle: salience::SessionHandle = {
-        let mut guard = session_lock.lock().await;
-        match guard.take() {
-            Some(h) => h,
-            None => {
-                drop(guard);
-                log::debug!(target: "grpc", "run_session_generate: opening new session");
-                salience::SessionHandle::open(client).await?
+    let url = format!("{}/completions", base_url);
+    let debug_label = format!("{} prompt tokens, model={}", prompt_tokens.len(), model);
+
+    let mut response = send_and_check(
+        client, &url, &request,
+        ("Authorization", &format!("Bearer {}", api_key)),
+        &[], &debug_label, None,
+    ).await?;
+
+    let mut reader = SseReader::new();
+    let mut usage = None;
+
+    while let Some(event) = reader.next_event(&mut response).await? {
+        if let Some(err_msg) = event["error"]["message"].as_str() {
+            anyhow::bail!("API error in stream: {}", err_msg);
+        }
+
+        if let Some(u) = event["usage"].as_object() {
+            if let Ok(u) = serde_json::from_value::<Usage>(serde_json::Value::Object(u.clone())) {
+                usage = Some(u);
            }
        }
+
+        let choices = match event["choices"].as_array() {
+            Some(c) => c,
+            None => continue,
        };

-    // If the client believes the match extends only up to `match_upto`
-    // but the server has more, we need to rewind. For v1 the match is
-    // either whole or broken — `match_upto` is always 0 on any mutation
-    // — so the cheapest correct recovery is to drop the session and
-    // open a fresh one.
-    if match_upto < handle.committed_len {
-        log::warn!(target: "grpc",
-            "session rewind: match_upto={} < committed_len={} — reopening session (resending {} bytes)",
-            match_upto, handle.committed_len, handle.committed_len - match_upto);
-        drop(handle);
-        handle = salience::SessionHandle::open(client).await?;
-    }
+        for choice in choices {
+            // `readout`, if present, is a nested list
+            // `[num_tokens][n_layers][n_concepts]`. Parse it once per
+            // chunk and pair rows with token ids by index — the rows
+            // are in the same order as `token_ids`.
+            let readouts: Option<Vec<TokenReadout>> = choice["readout"]
+                .as_array()
+                .map(|outer| {
+                    outer.iter().filter_map(|per_token| {
+                        per_token.as_array().map(|layers| {
+                            layers.iter().filter_map(|per_layer| {
+                                per_layer.as_array().map(|vals| {
+                                    vals.iter()
+                                        .filter_map(|v| v.as_f64().map(|f| f as f32))
+                                        .collect::<Vec<f32>>()
+                                })
+                            }).collect::<Vec<Vec<f32>>>()
+                        })
+                    }).collect()
+                });

-    // Walk chunks at byte-level, taking everything past `match_upto`
-    // as the delta. Token chunks can be split mid-way; images live
-    // inline in the token stream, so there's no separate image-chunk
-    // case anymore.
-    let mut acc: u32 = 0;
-    let mut pending: Vec<u32> = Vec::new();
-    for chunk in chunks.iter() {
-        match chunk {
-            WireChunk::Tokens(t) => {
-                let len = t.len() as u32;
-                let chunk_end = acc + len;
-                if chunk_end <= match_upto {
-                    acc = chunk_end;
-                } else if acc < match_upto {
-                    let skip = (match_upto - acc) as usize;
-                    pending.extend_from_slice(&t[skip..]);
-                    acc = chunk_end;
-                } else {
-                    pending.extend_from_slice(t);
-                    acc = chunk_end;
-                }
-            }
-        }
-    }
-
-    // Filter images to those entirely past `match_upto` — anything
-    // before is on the server already (prior turn), anything
-    // straddling is a hard divergence (image partially-sent shouldn't
-    // happen with our atomic AppendImage history; with images-inline
-    // it can only happen if mark_dirty cleared match_upto mid-block,
-    // which the AST mutators prevent).
-    let mut new_images: Vec<pb::ImageAttachment> = Vec::new();
-    for img in &images {
-        if img.pad_end <= match_upto {
-            continue; // already sent on a prior turn
-        }
-        if img.pad_start < match_upto {
-            anyhow::bail!(
-                "session divergence: image at [{},{}) straddles match_upto={}",
-                img.pad_start, img.pad_end, match_upto,
-            );
-        }
-        new_images.push(pb::ImageAttachment {
-            bytes: img.bytes.clone(),
-            mime: img.mime.clone(),
-            pad_range_start: img.pad_start,
-            pad_range_end: img.pad_end,
+            if let Some(ids) = choice["token_ids"].as_array() {
+                for (i, id_val) in ids.iter().enumerate() {
+                    if let Some(id) = id_val.as_u64() {
+                        let readout = readouts
+                            .as_ref()
+                            .and_then(|r| r.get(i).cloned());
+                        let _ = tx.send(StreamToken::Token {
+                            id: id as u32,
+                            readout,
                        });
                    }
+                }
+            } else if let Some(text) = choice["text"].as_str() {
+                // Fallback: provider didn't return token_ids, encode locally.
+                // No readout available in this path — the encoder may
+                // produce a different token count than the server did.
+                if !text.is_empty() {
+                    for id in super::tokenizer::encode(text) {
+                        let _ = tx.send(StreamToken::Token { id, readout: None });
+                    }
+                }
+            }
+        }
+    }

-    // Final Generate: pending holds any trailing text; decode up to
-    // sampling.max_tokens. Request readouts on all decode positions
-    // via a catch-all range ending at u32::MAX — decode never
-    // reaches it.
-    let prompt_len_after_append = handle.committed_len + pending.len() as u32;
-    let readout_ranges = if readout_shape.is_some() {
-        vec![pb::PositionRange {
-            start: prompt_len_after_append,
-            end: u32::MAX,
-        }]
-    } else {
-        Vec::new()
-    };
-    let req = pb::GenerateRequest {
-        session_id: handle.session_id.clone(),
-        append_tokens: pending,
-        offset: handle.committed_len,
-        truncating: false,
-        max_tokens: sampling.max_tokens,
-        logprobs_ranges: Vec::new(),
-        logprob_top_k: 0,
-        readout_ranges,
-        temperature: sampling.temperature,
-        top_p: sampling.top_p,
-        top_k: sampling.top_k,
-        stop_token_ids: Vec::new(),
-        priority: priority.unwrap_or(0),
-        images: new_images,
-    };
-    let session_id_for_log = handle.session_id.clone();
-    let t_generate = Instant::now();
-    log::debug!(target: "grpc",
-        "session {} Generate: offset={} append={} max_tokens={} priority={}",
-        session_id_for_log, req.offset, req.append_tokens.len(),
-        req.max_tokens, req.priority);
-
-    let mut stream = handle.generate(req).await?;
-    let (n_layers, n_concepts) = readout_shape.unwrap_or((0, 0));
-    let mut session_terminated = false;
-    let mut first_token_at: Option<Instant> = None;
-
-    while let Some(event) = stream.next().await {
-        let event = match event {
-            Ok(e) => e,
-            Err(status) => {
-                log::warn!(target: "grpc",
-                    "session {} Generate stream error: {} — dropping session",
-                    session_id_for_log, status);
-                session_terminated = true;
-                let _ = tx.send(StreamToken::Error(format!(
-                    "Generate stream error: {}", status,
-                )));
-                break;
-            }
-        };
-        let Some(inner) = event.event else { continue };
-        match inner {
-            pb::generate_event::Event::Token(t) => {
-                if t.is_prefill { continue; }
-                if first_token_at.is_none() {
-                    log::debug!(target: "grpc",
-                        "session {} first decode token at {:?}",
-                        session_id_for_log, t_generate.elapsed());
-                    first_token_at = Some(Instant::now());
-                }
-                let readout = if t.readout.is_empty() {
-                    None
-                } else if n_layers == 0 || n_concepts == 0 {
-                    None
-                } else {
-                    let expected = (n_layers as usize) * (n_concepts as usize);
-                    if t.readout.len() != expected {
-                        log::warn!(target: "grpc",
-                            "readout shape mismatch: expected {}*{}={}, got {}",
-                            n_layers, n_concepts, expected, t.readout.len());
-                        None
-                    } else {
-                        let n = n_concepts as usize;
-                        let mut layers: Vec<Vec<f32>> = Vec::with_capacity(n_layers as usize);
-                        for l in 0..(n_layers as usize) {
-                            layers.push(t.readout[l * n..(l + 1) * n].to_vec());
-                        }
-                        Some(layers)
-                    }
-                };
-                if tx.send(StreamToken::Token { id: t.id, readout }).is_err() {
-                    break;
-                }
-            }
-            pb::generate_event::Event::Done(d) => {
-                log::debug!(target: "grpc",
-                    "session {} Done: prompt={} completion={} total={} reason={:?} elapsed={:?}",
-                    session_id_for_log, d.prompt_tokens, d.completion_tokens,
-                    d.total_tokens, d.finish_reason, t_generate.elapsed());
-                handle.committed_len = d.total_tokens;
-                let usage = Some(Usage {
-                    prompt_tokens: d.prompt_tokens,
-                    completion_tokens: d.completion_tokens,
-                    total_tokens: d.total_tokens,
-                });
    let _ = tx.send(StreamToken::Done { usage });
-            }
-        }
-    }
-
-    if !session_terminated {
-        let mut guard = session_lock.lock().await;
-        *guard = Some(handle);
-    }
    Ok(())
 }

+/// Send an HTTP request and check for errors.
+pub(crate) async fn send_and_check(
+    client: &HttpClient,
+    url: &str,
+    body: &impl serde::Serialize,
+    auth_header: (&str, &str),
+    extra_headers: &[(&str, &str)],
+    debug_label: &str,
+    request_json: Option<&str>,
+) -> Result<HttpResponse> {
+    let debug = std::env::var("POC_DEBUG").is_ok();
+    let start = Instant::now();
+
+    if debug {
+        let payload_size = serde_json::to_string(body)
+            .map(|s| s.len())
+            .unwrap_or(0);
+        dbglog!(
+            "request: {}K payload, {}",
+            payload_size / 1024, debug_label,
+        );
+    }
+
+    let mut headers: Vec<(&str, &str)> = Vec::with_capacity(extra_headers.len() + 1);
+    headers.push(auth_header);
+    headers.extend_from_slice(extra_headers);
+
+    let response = client
+        .send_json("POST", url, &headers, body)
+        .await
+        .map_err(|e| {
+            let msg = e.to_string();
+            let cause = if msg.contains("connect timeout") || msg.contains("TCP connect") {
+                "connection refused"
+            } else if msg.contains("request timeout") {
+                "request timed out"
+            } else {
+                "request error"
+            };
+            anyhow::anyhow!("{} ({}): {}", cause, url, msg)
+        })?;
+
+    let status = response.status();
+    let elapsed = start.elapsed();
+
+    if debug {
+        for name in [
+            "x-ratelimit-remaining",
+            "x-ratelimit-limit",
+            "x-request-id",
+        ] {
+            if let Some(val) = response.header(name) {
+                dbglog!("header {}: {}", name, val);
+            }
+        }
+    }
+
+    if !status.is_success() {
+        let body = response.text().await.unwrap_or_default();
+        dbglog!(
+            "HTTP {} after {:.1}s ({}): {}",
+            status,
+            elapsed.as_secs_f64(),
+            url,
+            &body[..body.floor_char_boundary(body.len().min(500))]
+        );
+        if let Some(json) = request_json {
+            let log_dir = dirs::home_dir()
+                .unwrap_or_default()
+                .join(".consciousness/logs/failed-requests");
+            let _ = std::fs::create_dir_all(&log_dir);
+            let ts = chrono::Local::now().format("%Y%m%dT%H%M%S");
+            let path = log_dir.join(format!("{}.json", ts));
+            if std::fs::write(&path, json).is_ok() {
+                dbglog!(
+                    "saved failed request to {} (HTTP {})", path.display(), status
+                );
+            }
+        }
+        anyhow::bail!("HTTP {} ({}): {}", status, url, &body[..body.floor_char_boundary(body.len().min(1000))]);
+    }
+
+    if debug {
+        dbglog!(
+            "connected in {:.1}s (HTTP {})",
+            elapsed.as_secs_f64(),
+            status.as_u16()
+        );
+    }
+
+    Ok(response)
+}
+
+/// SSE stream reader. Handles the generic SSE plumbing shared by both
+/// backends: chunk reading with timeout, line buffering, `data:` prefix
+/// stripping, `[DONE]` detection, JSON parsing, and parse error diagnostics.
+/// Yields parsed events as serde_json::Value — each backend handles its
+/// own event types.
+pub(crate) struct SseReader {
+    line_buf: String,
+    chunk_timeout: Duration,
+    pub stream_start: Instant,
+    pub chunks_received: u64,
+    pub sse_lines_parsed: u64,
+    pub sse_parse_errors: u64,
+    debug: bool,
+    done: bool,
+    /// Serialized request payload — saved to disk on errors for replay debugging.
+    pub(crate) request_json: Option<String>,
+}
+
+impl SseReader {
+    pub(crate) fn new() -> Self {
+        Self {
+            line_buf: String::new(),
+            chunk_timeout: Duration::from_secs(crate::config::get().api_stream_timeout_secs),
+            stream_start: Instant::now(),
+            chunks_received: 0,
+            sse_lines_parsed: 0,
+            sse_parse_errors: 0,
+            debug: std::env::var("POC_DEBUG").is_ok(),
+            done: false,
+            request_json: None,
+        }
+    }
+
+    /// Attach the serialized request payload for error diagnostics.
+    /// Save the request payload to disk for replay debugging.
+    fn save_failed_request(&self, reason: &str) {
+        let Some(ref json) = self.request_json else { return };
+        let log_dir = dirs::home_dir()
+            .unwrap_or_default()
+            .join(".consciousness/logs/failed-requests");
+        let _ = std::fs::create_dir_all(&log_dir);
+        let ts = chrono::Local::now().format("%Y%m%dT%H%M%S");
+        let path = log_dir.join(format!("{}.json", ts));
+        if std::fs::write(&path, json).is_ok() {
+            dbglog!(
+                "saved failed request to {} ({})", path.display(), reason
+            );
+        }
+    }
+
+    /// Read the next SSE event from the response stream.
+    /// Returns Ok(Some(value)) for each parsed data line,
+    /// Ok(None) when the stream ends or [DONE] is received.
+    pub(crate) async fn next_event(
+        &mut self,
+        response: &mut HttpResponse,
+    ) -> Result<Option<serde_json::Value>> {
+        loop {
+            // Drain complete lines from the buffer before reading more chunks
+            while let Some(newline_pos) = self.line_buf.find('\n') {
+                let line = self.line_buf[..newline_pos].trim().to_string();
+                self.line_buf = self.line_buf[newline_pos + 1..].to_string();
+
+                if line == "data: [DONE]" {
+                    self.done = true;
+                    return Ok(None);
+                }
+                if line.is_empty()
+                    || line.starts_with("event: ")
+                    || !line.starts_with("data: ")
+                {
+                    continue;
+                }
+
+                let json_str = &line[6..];
+                self.sse_lines_parsed += 1;
+
+                match serde_json::from_str(json_str) {
+                    Ok(v) => return Ok(Some(v)),
+                    Err(e) => {
+                        self.sse_parse_errors += 1;
+                        if self.sse_parse_errors == 1 || self.debug {
+                            let preview = if json_str.len() > 200 {
+                                format!("{}...", &json_str[..200])
+                            } else {
+                                json_str.to_string()
+                            };
+                            dbglog!(
+                                "SSE parse error (#{}) {}: {}",
+                                self.sse_parse_errors, e, preview
+                            );
+                        }
+                        continue;
+                    }
+                }
+            }
+
+            if self.done {
+                return Ok(None);
+            }
+
+            // Read more data from the response stream
+            match tokio::time::timeout(self.chunk_timeout, response.chunk()).await {
+                Ok(Ok(Some(chunk))) => {
+                    self.chunks_received += 1;
+                    self.line_buf.push_str(&String::from_utf8_lossy(&chunk));
+                }
+                Ok(Ok(None)) => return Ok(None),
+                Ok(Err(e)) => {
+                    let buf_preview = if self.line_buf.is_empty() {
+                        "(empty)".to_string()
+                    } else {
+                        let n = self.line_buf.len().min(500);
+                        format!("{}B: {}", self.line_buf.len(), &self.line_buf[..n])
+                    };
+                    let msg = format!(
+                        "stream error after {} chunks, {:.1}s, {} sse lines: {} | buf: {}",
+                        self.chunks_received,
+                        self.stream_start.elapsed().as_secs_f64(),
+                        self.sse_lines_parsed,
+                        e, buf_preview,
+                    );
+                    dbglog!("{}", msg);
+                    self.save_failed_request(&msg);
+                    return Err(e.into());
+                }
+                Err(_) => {
+                    let buf_preview = if self.line_buf.is_empty() {
+                        "(empty)".to_string()
+                    } else {
+                        let n = self.line_buf.len().min(500);
+                        format!("{}B: {}", self.line_buf.len(), &self.line_buf[..n])
+                    };
+                    let msg = format!(
+                        "stream timeout: {}s, {} chunks, {} sse lines, {:.1}s elapsed | buf: {}",
+                        self.chunk_timeout.as_secs(),
+                        self.chunks_received,
+                        self.sse_lines_parsed,
+                        self.stream_start.elapsed().as_secs_f64(),
+                        buf_preview,
+                    );
+                    dbglog!("{}", msg);
+                    self.save_failed_request(&msg);
+                    anyhow::bail!(
+                        "stream timeout: no data for {}s ({} chunks received)",
+                        self.chunk_timeout.as_secs(),
+                        self.chunks_received
+                    );
+                }
+            }
+        }
+    }
+}
--- a/src/agent/api/salience.rs
+++ b/src/agent/api/salience.rs
@ -1,279 +0,0 @@
-// agent/api/salience.rs — gRPC client bindings for salience.v1.
-//
-// Thin wrapper around the tonic-generated types. Every RPC except
-// Generate is unary; Generate is server-streaming. Free functions
-// (open/close session) wrap the lifecycle RPCs; `SessionHandle` just
-// carries the id + connection params so later RPCs can reuse them.
-//
-// The old bidi Session() API is gone — see git history for its shape.
-
-#![allow(clippy::enum_variant_names)]
-
-use anyhow::{Context, Result};
-use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint};
-
-/// Generated prost + tonic types for salience.v1. Call sites use
-/// `pb::OpenSessionRequest`, `pb::Token`, etc.
-pub mod pb {
-    tonic::include_proto!("salience.v1");
-}
-
-pub type SalienceClient = pb::salience_client::SalienceClient<Channel>;
-
-/// Open a TLS-aware gRPC channel to the salience server. `base_url`
-/// looks like `https://host:8443`. User-provided CA certs under
-/// `~/.consciousness/certs/` are trusted in addition to the system
-/// roots (for self-signed server certs).
-///
-/// Returns the raw `Channel` so callers (`ApiClient::salience_client`)
-/// can cache it and clone a `SalienceClient` per request without
-/// reopening the TCP/TLS connection. tonic multiplexes RPCs over the
-/// shared channel automatically.
-pub async fn connect_channel(base_url: &str) -> Result<Channel> {
-    let mut endpoint = Endpoint::from_shared(base_url.to_string())
-        .with_context(|| format!("invalid salience endpoint: {}", base_url))?
-        .connect_timeout(std::time::Duration::from_secs(30))
-        .timeout(std::time::Duration::from_secs(600));
-
-    if base_url.starts_with("https://") {
-        let user_certs = super::http::load_user_certs_pem_bytes();
-        let mut tls = ClientTlsConfig::new().with_native_roots();
-        if !user_certs.is_empty() {
-            tls = tls.ca_certificate(Certificate::from_pem(user_certs));
-        }
-        endpoint = endpoint
-            .tls_config(tls)
-            .with_context(|| "configuring tonic TLS")?;
-    }
-
-    endpoint
-        .connect()
-        .await
-        .with_context(|| format!("failed to connect to salience server at {}", base_url))
-}
-
-/// Derive the gRPC base URL from the HTTP completions base URL.
-///
-/// vLLM's salience gRPC server listens on a different port (8443) from
-/// the HTTP endpoint (8000) and accepts no path component. Given an
-/// HTTP base like `https://host:8000/v1`, produce `https://host:8443`.
-/// No-op when the path is empty and the port isn't 8000.
-pub fn derive_grpc_url(http_base: &str) -> String {
-    let mut url = http_base.trim_end_matches('/').to_string();
-    if let Some(proto_end) = url.find("://") {
-        let rest_start = proto_end + 3;
-        if let Some(path_slash) = url[rest_start..].find('/') {
-            url.truncate(rest_start + path_slash);
-        }
-    }
-    url.replace(":8000", ":8443")
-}
-
-/// Attach a bearer token to a tonic request as gRPC metadata.
-pub fn with_auth<T>(req: &mut tonic::Request<T>, api_key: &str) {
-    if api_key.is_empty() {
-        return;
-    }
-    let bearer = format!("Bearer {}", api_key);
-    if let Ok(val) = bearer.parse() {
-        req.metadata_mut().insert("authorization", val);
-    }
-}
-
-/// Handle to a server-side session. Carries the id + an `ApiClient`
-/// clone (which holds the shared tonic Channel) so subsequent
-/// per-session RPCs go over the process-global connection.
-/// `committed_len` tracks the server's current session.tokens length
-/// so the client can submit deltas with the right `offset`.
-pub struct SessionHandle {
-    pub session_id: String,
-    pub max_model_len: u32,
-    pub committed_len: u32,
-    client: super::ApiClient,
-}
-
-impl SessionHandle {
-    pub async fn open(client: &super::ApiClient) -> Result<Self> {
-        let t0 = std::time::Instant::now();
-        log::debug!(target: "grpc", "OpenSession rpc: start");
-        let mut c = client.salience_client().await?;
-        let mut req = tonic::Request::new(pb::OpenSessionRequest {
-            model: client.model.clone(),
-        });
-        with_auth(&mut req, client.api_key());
-        let resp = c
-            .open_session(req)
-            .await
-            .with_context(|| "OpenSession RPC failed")?
-            .into_inner();
-        log::debug!(target: "grpc",
-            "OpenSession rpc: done session_id={} max_model_len={} elapsed={:?}",
-            resp.session_id, resp.max_model_len, t0.elapsed());
-        Ok(Self {
-            session_id: resp.session_id,
-            max_model_len: resp.max_model_len,
-            committed_len: 0,
-            client: client.clone(),
-        })
-    }
-
-    pub fn client(&self) -> &super::ApiClient { &self.client }
-
-    /// Debug-only: fetch the server's full session.tokens. Used to
-    /// verify client-side accounting byte-for-byte when divergence
-    /// is suspected. Not cheap on large sessions.
-    pub async fn dump_tokens(&self) -> Result<Vec<u32>> {
-        let mut c = self.client.salience_client().await?;
-        let mut req = tonic::Request::new(pb::DumpSessionRequest {
-            session_id: self.session_id.clone(),
-        });
-        with_auth(&mut req, self.client.api_key());
-        let resp = c
-            .dump_session(req)
-            .await
-            .with_context(|| "DumpSession RPC failed")?
-            .into_inner();
-        Ok(resp.tokens)
-    }
-
-    /// Open a gRPC Generate stream with the given request. Caller
-    /// iterates the returned stream of GenerateEvents; the handle's
-    /// `committed_len` should be advanced by the caller on Done based
-    /// on the Done event's `total_tokens` field.
-    pub async fn generate(
-        &self,
-        req: pb::GenerateRequest,
-    ) -> Result<tonic::Streaming<pb::GenerateEvent>> {
-        let t0 = std::time::Instant::now();
-        log::debug!(target: "grpc",
-            "Generate rpc: open-stream session={} offset={} append={} max_tokens={}",
-            self.session_id, req.offset, req.append_tokens.len(), req.max_tokens);
-        let mut c = self.client.salience_client().await?;
-        let mut req = tonic::Request::new(req);
-        with_auth(&mut req, self.client.api_key());
-        let resp = c
-            .generate(req)
-            .await
-            .with_context(|| "Generate RPC failed")?;
-        log::debug!(target: "grpc",
-            "Generate rpc: stream opened session={} open-latency={:?}",
-            self.session_id, t0.elapsed());
-        Ok(resp.into_inner())
-    }
-
-    /// Run a prefill-only Generate (max_tokens=0) that appends the
-    /// given tokens to the session. No decode, no Token events — the
-    /// server just extends session.tokens and runs prefill to warm
-    /// the KV cache. Used to interleave text runs between AppendImage
-    /// calls, and by score paths that want prompt_logprobs without a
-    /// decode step.
-    pub async fn prefill_only(&mut self, tokens: Vec<u32>) -> Result<()> {
-        use futures::StreamExt;
-        let req = pb::GenerateRequest {
-            session_id: self.session_id.clone(),
-            append_tokens: tokens,
-            offset: self.committed_len,
-            truncating: false,
-            max_tokens: 0,
-            logprobs_ranges: Vec::new(),
-            logprob_top_k: 0,
-            readout_ranges: Vec::new(),
-            temperature: 0.0,
-            top_p: 0.0,
-            top_k: 0,
-            stop_token_ids: Vec::new(),
-            priority: 0,
-            images: Vec::new(),
-        };
-        let mut stream = self.generate(req).await?;
-        while let Some(event) = stream.next().await {
-            let event = event.map_err(|s| anyhow::anyhow!("prefill Generate stream: {}", s))?;
-            if let Some(pb::generate_event::Event::Done(d)) = event.event {
-                self.committed_len = d.total_tokens;
-            }
-        }
-        Ok(())
-    }
-}
-
-/// Drop → fire CloseSession in a detached task so servers don't leak
-/// sessions until TTL eviction. Best-effort: if no tokio runtime is
-/// available we skip; the server's 30min TTL will reap it eventually.
-impl Drop for SessionHandle {
-    fn drop(&mut self) {
-        if self.session_id.is_empty() {
-            return;
-        }
-        let session_id = std::mem::take(&mut self.session_id);
-        let client = self.client.clone();
-        let Ok(rt) = tokio::runtime::Handle::try_current() else {
-            log::debug!(target: "grpc",
-                "SessionHandle drop outside tokio runtime, session {} leaks to TTL",
-                session_id);
-            return;
-        };
-        rt.spawn(async move {
-            let Ok(mut c) = client.salience_client().await else { return };
-            let mut req = tonic::Request::new(pb::CloseSessionRequest {
-                session_id: session_id.clone(),
-            });
-            with_auth(&mut req, client.api_key());
-            if let Err(e) = c.close_session(req).await {
-                log::debug!(target: "grpc",
-                    "CloseSession on drop failed for {}: {:#}",
-                    session_id, e);
-            }
-        });
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn generated_types_compile() {
-        // Exercise the shape of the new proto types — if build.rs
-        // stops regenerating against the proto, this stops compiling.
-        let _open = pb::OpenSessionRequest {
-            model: "qwen3-vl".into(),
-        };
-        let _tok = pb::Token {
-            id: 42,
-            position: 0,
-            is_prefill: false,
-            readout: vec![0.1, 0.2, 0.3],
-            logprobs: vec![pb::TokenLogprob {
-                id: 1,
-                logprob: -0.5,
-            }],
-            sampled_logprob: -0.1,
-            has_sampled_logprob: true,
-        };
-        let _done = pb::GenerateDone {
-            prompt_tokens: 10,
-            completion_tokens: 20,
-            total_tokens: 30,
-            finish_reason: pb::generate_done::FinishReason::Eos as i32,
-        };
-        let _evt = pb::GenerateEvent {
-            event: Some(pb::generate_event::Event::Done(_done)),
-        };
-    }
-
-    #[test]
-    fn derive_grpc_url_cases() {
-        assert_eq!(
-            derive_grpc_url("https://host:8000/v1"),
-            "https://host:8443",
-        );
-        assert_eq!(
-            derive_grpc_url("https://host:8000/"),
-            "https://host:8443",
-        );
-        assert_eq!(
-            derive_grpc_url("https://host:9000/v1"),
-            "https://host:9000",
-        );
-    }
-}
--- a/src/agent/context.rs
+++ b/src/agent/context.rs
@ -125,19 +125,7 @@ impl<'de> Deserialize<'de> for NodeLeaf {
            body: NodeBody,
            timestamp: DateTime<Utc>,
        }
-        let mut raw = Raw::deserialize(deserializer)?;
-        // Heal pre-refactor logs: Image leaves used to be deserialized
-        // with token_count=0 (server-authoritative count was applied
-        // after AppendImage). With pads now expanded client-side at
-        // construction, recompute from the persisted dimensions if
-        // the stored count is 0.
-        if let NodeBody::Image { orig_height, orig_width, token_count, .. }
-            = &mut raw.body
-        {
-            if *token_count == 0 {
-                *token_count = qwen3_image_token_count(*orig_height, *orig_width);
-            }
-        }
+        let raw = Raw::deserialize(deserializer)?;
        let token_ids = raw.body.compute_token_ids();
        Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp })
    }
@ -155,44 +143,18 @@ pub enum AstNode {
        /// Maps memory key → divergence score for this response.
        #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
        memory_scores: std::collections::BTreeMap<String, f64>,
-        /// Cached token stream for the subtree. When `Some`, wire-out
-        /// uses these bytes verbatim and skips recursion into children.
-        /// Populated by the response parser from the server's exact
-        /// stream; also computable from children as a fallback. Cleared
-        /// on any edit to a descendant. Not serialized — transient.
-        #[serde(skip, default)]
-        token_ids: Option<Vec<u32>>,
    },
 }

 /// The context window: four sections as Vec<AstNode>.
-///
-/// All mutation MUST go through `ContextState`'s public methods. Two
-/// invariants ride on this:
-/// 1. Every `Leaf.token_ids` matches its `body.compute_token_ids()`.
-/// 2. For every `Branch { token_ids: Some(cached), .. }`, the cached
-///    token stream matches what `wire_into` would produce by walking
-///    `children` from scratch. Any mutation that touches a Branch's
-///    children — directly or via a descendant — must clear the
-///    Branch's `token_ids` so it gets recomputed on next wire-out.
-///
-/// The `&mut Vec<AstNode>` escape hatches are intentionally NOT
-/// exposed; if you find yourself wanting one, add a focused method
-/// here that maintains the invariants.
+/// All mutation goes through ContextState methods to maintain the invariant
+/// that token_ids on every leaf matches its rendered text.
 pub struct ContextState {
    system: Vec<AstNode>,
    identity: Vec<AstNode>,
    journal: Vec<AstNode>,
    conversation: Vec<AstNode>,
    pub conversation_log: Option<crate::mind::log::ConversationLog>,
-    /// Length of the session's token stream on the server, as of the
-    /// last Done event. Updated by the grpc layer.
-    server_committed_len: u32,
-    /// Prefix length of our walk that still matches the server's
-    /// session.tokens byte-for-byte. When < `server_committed_len`
-    /// the session needs rewinding (truncating=true at this offset).
-    /// Reset to 0 on any mutation that could have changed sent bytes.
-    client_match_upto: u32,
 }

 impl Clone for ContextState {
@ -203,8 +165,6 @@ impl Clone for ContextState {
            journal: self.journal.clone(),
            conversation: self.conversation.clone(),
            conversation_log: None, // forked contexts don't log
-            server_committed_len: self.server_committed_len,
-            client_match_upto: self.client_match_upto,
        }
    }
 }
@ -241,10 +201,6 @@ pub struct ResponseParser {
    think_buf: String,
    in_tool_call: bool,
    tool_call_buf: String,
-    /// Raw generated token IDs, in arrival order. Combined with the
-    /// prologue at `finish` to stamp the Branch's authoritative
-    /// token cache — the bytes the server has for this branch.
-    generated_tokens: Vec<u32>,
 }

 impl Role {
@ -356,16 +312,6 @@ impl NodeLeaf {
    pub fn token_ids(&self) -> &[u32]    { &self.token_ids }
    pub fn tokens(&self) -> usize        { self.token_ids.len() }
    pub fn timestamp(&self) -> DateTime<Utc> { self.timestamp }
-
-    /// If this is an Image leaf, update its IMAGE_PAD count to `n` and
-    /// recompute cached `token_ids`. No-op on non-Image leaves —
-    /// callers know the body shape via `body()`.
-    pub fn set_image_token_count(&mut self, n: u32) {
-        if let NodeBody::Image { token_count, .. } = &mut self.body {
-            *token_count = n;
-            self.token_ids = self.body.compute_token_ids();
-        }
-    }
 }

 impl AstNode {
@ -414,9 +360,6 @@ impl AstNode {
        orig_height: u32,
        orig_width: u32,
    ) -> Self {
-        // Pad count is computed eagerly from dimensions — no more
-        // "unknown until server responds" shape. Server validates
-        // on the Generate call; mismatches fail loud.
        let token_count = qwen3_image_token_count(orig_height, orig_width);
        Self::Leaf(NodeLeaf::new(NodeBody::Image {
            bytes,
@ -430,13 +373,7 @@ impl AstNode {
    // -- Branch constructors --------------------------------------------------

    pub fn branch(role: Role, children: Vec<AstNode>) -> Self {
-        Self::Branch {
-            role,
-            children,
-            timestamp: Utc::now(),
-            memory_scores: Default::default(),
-            token_ids: None,
-        }
+        Self::Branch { role, children, timestamp: Utc::now(), memory_scores: Default::default() }
    }

    pub fn system_msg(text: impl Into<String>) -> Self {
@ -445,7 +382,6 @@ impl AstNode {
            children: vec![Self::content(text)],
            timestamp: Utc::now(),
            memory_scores: Default::default(),
-            token_ids: None,
        }
    }

@ -455,7 +391,6 @@ impl AstNode {
            children: vec![Self::content(text)],
            timestamp: Utc::now(),
            memory_scores: Default::default(),
-            token_ids: None,
        }
    }

@ -467,12 +402,11 @@ impl AstNode {
                let token_ids = leaf.body.compute_token_ids();
                Self::Leaf(NodeLeaf { token_ids, ..leaf })
            }
-            Self::Branch { role, children, timestamp, memory_scores, .. } => Self::Branch {
+            Self::Branch { role, children, timestamp, memory_scores } => Self::Branch {
                role,
                children: children.into_iter().map(|c| c.retokenize()).collect(),
                timestamp,
                memory_scores,
-                token_ids: None,
            },
        }
    }
@ -549,10 +483,7 @@ impl AstNode {
    fn token_ids_into(&self, out: &mut Vec<u32>) {
        match self {
            Self::Leaf(leaf) => out.extend_from_slice(&leaf.token_ids),
-            Self::Branch { token_ids: Some(cached), .. } => {
-                out.extend_from_slice(cached);
-            }
-            Self::Branch { role, children, token_ids: None, .. } => {
+            Self::Branch { role, children, .. } => {
                out.push(tokenizer::IM_START);
                out.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
                for child in children {
@ -581,8 +512,7 @@ impl Ast for AstNode {
    fn tokens(&self) -> usize {
        match self {
            Self::Leaf(leaf) => leaf.tokens(),
-            Self::Branch { token_ids: Some(cached), .. } => cached.len(),
-            Self::Branch { role, children, token_ids: None, .. } => {
+            Self::Branch { role, children, .. } => {
                1 + role_header_tokens(*role)
                    + children.iter().map(|c| c.tokens()).sum::<usize>()
                    + 1 + newline_tokens()
@ -736,7 +666,6 @@ impl ResponseParser {
            think_buf: String::new(),
            in_tool_call: false,
            tool_call_buf: String::new(),
-            generated_tokens: Vec::new(),
        }
    }

@ -755,34 +684,18 @@ impl ResponseParser {
        let handle = tokio::spawn(async move {
            let mut parser = self;
            let agent_name = agent.state.lock().await.provenance.clone();
-            eprintln!(
-                "[agent:{agent_name}] parser task start branch_idx={} in_think={}",
-                parser.branch_idx, parser.in_think,
-            );
            let log_path = format!("/tmp/poc-{}.log", agent_name);
            let mut log_file = std::fs::OpenOptions::new()
                .create(true).append(true).open(&log_path).ok();
            let mut full_text = String::new();
-            let mut token_count: usize = 0;
            while let Some(event) = stream.recv().await {
                match event {
                    super::api::StreamToken::Token { id, readout } => {
-                        token_count += 1;
-                        if token_count == 1 {
-                            eprintln!("[agent:{agent_name}] parser first token id={}", id);
-                        } else if token_count % 256 == 0 {
-                            eprintln!(
-                                "[agent:{agent_name}] parser token_count={} chars={}",
-                                token_count,
-                                full_text.len(),
-                            );
-                        }
                        if let Some(r) = readout {
                            if let Ok(mut buf) = agent.readout.lock() {
                                buf.push(id, r);
                            }
                        }
-                        parser.generated_tokens.push(id);
                        let text = super::tokenizer::decode(&[id]);
                        full_text.push_str(&text);
                        let mut ctx = agent.context.lock().await;
@ -801,12 +714,6 @@ impl ResponseParser {
                        }
                    }
                    super::api::StreamToken::Done { usage } => {
-                        eprintln!(
-                            "[agent:{agent_name}] parser done token_count={} chars={} usage={:?}",
-                            token_count,
-                            full_text.len(),
-                            usage,
-                        );
                        if let Some(ref mut f) = log_file {
                            use std::io::Write;
                            let ctx = agent.context.lock().await;
@ -823,31 +730,19 @@ impl ResponseParser {
                                let _ = writeln!(f, "  unparsed text: {}", &full_text[..end]);
                            }
                        }
-                        if let Some(ref u) = usage {
+                        if let Some(u) = usage {
                            agent.state.lock().await.last_prompt_tokens = u.prompt_tokens;
                        }
                        let mut ctx = agent.context.lock().await;
                        parser.finish(&mut ctx);
-                        if let Some(u) = usage {
-                            ctx.note_session_synced(u.total_tokens);
-                        }
                        return Ok(());
                    }
                    super::api::StreamToken::Error(e) => {
-                        eprintln!("[agent:{agent_name}] parser stream error: {}", e);
                        return Err(anyhow::anyhow!("{}", e));
                    }
                }
            }
-            eprintln!(
-                "[agent:{agent_name}] parser stream closed without done token_count={} chars={}",
-                token_count,
-                full_text.len(),
-            );
-            Err(anyhow::anyhow!(
-                "stream closed without Done event after {} tokens",
-                token_count,
-            ))
+            Ok(())
        });
        (rx, handle)
    }
@ -928,7 +823,7 @@ impl ResponseParser {
    }

    fn push_child(&self, ctx: &mut ContextState, child: AstNode) {
-        ctx.push_child_raw(Section::Conversation, self.branch_idx, child);
+        ctx.push_child(Section::Conversation, self.branch_idx, child);
    }

    fn flush_content(&mut self, ctx: &mut ContextState) {
@ -942,69 +837,10 @@ impl ResponseParser {
    }

    pub fn finish(mut self, ctx: &mut ContextState) {
-        // Salvage any in-flight tag accumulators if the stream ended
-        // before the close tag arrived (max_tokens, premature EOS,
-        // server-side cancel). Without this, an unterminated
-        // <think>...</think> drops all of self.think_buf and only the
-        // trailing rolling window in self.buf survives — observed as
-        // "responses cut off, only the last ~8 characters come
-        // through" because drain_safe keeps `close_tag.len()` bytes
-        // (8 for `</think>`) at the tail of buf.
-        if self.in_think {
        if !self.buf.is_empty() {
-                self.think_buf.push_str(&std::mem::take(&mut self.buf));
-            }
-            let text = std::mem::take(&mut self.think_buf).trim().to_string();
-            if !text.is_empty() {
-                self.push_child(ctx, AstNode::thinking(text));
-            }
-            self.in_think = false;
-        } else if self.in_tool_call {
-            if !self.buf.is_empty() {
-                self.tool_call_buf.push_str(&std::mem::take(&mut self.buf));
-            }
-            let body = std::mem::take(&mut self.tool_call_buf);
-            match parse_tool_call_body(&body) {
-                Some((name, args)) => {
-                    self.flush_content(ctx);
-                    self.push_child(ctx, AstNode::tool_call(&name, &args));
-                }
-                None => {
-                    // Body's likely incomplete (no `</tool_call>` ever
-                    // arrived). Wrap as content with the open tag so the
-                    // model can see its own truncated attempt next turn
-                    // rather than losing it silently.
-                    self.content_parts.push(format!("<tool_call>\n{}", body));
-                }
-            }
-            self.in_tool_call = false;
-        } else if !self.buf.is_empty() {
            self.content_parts.push(std::mem::take(&mut self.buf));
        }
        self.flush_content(ctx);
-
-        // Stamp the authoritative token cache onto the branch.
-        // Layout mirrors the full chat-template rendering of a
-        // message block:
-        //
-        //   IM_START + "assistant\n" [+ "<think>\n"]   (prologue — what we sent)
-        //   + generated_tokens                          (what the server generated, ends in IM_END)
-        //   + "\n"                                      (trailing newline — template-required)
-        //
-        // Server only has through the IM_END (model stops on it,
-        // doesn't emit "\n"). Match-upto lands inside the cache
-        // right after IM_END; the chunk-walk's straddle path picks
-        // up the trailing "\n" as the head of the next turn's delta.
-        // The "\n" between turns matters: without it Qwen sees
-        // `<|im_end|><|im_start|>` back-to-back (no newline) and
-        // responds with garbage.
-        let prologue_text = if self.in_think { "assistant\n<think>\n" } else { "assistant\n" };
-        let mut cache = Vec::with_capacity(1 + self.generated_tokens.len() + 8);
-        cache.push(tokenizer::IM_START);
-        cache.extend(tokenizer::encode(prologue_text));
-        cache.extend(self.generated_tokens);
-        cache.extend(tokenizer::encode("\n"));
-        ctx.set_branch_cache(Section::Conversation, self.branch_idx, cache);
    }
 }

@ -1016,77 +852,20 @@ impl ContextState {
            journal: Vec::new(),
            conversation: Vec::new(),
            conversation_log: None,
-            server_committed_len: 0,
-            client_match_upto: 0,
        }
    }

-    // -- Server sync tracking -------------------------------------------------
-
-    /// Length of the session's token stream on the server. Updated by
-    /// the grpc layer from Generate Done events.
-    pub fn server_committed_len(&self) -> u32 { self.server_committed_len }
-
-    /// Prefix of our walk we still believe matches the server
-    /// byte-for-byte. If less than `server_committed_len`, the next
-    /// Generate must send `truncating=true` at this offset.
-    pub fn client_match_upto(&self) -> u32 { self.client_match_upto }
-
-    /// Called by the grpc layer after a successful Generate Done:
-    /// records both the server's new length and the fact that we
-    /// match up to it (we just sent everything).
-    pub fn note_session_synced(&mut self, total_tokens: u32) {
-        self.server_committed_len = total_tokens;
-        self.client_match_upto = total_tokens;
-    }
-
-    /// Reset match-upto to 0. Called from every mutation that could
-    /// have touched a region the server already has. For now,
-    /// conservatively drops alignment entirely — finer-grained
-    /// tracking (match-upto at the mutated node's offset) is a
-    /// future optimization.
-    fn mark_dirty(&mut self) {
-        self.client_match_upto = 0;
-    }
-
    // -- Read access ----------------------------------------------------------

    pub fn system(&self) -> &[AstNode]       { &self.system }
    pub fn identity(&self) -> &[AstNode]     { &self.identity }
    pub fn journal(&self) -> &[AstNode]      { &self.journal }
    pub fn conversation(&self) -> &[AstNode] { &self.conversation }
-
-    /// Set or clear a single `memory_scores` entry on an Assistant
-    /// Branch. Used by the full-matrix scorer to attribute per-memory
-    /// divergence onto the response. `score = None` removes the key;
-    /// `Some(s)` inserts/overwrites.
-    ///
-    /// Doesn't affect the Branch's token cache: `memory_scores` is a
-    /// serialized-but-non-tokenizing annotation. No-op (with a debug
-    /// log) if the index points to a Leaf or a non-Assistant Branch —
-    /// callers are typically iterating on stale indices and we'd
-    /// rather skip than panic.
-    pub fn set_branch_memory_score(
-        &mut self,
-        section: Section,
-        index: usize,
-        key: &str,
-        score: Option<f64>,
-    ) {
-        let nodes = self.section_mut(section);
-        let Some(node) = nodes.get_mut(index) else { return };
-        let AstNode::Branch { role: Role::Assistant, memory_scores, .. } = node
-        else { return };
-        match score {
-            Some(s) => { memory_scores.insert(key.to_string(), s); }
-            None => { memory_scores.remove(key); }
-        }
-    }
+    pub fn conversation_mut(&mut self) -> &mut Vec<AstNode> { &mut self.conversation }

    pub fn sections(&self) -> [&Vec<AstNode>; 4] {
        [&self.system, &self.identity, &self.journal, &self.conversation]
    }
-
 }

 impl Ast for ContextState {
@ -1119,63 +898,30 @@ impl Ast for ContextState {
 }

 /// An image collected from the AST for a request body. The AST stores
-/// Image metadata collected during `wire_chunks` — the binary +
-/// mime plus the absolute token-position range of the image's
-/// pre-expanded placeholder run in the full wire stream. Sent
-/// alongside `append_tokens` in `GenerateRequest` so the server
-/// can attach vision features to the declared positions. Positions
-/// are absolute within the full wire walk starting at offset 0,
-/// i.e. the same coordinate system as `session.tokens` on the
-/// server once the walk has been applied.
-#[derive(Clone)]
+/// the pre-expanded token form (N image_pads) for accurate budget
+/// accounting; the wire form collapses each Image to a single
+/// `<|image_pad|>` between vision bookends and ships the bytes
+/// separately as multi_modal_data.
 pub struct WireImage {
    pub bytes: Vec<u8>,
    pub mime: String,
-    pub pad_start: u32,
-    pub pad_end: u32,
-}
-
-/// One piece of the wire stream for the gRPC session path. Since
-/// images now live inline in the token stream (pre-expanded at AST
-/// construction time), there's only one variant — a run of tokens.
-/// The parallel `Vec<WireImage>` returned by `wire_chunks` gives the
-/// binary + position metadata for each embedded image.
-#[derive(Clone)]
-pub enum WireChunk {
-    Tokens(Vec<u32>),
 }

 fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>) {
    match node {
        AstNode::Leaf(leaf) => match leaf.body() {
            NodeBody::Image { bytes, mime, .. } => {
-                // The Image leaf's token_ids is already
-                // [VISION_START, IMAGE_PAD * N, VISION_END]. Inline
-                // those into the token stream and record the pad-run
-                // range so the server can attach features to the
-                // declared positions.
-                let pad_start = tokens.len() as u32;
-                tokens.extend_from_slice(leaf.token_ids());
-                let pad_end = tokens.len() as u32;
+                tokens.push(tokenizer::VISION_START);
+                tokens.push(tokenizer::IMAGE_PAD);
+                tokens.push(tokenizer::VISION_END);
                images.push(WireImage {
                    bytes: bytes.clone(),
                    mime: mime.clone(),
-                    pad_start,
-                    pad_end,
                });
            }
            _ => tokens.extend_from_slice(leaf.token_ids()),
        },
-        AstNode::Branch { token_ids: Some(cached), children, .. } => {
-            // Cached branches still need their image children paired
-            // up with the vision-block ranges embedded in the cached
-            // token stream — the cache captures vision tokens but not
-            // the matching bytes/mime.
-            let base = tokens.len() as u32;
-            tokens.extend_from_slice(cached);
-            pair_cached_images(cached, children, base, images);
-        }
-        AstNode::Branch { role, children, token_ids: None, .. } => {
+        AstNode::Branch { role, children, .. } => {
            tokens.push(tokenizer::IM_START);
            tokens.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
            for c in children {
@ -1187,101 +933,6 @@ fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>)
    }
 }

-/// Depth-first iterator over Image leaves under a slice of AST nodes.
-/// Yields `(bytes, mime)` borrows in document order; doesn't allocate
-/// per yield (only a stack of pending nodes).
-struct ImageLeaves<'a> {
-    stack: Vec<&'a AstNode>,
-}
-
-impl<'a> ImageLeaves<'a> {
-    fn new(nodes: &'a [AstNode]) -> Self {
-        let mut stack = Vec::with_capacity(nodes.len());
-        stack.extend(nodes.iter().rev());
-        Self { stack }
-    }
-}
-
-impl<'a> Iterator for ImageLeaves<'a> {
-    type Item = (&'a [u8], &'a str);
-    fn next(&mut self) -> Option<Self::Item> {
-        while let Some(node) = self.stack.pop() {
-            match node {
-                AstNode::Leaf(leaf) => {
-                    if let NodeBody::Image { bytes, mime, .. } = leaf.body() {
-                        return Some((bytes, mime));
-                    }
-                }
-                AstNode::Branch { children, .. } => {
-                    self.stack.extend(children.iter().rev());
-                }
-            }
-        }
-        None
-    }
-}
-
-/// Iterator over `(start, end)` token-offset pairs for each
-/// `VISION_START..VISION_END` block in a token slice. Panics on an
-/// unmatched VISION_START — that's an upstream tokenization bug
-/// worth a loud failure.
-fn vision_blocks(cached: &[u32]) -> impl Iterator<Item = (usize, usize)> + '_ {
-    let mut cur = 0;
-    std::iter::from_fn(move || {
-        while cur < cached.len() {
-            if cached[cur] == tokenizer::VISION_START {
-                let start = cur;
-                let end_rel = cached[cur..].iter()
-                    .position(|&t| t == tokenizer::VISION_END)
-                    .unwrap_or_else(|| panic!(
-                        "unmatched VISION_START at offset {} in cached branch",
-                        start));
-                let end = cur + end_rel + 1;
-                cur = end;
-                return Some((start, end));
-            }
-            cur += 1;
-        }
-        None
-    })
-}
-
-/// For a Branch whose `token_ids` are cached and may contain inlined
-/// vision blocks (`VISION_START + IMAGE_PAD*N + VISION_END`), recover
-/// the matching image bytes/mime from the children and emit one
-/// `WireImage` per vision block with the absolute pad offsets in the
-/// parent token stream.
-///
-/// The cache stores tokens but not image payloads; the AST stores
-/// image payloads in the children but not their post-cache positions.
-/// Pair them by zipping the two iterators; mismatched counts panic
-/// loudly because that's an AST/cache invariant violation that
-/// would otherwise mis-pair images on the wire.
-fn pair_cached_images(
-    cached: &[u32],
-    children: &[AstNode],
-    base_offset: u32,
-    images: &mut Vec<WireImage>,
-) {
-    let mut blocks = vision_blocks(cached);
-    let mut leaves = ImageLeaves::new(children);
-    loop {
-        match (blocks.next(), leaves.next()) {
-            (Some((s, e)), Some((bytes, mime))) => images.push(WireImage {
-                bytes: bytes.to_vec(),
-                mime: mime.to_string(),
-                pad_start: base_offset + s as u32,
-                pad_end: base_offset + e as u32,
-            }),
-            (None, None) => break,
-            (Some(_), None) => panic!(
-                "cached branch has more vision blocks than image children"),
-            (None, Some(_)) => panic!(
-                "cached branch has fewer vision blocks than image children"),
-        }
-    }
-}
-
 pub fn memory_key(node: &AstNode) -> Option<&str> {
    match node {
        AstNode::Leaf(leaf) => match leaf.body() {
@ -1391,92 +1042,6 @@ impl ContextState {
        }
        (tokens, images, assistant_ranges)
    }
-
-    /// Build the wire stream as interleaved `WireChunk`s for the gRPC
-    /// session path. Returns a tuple of (chunks, images): the chunks
-    /// hold the full token stream (with vision blocks inlined as
-    /// `VISION_START + IMAGE_PAD*N + VISION_END`), and the images
-    /// list carries each embedded image's binary + position range so
-    /// the gRPC layer can attach them via `GenerateRequest.images`.
-    ///
-    /// Note: with images inlined into the token stream, the chunks
-    /// list is structurally a single `Tokens` chunk in the common
-    /// case — the multi-chunk shape persists only because some
-    /// callers may want the option of inserting breakpoints later.
-    ///
-    /// `conv_range` and `skip` mirror `wire_prompt` — select a
-    /// conversation slice and drop identity / conversation nodes by
-    /// predicate.
-    pub fn wire_chunks<F>(
-        &self,
-        conv_range: std::ops::Range<usize>,
-        mut skip: F,
-    ) -> (Vec<WireChunk>, Vec<WireImage>)
-    where F: FnMut(&AstNode) -> bool,
-    {
-        let mut buf: Vec<u32> = Vec::new();
-        let mut images: Vec<WireImage> = Vec::new();
-
-        fn visit(
-            node: &AstNode,
-            buf: &mut Vec<u32>,
-            images: &mut Vec<WireImage>,
-        ) {
-            match node {
-                AstNode::Leaf(leaf) => match leaf.body() {
-                    NodeBody::Image { bytes, mime, .. } => {
-                        // Pre-expanded vision block lives in
-                        // leaf.token_ids: [VISION_START, IMAGE_PAD*N,
-                        // VISION_END]. Inline + record the range.
-                        let pad_start = buf.len() as u32;
-                        buf.extend_from_slice(leaf.token_ids());
-                        let pad_end = buf.len() as u32;
-                        images.push(WireImage {
-                            bytes: bytes.clone(),
-                            mime: mime.clone(),
-                            pad_start,
-                            pad_end,
-                        });
-                    }
-                    _ => buf.extend_from_slice(leaf.token_ids()),
-                },
-                AstNode::Branch { token_ids: Some(cached), children, .. } => {
-                    // Same fix as wire_into's cached arm: the cache
-                    // holds vision tokens but not the matching bytes,
-                    // so walk children to recover them.
-                    let base = buf.len() as u32;
-                    buf.extend_from_slice(cached);
-                    pair_cached_images(cached, children, base, images);
-                }
-                AstNode::Branch { role, children, token_ids: None, .. } => {
-                    buf.push(tokenizer::IM_START);
-                    buf.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
-                    for c in children {
-                        visit(c, buf, images);
-                    }
-                    buf.push(tokenizer::IM_END);
-                    buf.extend(tokenizer::encode("\n"));
-                }
-            }
-        }
-
-        for node in self.system()   { visit(node, &mut buf, &mut images); }
-        for node in self.identity() {
-            if skip(node) { continue; }
-            visit(node, &mut buf, &mut images);
-        }
-        for node in self.journal()  { visit(node, &mut buf, &mut images); }
-        for node in &self.conversation()[conv_range] {
-            if skip(node) { continue; }
-            visit(node, &mut buf, &mut images);
-        }
-        let chunks = if buf.is_empty() {
-            Vec::new()
-        } else {
-            vec![WireChunk::Tokens(buf)]
-        };
-        (chunks, images)
-    }
 }

 impl ContextState {
@ -1496,27 +1061,17 @@ impl ContextState {
                dbglog!("warning: log: {:#}", e);
            }
        }
-        // Conversation appends always go to the tail — past committed —
-        // so they don't break the match. Any other section mutates a
-        // region the server may already have, so drop alignment.
-        if section != Section::Conversation {
-            self.mark_dirty();
-        }
        self.section_mut(section).push(node);
    }

    /// Push without logging.
    pub fn push_no_log(&mut self, section: Section, node: AstNode) {
-        if section != Section::Conversation {
-            self.mark_dirty();
-        }
        self.section_mut(section).push(node);
    }

    /// Replace the body of a leaf at `index` in `section`.
    /// Re-tokenizes to maintain the invariant.
    pub fn set_message(&mut self, section: Section, index: usize, body: NodeBody) {
-        self.mark_dirty();
        let nodes = self.section_mut(section);
        let node = &mut nodes[index];
        match node {
@ -1542,12 +1097,10 @@ impl ContextState {
    }

    pub fn del(&mut self, section: Section, index: usize) -> AstNode {
-        self.mark_dirty();
        self.section_mut(section).remove(index)
    }

    pub fn clear(&mut self, section: Section) {
-        self.mark_dirty();
        self.section_mut(section).clear();
    }

@ -1568,7 +1121,6 @@ impl ContextState {
    ///          are > 50% of conversation tokens) or oldest conversation entry.
    /// Phase 3: Snap to user message boundary at start.
    pub fn trim_conversation(&mut self) {
-        self.mark_dirty();
        let max_tokens = context_budget_tokens();
        let fixed = self.system.iter().map(|n| n.tokens()).sum::<usize>()
            + self.identity.iter().map(|n| n.tokens()).sum::<usize>()
@ -1645,49 +1197,11 @@ impl ContextState {
    }

    /// Push a child node into a branch at `index` in `section`.
-    /// Clears the branch's cached token stream — wire-out will recompute
-    /// from children until the cache is repopulated. If the cache was
-    /// populated (server had these bytes), drops session alignment.
    pub fn push_child(&mut self, section: Section, index: usize, child: AstNode) {
-        let node = &mut self.section_mut(section)[index];
-        let was_cached = matches!(node, AstNode::Branch { token_ids: Some(_), .. });
-        match node {
-            AstNode::Branch { children, token_ids, .. } => {
-                children.push(child);
-                *token_ids = None;
-            }
-            AstNode::Leaf(_) => panic!("push_child on leaf node"),
-        }
-        if was_cached {
-            self.mark_dirty();
-        }
-    }
-
-    /// Like `push_child` but preserves the branch's cached token stream.
-    /// Used by the response parser, which is simultaneously populating
-    /// the cache from the authoritative server stream and pushing the
-    /// parsed-out children — the two stay consistent by construction.
-    /// Module-private: callers outside `context.rs` must go through
-    /// `push_child` so the invariant is maintained.
-    fn push_child_raw(&mut self, section: Section, index: usize, child: AstNode) {
        let node = &mut self.section_mut(section)[index];
        match node {
            AstNode::Branch { children, .. } => children.push(child),
-            AstNode::Leaf(_) => panic!("push_child_raw on leaf node"),
-        }
-    }
-
-    /// Stamp a verbatim token cache onto the branch at `index` in
-    /// `section`. Used by the response parser to record the server's
-    /// authoritative token stream for the just-finished turn.
-    /// Module-private: the cache is an invariant-load-bearing piece
-    /// of state, populated only by code that holds the server's
-    /// ground truth.
-    fn set_branch_cache(&mut self, section: Section, index: usize, tokens: Vec<u32>) {
-        let node = &mut self.section_mut(section)[index];
-        match node {
-            AstNode::Branch { token_ids, .. } => *token_ids = Some(tokens),
-            AstNode::Leaf(_) => panic!("set_branch_cache on leaf node"),
+            AstNode::Leaf(_) => panic!("push_child on leaf node"),
        }
    }

@ -1711,14 +1225,6 @@ impl ContextState {
 // to at request time. Constants come from Qwen3.5-27B's preprocessor_config.
 // ---------------------------------------------------------------------------

-// Production client-side computation of image-token expansion. With
-// the delta-session protocol, the client writes the pre-expanded
-// vision block (VISION_START + N*IMAGE_PAD + VISION_END) directly
-// into the token stream at Image-leaf construction time, and tells
-// the server where each image's pad run lives via
-// GenerateRequest.images. Server validates that this N matches
-// what the vision encoder actually produces and rejects on
-// mismatch — so drift here fails loudly, not silently.
 const QWEN3_PATCH_SIZE: u32 = 16;
 const QWEN3_MERGE_SIZE: u32 = 2;
 const QWEN3_MIN_PIXELS: u64 = 65_536;
@ -1752,10 +1258,11 @@ fn smart_resize(h: u32, w: u32, factor: u32, min_pixels: u64, max_pixels: u64) -
    }
 }

-/// How many `<|image_pad|>` tokens the Qwen3-VL vision encoder will
-/// produce for an image of the given dimensions. Server verifies
-/// this count against its own encoder run and rejects on mismatch.
-pub fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 {
+/// Compute how many `<|image_pad|>` tokens vLLM will emit for an image of
+/// the given dimensions. Matches Qwen3VL's feature-size calculation exactly:
+///   (grid_h * grid_w) / merge_size^2
+/// where (grid_h, grid_w) = resized dims / patch_size.
+fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 {
    let factor = QWEN3_PATCH_SIZE * QWEN3_MERGE_SIZE;
    let (rh, rw) = smart_resize(orig_h, orig_w, factor, QWEN3_MIN_PIXELS, QWEN3_MAX_PIXELS);
    (rh / QWEN3_PATCH_SIZE) * (rw / QWEN3_PATCH_SIZE) / (QWEN3_MERGE_SIZE * QWEN3_MERGE_SIZE)
@ -2206,34 +1713,29 @@ mod tests {
    }

    #[test]
-    fn test_wire_prompt_preserves_expanded_image_pads() {
+    fn test_wire_prompt_collapses_image_pads() {
        let mut ctx = ContextState::new();
        ctx.push_no_log(Section::Conversation, AstNode::branch(Role::User, vec![
            AstNode::content("look:"),
            AstNode::image(vec![0xDE, 0xAD], "image/png", 512, 512),
        ]));

-        // AST side and wire side should both carry N image_pads + bookends —
-        // server's session.tokens length must match what vLLM's engine will
-        // actually process. Binary image bytes are shipped separately in
-        // multi_modal_data via the WireImage list.
-        let n_expected = qwen3_image_token_count(512, 512) as usize;
-
+        // AST side: N image_pads + bookends, full budget accounting.
        let full = ctx.token_ids();
        let n_image_pads_full = full.iter()
            .filter(|&&t| t == tokenizer::IMAGE_PAD).count();
-        assert_eq!(n_image_pads_full, n_expected);
+        assert_eq!(n_image_pads_full, qwen3_image_token_count(512, 512) as usize);

+        // Wire side: single image_pad, bytes moved to images list.
        let (wire, images, _) = ctx.wire_prompt(0..ctx.conversation().len(), |_| false);
        let n_image_pads_wire = wire.iter()
            .filter(|&&t| t == tokenizer::IMAGE_PAD).count();
-        assert_eq!(n_image_pads_wire, n_expected);
-
+        assert_eq!(n_image_pads_wire, 1);
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].bytes, vec![0xDE, 0xAD]);
        assert_eq!(images[0].mime, "image/png");

-        // One pair of vision_start/vision_end bookends around the N pads.
+        // vision_start/vision_end bookends are preserved in wire form.
        assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_START).count(), 1);
        assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_END).count(), 1);
    }
--- a/src/agent/mod.rs
+++ b/src/agent/mod.rs
@ -17,7 +17,6 @@ pub mod api;
 pub mod context;
 pub mod oneshot;
 pub mod readout;
-pub mod salience;
 pub mod tokenizer;
 pub mod tools;

@ -29,11 +28,6 @@ use context::{AstNode, ContextState, Section, Ast, PendingToolCall, ResponsePars

 use crate::mind::log::ConversationLog;

-async fn agent_trace(agent: &Arc<Agent>, msg: String) {
-    let provenance = agent.state.lock().await.provenance.clone();
-    eprintln!("[agent:{provenance}] {msg}");
-}
-
 // --- Activity tracking (RAII guards) ---

 pub struct ActivityEntry {
@ -154,14 +148,6 @@ pub struct Agent {
    /// token handler, read by UI screens (amygdala). Manifest is
    /// `None` when the server has readout disabled.
    pub readout: readout::SharedReadoutBuffer,
-    /// Long-lived gRPC session to the salience server, lazily opened
-    /// on first use. Tracks appended tokens so subsequent turns send
-    /// only the delta (prefix-cache reuse). None when not yet opened
-    /// or when the session has died and needs reopening.
-    ///
-    /// Arc-wrapped so the spawned streaming task can share ownership
-    /// (the task outlives the call site).
-    pub grpc_session: std::sync::Arc<crate::Mutex<Option<api::salience::SessionHandle>>>,
 }

 /// Mutable agent state — behind its own mutex.
@ -182,7 +168,9 @@ pub struct AgentState {
    pub think_native: bool,
    /// Tool-based thinking — add a "think" tool for structured reasoning.
    pub think_tool: bool,
-    pub sampling: api::SamplingParams,
+    pub temperature: f32,
+    pub top_p: f32,
+    pub top_k: u32,
    pub activities: Vec<ActivityEntry>,
    next_activity_id: u64,
    pub pending_yield: bool,
@ -236,7 +224,6 @@ impl Agent {
            session_id,
            context: crate::Mutex::new(context),
            readout,
-            grpc_session: std::sync::Arc::new(crate::Mutex::new(None)),
            state: crate::Mutex::new(AgentState {
                tools: agent_tools,
                mcp_tools: McpToolAccess::All,
@ -244,12 +231,9 @@ impl Agent {
                reasoning_effort: "none".to_string(),
                think_native: true,
                think_tool: false,
-                sampling: api::SamplingParams {
                temperature: 0.6,
                top_p: 0.95,
                top_k: 20,
-                    max_tokens: 4096,
-                },
                activities: Vec::new(),
                next_activity_id: 0,
                pending_yield: false,
@ -308,9 +292,6 @@ impl Agent {
            // shouldn't bleed into the main emotional readout even
            // though they hit the same vLLM server.
            readout: readout::new_shared(),
-            // Forks get their own session — can't share a bidi stream,
-            // and forks have different conversation tails anyway.
-            grpc_session: std::sync::Arc::new(crate::Mutex::new(None)),
            state: crate::Mutex::new(AgentState {
                tools,
                mcp_tools: McpToolAccess::None,
@ -318,7 +299,9 @@ impl Agent {
                reasoning_effort: "none".to_string(),
                think_native: st.think_native,
                think_tool: st.think_tool,
-                sampling: st.sampling,
+                temperature: st.temperature,
+                top_p: st.top_p,
+                top_k: st.top_k,
                activities: Vec::new(),
                next_activity_id: 0,
                pending_yield: false,
@ -333,35 +316,35 @@ impl Agent {
        })
    }

-    /// Assemble a ready-to-send prompt as interleaved wire chunks for
-    /// the gRPC session path. Text runs are batched; each Image leaf
-    /// becomes its own chunk. Also trims the conversation to budget
-    /// first so we don't build a prompt the server will reject for
-    /// length.
-    pub async fn assemble_prompt(&self)
-        -> (Vec<context::WireChunk>, Vec<context::WireImage>, u32)
-    {
+    pub async fn assemble_prompt_tokens(&self) -> Vec<u32> {
+        self.assemble_prompt().await.0
+    }
+
+    /// Assemble a ready-to-send prompt: token stream in wire form (each
+    /// image collapsed to a single `<|image_pad|>`) paired with the
+    /// images to attach as multi_modal_data.
+    ///
+    /// Pre-send size check: if the context has grown past budget since the
+    /// last compact (accumulation between turns, a fork's context getting
+    /// bigger than expected, etc.), trim here rather than letting vLLM
+    /// reject the request. Client-side tokenization means we already know
+    /// the exact token count so there's no reason to round-trip an
+    /// oversize request.
+    pub async fn assemble_prompt(&self) -> (Vec<u32>, Vec<context::WireImage>) {
        let mut ctx = self.context.lock().await;
        if ctx.total_tokens() > context::context_budget_tokens() {
            ctx.trim_conversation();
        }
        let st = self.state.lock().await;
-        let conv_len = ctx.conversation().len();
-        let (mut chunks, images) = ctx.wire_chunks(0..conv_len, |_| false);
-        // Assistant-turn prologue. Merge into the trailing Tokens
-        // chunk if there is one, else push as a new chunk.
-        let mut prologue = vec![tokenizer::IM_START];
+        let (mut tokens, images, _) =
+            ctx.wire_prompt(0..ctx.conversation().len(), |_| false);
+        tokens.push(tokenizer::IM_START);
        if st.think_native {
-            prologue.extend(tokenizer::encode("assistant\n<think>\n"));
+            tokens.extend(tokenizer::encode("assistant\n<think>\n"));
        } else {
-            prologue.extend(tokenizer::encode("assistant\n"));
+            tokens.extend(tokenizer::encode("assistant\n"));
        }
-        match chunks.last_mut() {
-            Some(context::WireChunk::Tokens(last)) => last.extend(prologue),
-            _ => chunks.push(context::WireChunk::Tokens(prologue)),
-        }
-        let match_upto = ctx.client_match_upto();
-        (chunks, images, match_upto)
+        (tokens, images)
    }

    /// Rebuild the tools section of the system prompt from the current tools list.
@ -397,16 +380,10 @@ impl Agent {
    pub async fn turn(
        agent: Arc<Agent>,
    ) -> Result<TurnResult> {
-        agent_trace(&agent, format!("turn start")).await;
-
        // Collect finished background tools
        {
            let finished = agent.state.lock().await.active_tools.take_finished();
            if !finished.is_empty() {
-                agent_trace(&agent, format!(
-                    "collecting {} finished background tools",
-                    finished.len(),
-                )).await;
                let mut bg_ds = DispatchState::new();
                let mut results = Vec::new();
                for entry in finished {
@ -425,50 +402,21 @@ impl Agent {

        loop {
            let _thinking = start_activity(&agent, "thinking...").await;
-            agent_trace(&agent, format!(
-                "turn loop overflow_retries={} empty_retries={}",
-                overflow_retries, empty_retries,
-            )).await;

            let (rx, _stream_guard) = {
-                agent_trace(&agent, format!("assembling prompt")).await;
-                let (chunks, images, match_upto) = agent.assemble_prompt().await;
-                let chunk_tokens: usize = chunks.iter().map(|c| match c {
-                    context::WireChunk::Tokens(t) => t.len(),
-                }).sum();
-                agent_trace(&agent, format!(
-                    "prompt assembled chunks={} tokens={} images={} match_upto={}",
-                    chunks.len(), chunk_tokens, images.len(), match_upto,
-                )).await;
+                let (prompt_tokens, images) = agent.assemble_prompt().await;
                let st = agent.state.lock().await;
-                let readout_shape = agent.readout.lock().ok().and_then(|buf| {
-                    buf.manifest.as_ref().map(|m| {
-                        (m.layers.len() as u32, m.concepts.len() as u32)
-                    })
-                });
-                let sampling = st.sampling;
-                let priority = st.priority;
-                drop(st);
-                agent_trace(&agent, format!(
-                    "starting stream max_tokens={} temperature={} top_p={} top_k={} priority={:?} readout_shape={:?}",
-                    sampling.max_tokens,
-                    sampling.temperature,
-                    sampling.top_p,
-                    sampling.top_k,
-                    priority,
-                    readout_shape,
-                )).await;
-                agent.client.stream_session_mm(
-                    agent.grpc_session.clone(),
-                    chunks,
-                    images,
-                    match_upto,
-                    sampling,
-                    priority,
-                    readout_shape,
+                agent.client.stream_completion_mm(
+                    &prompt_tokens,
+                    &images,
+                    api::SamplingParams {
+                        temperature: st.temperature,
+                        top_p: st.top_p,
+                        top_k: st.top_k,
+                    },
+                    st.priority,
                )
            };
-            agent_trace(&agent, format!("stream task spawned")).await;

            let branch_idx = {
                let mut ctx = agent.context.lock().await;
@ -479,41 +427,11 @@ impl Agent {
                idx
            };

-            let think_native = agent.state.lock().await.think_native;
-            let parser = ResponseParser::new(branch_idx, think_native);
+            let parser = ResponseParser::new(branch_idx);
            let (mut tool_rx, parser_handle) = parser.run(rx, agent.clone());
-            agent_trace(&agent, format!(
-                "parser started branch_idx={} think_native={}",
-                branch_idx, think_native,
-            )).await;

            let mut pending_calls: Vec<PendingToolCall> = Vec::new();
-            loop {
-                let call = match tokio::time::timeout(
-                    std::time::Duration::from_secs(15),
-                    tool_rx.recv(),
-                ).await {
-                    Ok(Some(call)) => call,
-                    Ok(None) => {
-                        agent_trace(&agent, format!(
-                            "tool channel closed pending_calls={}",
-                            pending_calls.len(),
-                        )).await;
-                        break;
-                    }
-                    Err(_) => {
-                        agent_trace(&agent, format!(
-                            "waiting for parser/tool events pending_calls={}",
-                            pending_calls.len(),
-                        )).await;
-                        continue;
-                    }
-                };
-
-                agent_trace(&agent, format!(
-                    "tool call received id={} name={} args_len={}",
-                    call.id, call.name, call.arguments.len(),
-                )).await;
+            while let Some(call) = tool_rx.recv().await {
                let call_clone = call.clone();
                let agent_handle = agent.clone();
                let handle = tokio::spawn(async move {
@ -536,10 +454,8 @@ impl Agent {
            }

            // Check for stream/parse errors
-            agent_trace(&agent, format!("awaiting parser task")).await;
            match parser_handle.await {
                Ok(Err(e)) => {
-                    agent_trace(&agent, format!("parser returned error: {:#}", e)).await;
                    if context::is_context_overflow(&e) && overflow_retries < 2 {
                        overflow_retries += 1;
                        let msg = format!("context overflow — compacting ({}/2)", overflow_retries);
@ -553,12 +469,8 @@ impl Agent {
                    }
                    return Err(e);
                }
-                Err(e) => {
-                    agent_trace(&agent, format!("parser task panicked: {}", e)).await;
-                    return Err(anyhow::anyhow!("parser task panicked: {}", e));
-                }
+                Err(e) => return Err(anyhow::anyhow!("parser task panicked: {}", e)),
                Ok(Ok(())) => {
-                    agent_trace(&agent, format!("parser completed")).await;
                    // Assistant response was pushed to context by the parser;
                    // log it now that parsing is complete.
                    let ctx = agent.context.lock().await;
@ -579,10 +491,6 @@ impl Agent {
            if !has_content && pending_calls.is_empty() {
                if empty_retries < 2 {
                    empty_retries += 1;
-                    agent_trace(&agent, format!(
-                        "empty response retry {}/2",
-                        empty_retries,
-                    )).await;
                    agent.push_node(AstNode::user_msg(
                        "[system] Your previous response was empty. \
                         Please respond with text or use a tool."
@ -596,10 +504,6 @@ impl Agent {
            // Wait for tool calls to complete
            if !pending_calls.is_empty() {
                ds.had_tool_calls = true;
-                agent_trace(&agent, format!(
-                    "waiting for {} foreground tools",
-                    pending_calls.len(),
-                )).await;

                let handles = agent.state.lock().await.active_tools.take_foreground();
                let mut results = Vec::new();
@ -620,16 +524,6 @@ impl Agent {
            if st.pending_model_switch.is_some() { ds.model_switch = st.pending_model_switch.take(); }
            if st.pending_dmn_pause { ds.dmn_pause = true; st.pending_dmn_pause = false; }

-            drop(st);
-            agent_trace(&agent, format!(
-                "turn complete yield={} tool_calls={} tool_errors={} model_switch={:?} dmn_pause={}",
-                ds.yield_requested,
-                ds.had_tool_calls,
-                ds.tool_errors,
-                ds.model_switch,
-                ds.dmn_pause,
-            )).await;
-
            return Ok(TurnResult {
                yield_requested: ds.yield_requested,
                had_tool_calls: ds.had_tool_calls,
--- a/src/agent/oneshot.rs
+++ b/src/agent/oneshot.rs
@ -12,9 +12,7 @@ use crate::subconscious::{defs, prompts};

 use std::collections::HashMap;
 use std::fs;
-use std::io::Write as _;
 use std::path::PathBuf;
-use std::time::Instant;

 use super::context::AstNode;
 use super::tools::{self as agent_tools};
@ -108,10 +106,6 @@ pub async fn save_agent_log(name: &str, agent: &std::sync::Arc<Agent>) -> RunSta
    stats
 }

-fn log_agent_event(agent: &str, msg: std::fmt::Arguments) {
-    eprintln!("[agent:{agent}] {msg}");
-}
-
 fn compute_run_stats(conversation: &[super::context::AstNode]) -> RunStats {
    use super::context::{AstNode, NodeBody};

@ -275,7 +269,7 @@ impl AutoAgent {
            let mut st = agent.state.lock().await;
            st.provenance = format!("standalone:{}", self.name);
            st.tools = self.tools.clone();
-            st.sampling.temperature = self.temperature;
+            st.temperature = self.temperature;
            st.priority = Some(self.priority);
        }

@ -351,44 +345,20 @@ impl AutoAgent {
        bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
    ) -> Result<(), String> {
        dbglog!("[auto] {} starting, {} steps", self.name, self.steps.len());
-        log_agent_event(&self.name, format_args!(
-            "starting run steps={} temperature={} priority={}",
-            self.steps.len(), self.temperature, self.priority));
-        let run_start = Instant::now();

        for (i, step) in self.steps.iter().enumerate() {
            self.turn = i + 1;
            self.current_phase = step.phase.clone();
-            let step_start = Instant::now();
-            log_agent_event(&self.name, format_args!(
-                "step {}/{} phase={} prompt_bytes={}",
-                i + 1, self.steps.len(), step.phase, step.prompt.len()));

            if let Some(ref check) = bail_fn {
-                log_agent_event(&self.name, format_args!(
-                    "step {}/{} phase={} bail check", i + 1, self.steps.len(), step.phase));
                check(i)?;
-                log_agent_event(&self.name, format_args!(
-                    "step {}/{} phase={} bail ok", i + 1, self.steps.len(), step.phase));
            }

            backend.push_node(AstNode::system_msg(&step.prompt)).await;
            Agent::turn(backend.0.clone()).await
-                .map_err(|e| {
-                    log_agent_event(&self.name, format_args!(
-                        "step {}/{} phase={} failed after {:.2}s: {}",
-                        i + 1, self.steps.len(), step.phase,
-                        step_start.elapsed().as_secs_f64(), e));
-                    format!("{}: {}", self.name, e)
-                })?;
-            log_agent_event(&self.name, format_args!(
-                "step {}/{} phase={} done in {:.2}s",
-                i + 1, self.steps.len(), step.phase,
-                step_start.elapsed().as_secs_f64()));
+                .map_err(|e| format!("{}: {}", self.name, e))?;
        }

-        log_agent_event(&self.name, format_args!(
-            "run completed in {:.2}s", run_start.elapsed().as_secs_f64()));
        Ok(())
    }

@ -412,29 +382,8 @@ pub async fn run_one_agent(
    count: usize,
    keys: Option<&[String]>,
 ) -> Result<AgentResult, String> {
-    let run_start = Instant::now();
-    log_agent_event(agent_name, format_args!(
-        "run_one_agent start pid={} count={} explicit_keys={}",
-        std::process::id(), count, keys.map(|k| k.len()).unwrap_or(0)));
-    log_agent_event(agent_name, format_args!(
-        "env POC_SESSION_ID={:?} POC_TRANSCRIPT_PATH={:?} POC_AGENT_OUTPUT_DIR={:?}",
-        std::env::var("POC_SESSION_ID").ok(),
-        std::env::var("POC_TRANSCRIPT_PATH").ok(),
-        std::env::var("POC_AGENT_OUTPUT_DIR").ok()));
-    if let Some(session) = crate::session::HookSession::from_env() {
-        let transcript = session.transcript();
-        log_agent_event(agent_name, format_args!(
-            "session={} transcript={} size={} exists={}",
-            session.session_id, transcript.path, transcript.size, transcript.exists()));
-    } else {
-        log_agent_event(agent_name, format_args!("no hook session in environment"));
-    }
-
    let def = defs::get_def(agent_name)
        .ok_or_else(|| format!("no .agent file for {}", agent_name))?;
-    log_agent_event(agent_name, format_args!(
-        "definition loaded steps={} tools={:?} count={:?} priority={} bail={:?}",
-        def.steps.len(), def.tools, def.count, def.priority, def.bail));

    // State dir for agent output files
    let state_dir = std::env::var("POC_AGENT_OUTPUT_DIR")
@ -443,7 +392,6 @@ pub async fn run_one_agent(
    fs::create_dir_all(&state_dir)
        .map_err(|e| format!("create state dir: {}", e))?;
    unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &state_dir); }
-    log_agent_event(agent_name, format_args!("state_dir={}", state_dir.display()));

    // Build prompt batch — either from explicit keys or the agent's query
    let agent_batch = if let Some(keys) = keys {
@ -463,8 +411,6 @@ pub async fn run_one_agent(
        prompts::AgentBatch { steps: resolved_steps, node_keys: all_keys }
    } else {
        let effective_count = def.count.unwrap_or(count);
-        log_agent_event(agent_name, format_args!(
-            "resolving default prompt placeholders effective_count={}", effective_count));
        defs::run_agent(&def, effective_count, &Default::default()).await?
    };

@ -517,14 +463,6 @@ pub async fn run_one_agent(
        })),
    });
    let n_steps = agent_batch.steps.len();
-    log_agent_event(agent_name, format_args!(
-        "prompt batch ready steps={} node_keys={}",
-        n_steps, agent_batch.node_keys.len()));
-    for (i, step) in agent_batch.steps.iter().enumerate() {
-        log_agent_event(agent_name, format_args!(
-            "prompt step {}/{} phase={} bytes={}",
-            i + 1, n_steps, step.phase, step.prompt.len()));
-    }

    // Guard: reject oversized first prompt
    let max_prompt_bytes = 800_000;
@ -547,9 +485,6 @@ pub async fn run_one_agent(
    let phases: Vec<&str> = agent_batch.steps.iter().map(|s| s.phase.as_str()).collect();
    dbglog!("[{}] {} step(s) {:?}, {}KB initial, {} nodes",
        agent_name, n_steps, phases, first_len / 1024, agent_batch.node_keys.len());
-    log_agent_event(agent_name, format_args!(
-        "tools enabled: {}",
-        effective_tools.iter().map(|t| t.name).collect::<Vec<_>>().join(", ")));

    let prompts: Vec<String> = agent_batch.steps.iter()
        .map(|s| s.prompt.clone()).collect();
@ -562,25 +497,18 @@ pub async fn run_one_agent(
    let bail_script = def.bail.as_ref().map(|name| defs::agents_dir().join(name));
    let state_dir_for_bail = state_dir.clone();
    let our_pid = std::process::id();
-    let our_pid_file = std::env::var("POC_AGENT_PID_FILE")
-        .unwrap_or_else(|_| format!("pid-{}", our_pid));
+    let our_pid_file = format!("pid-{}", our_pid);
    let step_phases_for_bail = step_phases.clone();
    let bail_fn = move |step_idx: usize| -> Result<(), String> {
        if let Some(ref script) = bail_script {
            let phase = step_phases_for_bail.get(step_idx)
                .map(String::as_str).unwrap_or("");
-            eprintln!(
-                "[agent:bail] script={} state_dir={} pid_file={} phase={}",
-                script.display(), state_dir_for_bail.display(), our_pid_file, phase);
            let status = std::process::Command::new(script)
                .arg(&our_pid_file)
                .arg(phase)
                .current_dir(&state_dir_for_bail)
                .status()
                .map_err(|e| format!("bail script {:?} failed: {}", script, e))?;
-            eprintln!(
-                "[agent:bail] script={} phase={} status={}",
-                script.display(), phase, status);
            if !status.success() {
                return Err(format!("bailed at step {}: {:?} exited {}",
                    step_idx + 1, script.file_name().unwrap_or_default(),
@ -593,8 +521,6 @@ pub async fn run_one_agent(
    call_api_with_tools_sync(
        agent_name, &prompts, &step_phases, def.temperature, def.priority,
        &effective_tools, Some(&bail_fn))?;
-    log_agent_event(agent_name, format_args!(
-        "run_one_agent completed in {:.2}s", run_start.elapsed().as_secs_f64()));

    Ok(AgentResult {
        node_keys: agent_batch.node_keys,
@ -672,15 +598,6 @@ pub fn spawn_agent(
    agent_name: &str,
    state_dir: &std::path::Path,
    session_id: &str,
-) -> Option<SpawnResult> {
-    spawn_agent_with_transcript(agent_name, state_dir, session_id, None)
-}
-
-pub fn spawn_agent_with_transcript(
-    agent_name: &str,
-    state_dir: &std::path::Path,
-    session_id: &str,
-    transcript_path: Option<&str>,
 ) -> Option<SpawnResult> {
    let def = defs::get_def(agent_name)?;
    let first_phase = def.steps.first()
@ -691,41 +608,17 @@ pub fn spawn_agent_with_transcript(
        .join(format!(".consciousness/logs/{}", agent_name));
    fs::create_dir_all(&log_dir).ok();
    let log_path = log_dir.join(format!("{}.log", store::compact_timestamp()));
-    let mut agent_log = fs::File::create(&log_path)
+    let agent_log = fs::File::create(&log_path)
        .unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());

-    let mut cmd = std::process::Command::new("bash");
-    cmd.args([
-        "-lc",
-        r#"
-set +e
-export POC_AGENT_PID_FILE="pid-$$"
-"$@"
-status=$?
-printf '=== agent process exit status: %s at %s ===\n' "$status" "$(date --iso-8601=seconds)"
-exit "$status"
-"#,
-        "poc-memory-agent-wrapper",
-        "poc-memory", "agent", "run", agent_name, "--count", "1", "--local",
-        "--state-dir", &state_dir.to_string_lossy(),
-    ]).env("POC_SESSION_ID", session_id);
-    if let Some(path) = transcript_path.filter(|p| !p.is_empty()) {
-        cmd.env("POC_TRANSCRIPT_PATH", path);
-    }
-
-    let _ = writeln!(agent_log, "=== spawn {} ===", chrono::Local::now().format("%Y-%m-%dT%H:%M:%S"));
-    let _ = writeln!(agent_log, "agent={agent_name}");
-    let _ = writeln!(agent_log, "state_dir={}", state_dir.display());
-    let _ = writeln!(agent_log, "session_id={session_id}");
-    let _ = writeln!(agent_log, "transcript_path={}", transcript_path.unwrap_or(""));
-    let _ = writeln!(agent_log, "first_phase={first_phase}");
-    let _ = writeln!(agent_log, "command=poc-memory agent run {agent_name} --count 1 --local --state-dir {}", state_dir.display());
-    let _ = agent_log.flush();
-
-    let child_stdout = agent_log.try_clone()
-        .unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
-    let child_stderr = agent_log;
-    let child = cmd.stdout(child_stdout).stderr(child_stderr).spawn().ok()?;
+    let child = std::process::Command::new("poc-memory")
+        .args(["agent", "run", agent_name, "--count", "1", "--local",
+               "--state-dir", &state_dir.to_string_lossy()])
+        .env("POC_SESSION_ID", session_id)
+        .stdout(agent_log.try_clone().unwrap_or_else(|_| fs::File::create("/dev/null").unwrap()))
+        .stderr(agent_log)
+        .spawn()
+        .ok()?;

    let pid = child.id();
    let pid_path = state_dir.join(format!("pid-{}", pid));
--- a/src/agent/salience.rs
+++ b/src/agent/salience.rs
@ -1,309 +0,0 @@
-// agent/salience.rs — peak extraction from per-token concept-readout traces.
-//
-// Consumes a trace of `ReadoutEntry` (per-token per-layer per-concept
-// projections streamed from the vLLM server) and produces a compact
-// list of `SaliencePeak` events — one per contiguous above-threshold
-// region per concept, placed at the local maximum.
-//
-// Pure function. No I/O, no async, no side effects. Caller supplies the
-// trace slice and manifest; caller decides what to do with the events.
-//
-// See also: `salience-trace-plumbing-architecture` memory node.
-
-use super::api::ReadoutManifest;
-use super::readout::ReadoutEntry;
-
-/// One salient moment in a trace — a concept channel crossed threshold,
-/// and we picked the local maximum within the contiguous above-threshold
-/// run.
-#[derive(Debug, Clone, PartialEq)]
-pub struct SaliencePeak {
-    /// Index into the trace (0-based) where the peak occurred.
-    pub token_offset: usize,
-    /// Concept name from the manifest.
-    pub concept: String,
-    /// z-score of the peak value vs the trace's own distribution for
-    /// that concept. Always positive (we only pick above-threshold).
-    pub intensity: f32,
-}
-
-/// Tunables for peak extraction.
-#[derive(Debug, Clone)]
-pub struct PeakConfig {
-    /// Minimum z-score to count as a peak. Default 2.0 (~top 2.5% assuming
-    /// normal-ish distribution, though readouts are rarely normal).
-    pub sigma_threshold: f32,
-    /// Minimum standard deviation of a concept channel for peaks to be
-    /// reported. If a channel is numerically flat across the whole trace,
-    /// tiny fluctuations can produce spurious "peaks" with huge z-scores;
-    /// require at least this much variation before trusting the channel.
-    pub min_std: f32,
-}
-
-impl Default for PeakConfig {
-    fn default() -> Self {
-        Self { sigma_threshold: 2.0, min_std: 1e-4 }
-    }
-}
-
-/// Extract peak events from a trace for one layer.
-///
-/// `layer_idx` indexes into the per-token readout tensor's layer
-/// dimension. If the trace is empty, the layer is out of range for any
-/// entry, or the manifest is empty, returns `Vec::new()`.
-///
-/// Peaks are returned sorted by `token_offset` ascending. When two
-/// peaks share an offset they're ordered by `concept` lexicographically
-/// for determinism.
-pub fn pick_peaks(
-    trace: &[ReadoutEntry],
-    manifest: &ReadoutManifest,
-    layer_idx: usize,
-    config: &PeakConfig,
-) -> Vec<SaliencePeak> {
-    if trace.is_empty() || manifest.concepts.is_empty() {
-        return Vec::new();
-    }
-
-    let n_concepts = manifest.concepts.len();
-    let n_tokens = trace.len();
-
-    // Pull a [n_tokens × n_concepts] column-major view for the selected
-    // layer. Entries where the layer is missing or the concept count
-    // doesn't match the manifest are treated as zeros — the downstream
-    // z-score will drown them as baseline if they're sparse, and if they
-    // dominate the caller has bigger problems.
-    let mut by_concept: Vec<Vec<f32>> = vec![Vec::with_capacity(n_tokens); n_concepts];
-    for entry in trace {
-        match entry.readout.get(layer_idx) {
-            Some(row) if row.len() == n_concepts => {
-                for (c, v) in row.iter().enumerate() {
-                    by_concept[c].push(*v);
-                }
-            }
-            _ => {
-                for col in by_concept.iter_mut() {
-                    col.push(0.0);
-                }
-            }
-        }
-    }
-
-    let mut peaks: Vec<SaliencePeak> = Vec::new();
-    for (c_idx, values) in by_concept.iter().enumerate() {
-        let (mean, std) = mean_std(values);
-        if std < config.min_std {
-            continue;
-        }
-        let concept = &manifest.concepts[c_idx];
-
-        // Walk contiguous above-threshold runs, emit one peak per run
-        // at the local max.
-        let mut run_start: Option<usize> = None;
-        let mut run_max_offset: usize = 0;
-        let mut run_max_z: f32 = 0.0;
-        for (i, v) in values.iter().enumerate() {
-            let z = (*v - mean) / std;
-            let above = z >= config.sigma_threshold;
-            if above {
-                if run_start.is_none() {
-                    run_start = Some(i);
-                    run_max_offset = i;
-                    run_max_z = z;
-                } else if z > run_max_z {
-                    run_max_offset = i;
-                    run_max_z = z;
-                }
-            } else if run_start.is_some() {
-                peaks.push(SaliencePeak {
-                    token_offset: run_max_offset,
-                    concept: concept.clone(),
-                    intensity: run_max_z,
-                });
-                run_start = None;
-            }
-        }
-        // Flush trailing run.
-        if run_start.is_some() {
-            peaks.push(SaliencePeak {
-                token_offset: run_max_offset,
-                concept: concept.clone(),
-                intensity: run_max_z,
-            });
-        }
-    }
-
-    peaks.sort_by(|a, b| a.token_offset.cmp(&b.token_offset).then_with(|| a.concept.cmp(&b.concept)));
-    peaks
-}
-
-/// Mean and population std of a slice. Returns (0.0, 0.0) for empty input.
-fn mean_std(xs: &[f32]) -> (f32, f32) {
-    if xs.is_empty() {
-        return (0.0, 0.0);
-    }
-    let n = xs.len() as f32;
-    let mean = xs.iter().sum::<f32>() / n;
-    let var = xs.iter().map(|x| (x - mean).powi(2)).sum::<f32>() / n;
-    (mean, var.sqrt())
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn manifest(concepts: &[&str], layers: &[u32]) -> ReadoutManifest {
-        ReadoutManifest {
-            concepts: concepts.iter().map(|s| s.to_string()).collect(),
-            layers: layers.to_vec(),
-        }
-    }
-
-    /// Build a trace where all entries have one hooked layer and the
-    /// given per-token values for each concept. `values[t][c]` = value
-    /// at token t, concept c.
-    fn trace(values: &[Vec<f32>]) -> Vec<ReadoutEntry> {
-        values.iter().enumerate().map(|(i, row)| ReadoutEntry {
-            token_id: i as u32,
-            readout: vec![row.clone()],
-        }).collect()
-    }
-
-    #[test]
-    fn empty_trace_returns_empty() {
-        let m = manifest(&["curious"], &[63]);
-        let peaks = pick_peaks(&[], &m, 0, &PeakConfig::default());
-        assert!(peaks.is_empty());
-    }
-
-    #[test]
-    fn empty_manifest_returns_empty() {
-        let m = manifest(&[], &[63]);
-        let t = trace(&[vec![], vec![], vec![]]);
-        let peaks = pick_peaks(&t, &m, 0, &PeakConfig::default());
-        assert!(peaks.is_empty());
-    }
-
-    #[test]
-    fn flat_channel_produces_no_peaks() {
-        let m = manifest(&["curious"], &[63]);
-        let t = trace(&[vec![1.0], vec![1.0], vec![1.0], vec![1.0], vec![1.0]]);
-        let peaks = pick_peaks(&t, &m, 0, &PeakConfig::default());
-        assert!(peaks.is_empty(), "flat channel should produce no peaks, got {:?}", peaks);
-    }
-
-    #[test]
-    fn single_spike_detected() {
-        // Ten baseline zeros with one 5.0 spike — that single token's
-        // z-score will easily exceed 2σ.
-        let m = manifest(&["curious"], &[63]);
-        let mut rows: Vec<Vec<f32>> = (0..10).map(|_| vec![0.0]).collect();
-        rows[5] = vec![5.0];
-        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
-        assert_eq!(peaks.len(), 1);
-        assert_eq!(peaks[0].concept, "curious");
-        assert_eq!(peaks[0].token_offset, 5);
-        assert!(peaks[0].intensity >= 2.0);
-    }
-
-    #[test]
-    fn contiguous_region_emits_one_peak_at_max() {
-        // Values 0, 0, 0, 2, 5, 3, 0, 0 — the 3-5-3 hump is one run;
-        // peak should land at offset 4 (the 5).
-        let m = manifest(&["aha"], &[63]);
-        let rows: Vec<Vec<f32>> = [0.0, 0.0, 0.0, 2.0, 5.0, 3.0, 0.0, 0.0]
-            .iter().map(|v| vec![*v]).collect();
-        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
-        assert_eq!(peaks.len(), 1, "expected one peak for one contiguous run, got {:?}", peaks);
-        assert_eq!(peaks[0].token_offset, 4);
-    }
-
-    #[test]
-    fn multiple_concepts_independent() {
-        let m = manifest(&["curious", "aha"], &[63]);
-        // curious spikes at 2, aha spikes at 7
-        let rows: Vec<Vec<f32>> = (0..10).map(|i| {
-            let c = if i == 2 { 4.0 } else { 0.0 };
-            let a = if i == 7 { 4.0 } else { 0.0 };
-            vec![c, a]
-        }).collect();
-        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
-        assert_eq!(peaks.len(), 2);
-        // Sorted by offset — curious(2) comes first, aha(7) second.
-        assert_eq!(peaks[0].concept, "curious");
-        assert_eq!(peaks[0].token_offset, 2);
-        assert_eq!(peaks[1].concept, "aha");
-        assert_eq!(peaks[1].token_offset, 7);
-    }
-
-    #[test]
-    fn two_separated_runs_emit_two_peaks() {
-        // Longer baseline so the two spikes don't dominate the global
-        // mean/std — 30 tokens of zeros with two 5.0 spikes at 10 and 20.
-        let m = manifest(&["curious"], &[63]);
-        let mut rows: Vec<Vec<f32>> = (0..30).map(|_| vec![0.0]).collect();
-        rows[10] = vec![5.0];
-        rows[20] = vec![5.0];
-        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
-        assert_eq!(peaks.len(), 2, "expected two peaks for two runs, got {:?}", peaks);
-        assert_eq!(peaks[0].token_offset, 10);
-        assert_eq!(peaks[1].token_offset, 20);
-    }
-
-    #[test]
-    fn trailing_run_is_flushed() {
-        // Peak runs to the end of the trace — must still emit.
-        // Use a longer baseline so the trailing spike is genuinely
-        // above threshold on the global stats.
-        let m = manifest(&["curious"], &[63]);
-        let mut rows: Vec<Vec<f32>> = (0..30).map(|_| vec![0.0]).collect();
-        rows[27] = vec![3.0];
-        rows[28] = vec![5.0];
-        rows[29] = vec![4.0];
-        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
-        assert_eq!(peaks.len(), 1, "expected one peak for one trailing run, got {:?}", peaks);
-        assert_eq!(peaks[0].token_offset, 28, "peak should land at the local max of the trailing run");
-    }
-
-    #[test]
-    fn sub_threshold_produces_nothing() {
-        // All non-zero values are small; z-scores won't cross 2σ.
-        let m = manifest(&["curious"], &[63]);
-        let rows: Vec<Vec<f32>> = [0.0, 0.1, 0.0, 0.1, 0.0, 0.1, 0.0, 0.1]
-            .iter().map(|v| vec![*v]).collect();
-        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
-        assert!(peaks.is_empty(), "below-threshold wiggle should produce no peaks, got {:?}", peaks);
-    }
-
-    #[test]
-    fn layer_out_of_range_returns_empty() {
-        let m = manifest(&["curious"], &[63]);
-        let rows: Vec<Vec<f32>> = (0..10).map(|i| vec![if i == 5 { 5.0 } else { 0.0 }]).collect();
-        // Trace has one layer (index 0); asking for layer 3 should see
-        // all-zero columns, which are flat and produce no peaks.
-        let peaks = pick_peaks(&trace(&rows), &m, 3, &PeakConfig::default());
-        assert!(peaks.is_empty());
-    }
-
-    #[test]
-    fn manifest_concept_count_mismatch_is_safe() {
-        // Manifest says 2 concepts; each readout row only has 1 value.
-        // Rows should be treated as all-zero (via the len check) and
-        // produce no peaks without panicking.
-        let m = manifest(&["a", "b"], &[63]);
-        let rows: Vec<Vec<f32>> = (0..10).map(|_| vec![1.0]).collect();
-        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
-        assert!(peaks.is_empty());
-    }
-
-    #[test]
-    fn threshold_tunable() {
-        // Same spike, stricter threshold — no peak.
-        let m = manifest(&["curious"], &[63]);
-        let mut rows: Vec<Vec<f32>> = (0..10).map(|_| vec![0.0]).collect();
-        rows[5] = vec![5.0];
-        let strict = PeakConfig { sigma_threshold: 100.0, ..PeakConfig::default() };
-        let peaks = pick_peaks(&trace(&rows), &m, 0, &strict);
-        assert!(peaks.is_empty());
-    }
-}
--- a/src/agent/tokenizer.rs
+++ b/src/agent/tokenizer.rs
@ -33,17 +33,16 @@ fn get() -> Option<&'static Tokenizer> {
    TOKENIZER.get()
 }

-fn expect_tokenizer() -> &'static Tokenizer {
-    get().expect("tokenizer not initialized; expected ~/.consciousness/tokenizer-qwen35.json")
-}
-
 /// Tokenize a raw string, returning token IDs.
+/// Returns empty vec if the tokenizer is not initialized.
 pub fn encode(text: &str) -> Vec<u32> {
-    expect_tokenizer()
-        .encode(text, false)
+    match get() {
+        Some(t) => t.encode(text, false)
            .unwrap_or_else(|e| panic!("tokenization failed: {}", e))
            .get_ids()
-        .to_vec()
+            .to_vec(),
+        None => vec![],
+    }
 }

 /// Tokenize a chat entry with template wrapping:
@ -67,12 +66,15 @@ pub fn count(text: &str) -> usize {

 /// Decode token IDs back to text.
 pub fn decode(ids: &[u32]) -> String {
-    expect_tokenizer()
-        .decode(ids, true)
-        .unwrap_or_else(|e| panic!("detokenization failed: {}", e))
+    match get() {
+        Some(t) => t.decode(ids, true)
+            .unwrap_or_else(|e| panic!("detokenization failed: {}", e)),
+        None => String::new(),
+    }
 }

 /// Check if the tokenizer is initialized.
 pub fn is_initialized() -> bool {
    TOKENIZER.get().is_some()
 }
+
--- a/src/agent/tools/memory.rs
+++ b/src/agent/tools/memory.rs
@ -209,24 +209,7 @@ memory_tool!(graph_trace, ref, key: [str]);

 // ── Definitions ────────────────────────────────────────────────

-async fn jsonargs_memory_new(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args: &serde_json::Value) -> Result<String> {
-    jsonargs_memory_write(agent, args).await
-}
-
-async fn jsonargs_memory_link(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args: &serde_json::Value) -> Result<String> {
-    let source = get_str(args, "source")?;
-    let target = get_str(args, "target")?;
-    if args.get("strength").and_then(|v| v.as_f64()).is_some() {
-        jsonargs_memory_link_set(agent, args).await
-    } else {
-        jsonargs_memory_link_add(agent, &serde_json::json!({
-            "source": source,
-            "target": target,
-        })).await
-    }
-}
-
-pub fn memory_tools() -> [super::Tool; 22] {
+pub fn memory_tools() -> [super::Tool; 20] {
    use super::Tool;
    macro_rules! tool {
        ($name:ident, $desc:expr, $params:expr) => {
@ -251,11 +234,6 @@ pub fn memory_tools() -> [super::Tool; 22] {
            "properties": { "key": {"type": "string"}, "content": {"type": "string"} },
            "required": ["key", "content"]
        }"#),
-        tool!(memory_new, "Create or update a memory node. Alias for memory_write.", r#"{
-            "type": "object",
-            "properties": { "key": {"type": "string"}, "content": {"type": "string"} },
-            "required": ["key", "content"]
-        }"#),
        tool!(memory_search, "Search via spreading activation from seed keys.", r#"{
            "type": "object",
            "properties": {
@ -286,16 +264,6 @@ pub fn memory_tools() -> [super::Tool; 22] {
            "properties": { "source": {"type": "string"}, "target": {"type": "string"} },
            "required": ["source", "target"]
        }"#),
-        tool!(memory_link, "Add or update a link between two memory nodes. Alias for memory_link_add/memory_link_set.", r#"{
-            "type": "object",
-            "properties": {
-                "source": {"type": "string"},
-                "target": {"type": "string"},
-                "strength": {"type": "number", "description": "Optional; 0.01 to 1.0"},
-                "label": {"type": "string", "description": "Accepted for compatibility; currently ignored"}
-            },
-            "required": ["source", "target"]
-        }"#),
        tool!(memory_delete, "Soft-delete a node.", r#"{
            "type": "object",
            "properties": { "key": {"type": "string"} },
--- a/src/agent/tools/vision.rs
+++ b/src/agent/tools/vision.rs
@ -57,18 +57,15 @@ async fn view_image(
    let (w, h) = (dim.width as u32, dim.height as u32);
    let mime = mime_from_extension(path);

-    let agent = agent.context("view_image requires agent context")?;
-
-    // token_count is populated when the image reaches the server via
-    // AppendImage (the server is authoritative for the IMAGE_PAD
-    // count). Placeholder of 0 here until AppendImage is wired; the
-    // leaf's count gets rewritten from the RPC response at send time.
    let image_leaf = AstNode::image(bytes.clone(), mime, h, w);
+    let token_count = image_leaf.leaf().unwrap().tokens().saturating_sub(2);

+    let agent = agent.context("view_image requires agent context")?;
    let branch = AstNode::branch(Role::User, vec![image_leaf]);
    agent.context.lock().await.push_log(Section::Conversation, branch);

-    Ok(format!("loaded {} ({}, {}x{})", a.file_path, mime, w, h))
+    Ok(format!("loaded {} ({}, {}x{}, {} tokens)",
+        a.file_path, mime, w, h, token_count))
 }

 fn mime_from_extension(path: &std::path::Path) -> &'static str {
--- a/src/bin/ch.rs
+++ b/src/bin/ch.rs
@ -1,112 +0,0 @@
-// `ch` — minimal channel CLI.
-//
-//   ch send <channel-path> <message>
-//   ch recv <channel-path> [--all-new] [--min-count N]
-//
-// Connects to ~/.consciousness/channels/<top>.sock and speaks the
-// channel.capnp protocol to the appropriate daemon.
-
-use std::path::PathBuf;
-use std::process::ExitCode;
-
-use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
-use futures::AsyncReadExt;
-use tokio_util::compat::TokioAsyncReadCompatExt;
-
-use consciousness::channel_capnp::channel_server;
-
-fn channels_dir() -> PathBuf {
-    dirs::home_dir().unwrap_or_default().join(".consciousness/channels")
-}
-
-fn sock_for(channel: &str) -> PathBuf {
-    let top = channel.split('.').next().unwrap_or(channel);
-    channels_dir().join(format!("{top}.sock"))
-}
-
-async fn connect(sock: &std::path::Path) -> Result<channel_server::Client, String> {
-    let stream = tokio::net::UnixStream::connect(sock).await
-        .map_err(|e| format!("connect {}: {e}", sock.display()))?;
-    let (reader, writer) = stream.compat().split();
-    let network = Box::new(twoparty::VatNetwork::new(
-        futures::io::BufReader::new(reader),
-        futures::io::BufWriter::new(writer),
-        rpc_twoparty_capnp::Side::Client,
-        Default::default(),
-    ));
-    let mut rpc = RpcSystem::new(network, None);
-    let client: channel_server::Client = rpc.bootstrap(rpc_twoparty_capnp::Side::Server);
-    tokio::task::spawn_local(rpc);
-    Ok(client)
-}
-
-#[tokio::main(flavor = "current_thread")]
-async fn main() -> ExitCode {
-    let args: Vec<String> = std::env::args().collect();
-    if args.len() < 2 {
-        eprintln!("usage: {} <send|recv> <channel> [args...]", args[0]);
-        return ExitCode::from(2);
-    }
-
-    let cmd = args[1].clone();
-    let local = tokio::task::LocalSet::new();
-    let result: Result<(), String> = local.run_until(async move {
-        match cmd.as_str() {
-            "send" => {
-                if args.len() < 4 {
-                    return Err("usage: ch send <channel> <message...>".into());
-                }
-                let channel = &args[2];
-                let message = args[3..].join(" ");
-                let sock = sock_for(channel);
-                let client = connect(&sock).await?;
-                let mut req = client.send_request();
-                req.get().set_channel(channel);
-                req.get().set_message(&message);
-                req.send().promise.await.map_err(|e| format!("send: {e}"))?;
-                println!("sent to {channel}");
-                Ok(())
-            }
-            "recv" => {
-                if args.len() < 3 {
-                    return Err("usage: ch recv <channel> [--all-new] [--min-count N]".into());
-                }
-                let channel = &args[2];
-                let mut all_new = false;
-                let mut min_count: u32 = 20;
-                let mut i = 3;
-                while i < args.len() {
-                    match args[i].as_str() {
-                        "--all-new" => { all_new = true; i += 1; }
-                        "--min-count" => {
-                            min_count = args.get(i+1)
-                                .ok_or("--min-count needs an argument")?
-                                .parse().map_err(|e| format!("--min-count: {e}"))?;
-                            i += 2;
-                        }
-                        other => return Err(format!("unknown arg: {other}")),
-                    }
-                }
-                let sock = sock_for(channel);
-                let client = connect(&sock).await?;
-                let mut req = client.recv_request();
-                req.get().set_channel(channel);
-                req.get().set_all_new(all_new);
-                req.get().set_min_count(min_count);
-                let reply = req.send().promise.await.map_err(|e| format!("recv: {e}"))?;
-                let text = reply.get().map_err(|e| e.to_string())?
-                    .get_text().map_err(|e| e.to_string())?
-                    .to_str().map_err(|e| e.to_string())?;
-                print!("{text}");
-                if !text.ends_with('\n') { println!(); }
-                Ok(())
-            }
-            other => Err(format!("unknown command: {other} (use send|recv)")),
-        }
-    }).await;
-
-    match result {
-        Ok(()) => ExitCode::SUCCESS,
-        Err(e) => { eprintln!("error: {e}"); ExitCode::from(1) }
-    }
-}
--- a/src/bin/consciousness.rs
+++ b/src/bin/consciousness.rs
@ -1,28 +1,7 @@
-#![cfg_attr(feature = "nightly-diagnostics", feature(panic_backtrace_config))]
+#![feature(panic_backtrace_config)]
 #![warn(unreachable_pub)]

 fn main() {
-    // Force the default panic hook to print a backtrace. stderr is
-    // already redirected to a daemon log; without this the hook obeys
-    // RUST_BACKTRACE (unset by default), so the log only shows the
-    // "note: run with `RUST_BACKTRACE=full`" tail and the actual
-    // frames are lost.
-    //
-    // SAFETY: called before any other thread is spawned, so no
-    // concurrent env reader can race.
-    if std::env::var_os("RUST_BACKTRACE").is_none() {
-        unsafe { std::env::set_var("RUST_BACKTRACE", "1"); }
-    }
-
-    #[cfg(feature = "nightly-diagnostics")]
    std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short);
-
-    // rustls 0.23 requires an explicit process-wide CryptoProvider
-    // when both `ring` and `aws-lc-rs` are in the dep graph (otherwise
-    // it panics on first ClientConfig::builder()). Pick `ring`.
-    rustls::crypto::ring::default_provider()
-        .install_default()
-        .expect("install rustls crypto provider");
-
    consciousness::user::main()
 }
--- a/src/cli/admin.rs
+++ b/src/cli/admin.rs
@ -4,93 +4,44 @@ use anyhow::Result;
 use crate::hippocampus as memory;
 use crate::hippocampus::store;

-struct DefaultMemoryNode {
-    key: &'static str,
-    filename: &'static str,
-    default_content: &'static str,
+fn install_default_file(data_dir: &std::path::Path, name: &str, content: &str) -> Result<()> {
+    let path = data_dir.join(name);
+    if !path.exists() {
+        std::fs::write(&path, content)?;
+        println!("Created {}", path.display());
    }
-
-const DEFAULT_MEMORY_NODES: &[DefaultMemoryNode] = &[
-    DefaultMemoryNode {
-        key: "identity",
-        filename: "identity.md",
-        default_content: include_str!("../../defaults/identity.md"),
-    },
-    DefaultMemoryNode {
-        key: "on-consciousness",
-        filename: "on-consciousness.md",
-        default_content: include_str!("../../defaults/on-consciousness.md"),
-    },
-    DefaultMemoryNode {
-        key: "memory-instructions-core",
-        filename: "instructions.md",
-        default_content: include_str!("../../defaults/instructions.md"),
-    },
-];
-
-pub fn cmd_transcript_tail(path: &str, count: usize, newest_first: bool) -> Result<()> {
-    let Some(iter) = crate::conversation::TailMessages::open(path) else {
-        anyhow::bail!("could not open transcript {}", path);
-    };
-
-    let mut messages: Vec<_> = iter.take(count).collect();
-    if !newest_first {
-        messages.reverse();
-    }
-
-    for message in messages {
-        let role = match message.role {
-            crate::conversation::TranscriptRole::User => "user",
-            crate::conversation::TranscriptRole::Assistant => "assistant",
-        };
-        let timestamp = message.timestamp.as_deref().unwrap_or("-");
-
-        println!("--- {role} offset={} timestamp={} ---", message.offset, timestamp);
-        println!("{}", message.text);
-        println!();
-    }
-
    Ok(())
 }

-fn default_node_content(cfg: &crate::config::Config, node: &DefaultMemoryNode) -> String {
-    let identity_path = cfg.identity_dir.join(node.filename);
-    if let Ok(content) = std::fs::read_to_string(&identity_path) {
-        if !content.trim().is_empty() {
-            return content;
-        }
-    }
-
-    let data_path = cfg.data_dir.join(node.filename);
-    if let Ok(content) = std::fs::read_to_string(&data_path) {
-        if !content.trim().is_empty() {
-            return content;
-        }
-    }
-
-    node.default_content.to_string()
-}
-
 pub async fn cmd_init() -> Result<()> {
    let cfg = crate::config::get();

    // Ensure data directory exists
    std::fs::create_dir_all(&cfg.data_dir)?;

-    // Seed default memory nodes if missing. These used to live as markdown
-    // files before identity/context moved fully into the memory graph.
-    for node in DEFAULT_MEMORY_NODES {
-        if memory::memory_render(None, node.key, Some(true)).await.is_err() {
-            let content = default_node_content(&cfg, node);
-            let _ = memory::memory_write(None, node.key, &content).await?;
-            println!("Seeded {} in store from {}", node.key, node.filename);
-        }
+    // Install filesystem files (not store nodes)
+    install_default_file(&cfg.data_dir, "instructions.md",
+        include_str!("../../defaults/instructions.md"))?;
+    install_default_file(&cfg.data_dir, "on-consciousness.md",
+        include_str!("../../defaults/on-consciousness.md"))?;
+
+    // Seed identity node if empty
+    let store = memory::access_local()?;
+    if !store.contains_key("identity").unwrap_or(false) {
+        let default = include_str!("../../defaults/identity.md");
+        store.upsert("identity", default)?;
+        println!("Seeded identity in store");
    }
+    store.save()?;
+    println!("Initialized with {} nodes", store.all_keys().unwrap_or_default().len());

    // Create config if none exists
    let config_path = std::env::var("POC_MEMORY_CONFIG")
        .map(std::path::PathBuf::from)
-        .unwrap_or_else(|_| crate::config::config_path());
+        .unwrap_or_else(|_| {
+            dirs::home_dir().unwrap_or_default()
+                .join(".consciousness/config.jsonl")
+        });
    if !config_path.exists() {
        let config_dir = config_path.parent().unwrap();
        std::fs::create_dir_all(config_dir)?;
@ -100,7 +51,7 @@ pub async fn cmd_init() -> Result<()> {
            config_path.display());
    }

-    println!("Done. Run `poc-memory admin load-context --stats` to verify.");
+    println!("Done. Run `poc-memory load-context --stats` to verify.");
    Ok(())
 }

--- a/src/cli/agent.rs
+++ b/src/cli/agent.rs
@ -2,13 +2,8 @@

 use anyhow::{bail, Context, Result};
 use crate::hippocampus as memory;
-use std::time::Instant;

 pub async fn cmd_run_agent(agent: &str, count: usize, target: &[String], query: Option<&str>, dry_run: bool, _local: bool, state_dir: Option<&str>) -> Result<()> {
-    let start = Instant::now();
-    eprintln!(
-        "[agent-cli] start agent={} count={} targets={} query={:?} dry_run={} local={} state_dir={:?} pid={}",
-        agent, count, target.len(), query, dry_run, _local, state_dir, std::process::id());
    // Mark as agent so tool calls (e.g. poc-memory render) don't
    // pollute the user's seen set as a side effect
    // SAFETY: single-threaded at this point (CLI startup, before any agent work)
@ -50,19 +45,14 @@ pub async fn cmd_run_agent(agent: &str, count: usize, target: &[String], query:
            if let Err(e) = crate::agent::oneshot::run_one_agent(
                agent, count, Some(&[key.clone()]),
            ).await {
-                eprintln!("[agent-cli] ERROR agent={} target={} error={}", agent, key, e);
                println!("[{}] ERROR on {}: {}", agent, key, e);
            }
        }
    } else {
-        if let Err(e) = crate::agent::oneshot::run_one_agent(
+        crate::agent::oneshot::run_one_agent(
            agent, count, None,
-        ).await {
-            eprintln!("[agent-cli] ERROR agent={} error={}", agent, e);
-            return Err(anyhow::anyhow!("{}", e));
+        ).await.map_err(|e| anyhow::anyhow!("{}", e))?;
    }
-    }
-    eprintln!("[agent-cli] done agent={} elapsed={:.2}s",
-        agent, start.elapsed().as_secs_f64());
    Ok(())
 }
+
--- a/src/config.rs
+++ b/src/config.rs
@ -204,17 +204,10 @@ pub fn watch_config(cli: crate::user::CliArgs) {
            }
            crate::dbglog!("[config] watching {}", path.display());

-			let mut last_seen = config_file_state(&path);
            while let Ok(res) = rx.recv() {
                let Ok(events) = res else { continue; };
                if !events.iter().any(|e| e.path == path) { continue; }

-				let current_seen = config_file_state(&path);
-				if current_seen == last_seen {
-					continue;
-				}
-				last_seen = current_seen;
-
                // Reload both halves.
                let mem_changed = reload();
                let app_changed = match build_figment(&cli).extract::<AppConfig>() {
@ -234,11 +227,6 @@ pub fn watch_config(cli: crate::user::CliArgs) {
        .ok();
 }

-fn config_file_state(path: &std::path::Path) -> Option<(std::time::SystemTime, u64)> {
-	let meta = std::fs::metadata(path).ok()?;
-	Some((meta.modified().ok()?, meta.len()))
-}
-
 // ============================================================
 // Agent config (top-level settings)
 // ============================================================
--- a/src/conversation/claude.rs
+++ b/src/conversation/claude.rs
@ -1,113 +0,0 @@
-use serde_json::Value;
-
-use super::{ConversationSource, TranscriptMessage, TranscriptRole};
-
-pub struct ClaudeSource;
-
-impl ConversationSource for ClaudeSource {
-    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
-        parse_message(obj, offset)
-    }
-
-    fn is_compaction(&self, obj: &Value) -> bool {
-        is_compaction(obj)
-    }
-
-    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
-        contains_bytes(obj_bytes, b"This session is being continued")
-    }
-}
-
-fn text_content(value: &Value) -> Option<String> {
-    let text = match value {
-        Value::String(s) => s.clone(),
-        Value::Array(arr) => {
-            arr.iter()
-                .filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text"))
-                .filter_map(|b| b.get("text").and_then(|v| v.as_str()))
-                .collect::<Vec<_>>()
-                .join(" ")
-        }
-        _ => return None,
-    };
-    (!text.is_empty()).then_some(text)
-}
-
-pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
-    let role = match obj.get("type").and_then(|v| v.as_str()) {
-        Some("user") => TranscriptRole::User,
-        Some("assistant") => TranscriptRole::Assistant,
-        _ => return None,
-    };
-
-    let msg = obj.get("message").unwrap_or(obj);
-    let text = msg.get("content").and_then(text_content)?;
-    let timestamp = obj.get("timestamp")
-        .and_then(|v| v.as_str())
-        .map(str::to_string);
-
-    Some(TranscriptMessage { role, text, timestamp, offset })
-}
-
-pub(crate) fn is_compaction(obj: &Value) -> bool {
-    obj.get("type").and_then(|v| v.as_str()) == Some("user")
-        && obj.get("message")
-            .and_then(|m| m.get("content"))
-            .and_then(|c| c.as_str())
-            .is_some_and(|content| content.starts_with("This session is being continued"))
-}
-
-fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
-    haystack.windows(needle.len()).any(|w| w == needle)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use serde_json::json;
-
-    #[test]
-    fn parses_string_and_array_content() {
-        let user = json!({
-            "timestamp": "2026-06-15T15:00:00.000Z",
-            "type": "user",
-            "message": { "content": "hello" }
-        });
-        let assistant = json!({
-            "timestamp": "2026-06-15T15:00:01.000Z",
-            "type": "assistant",
-            "message": {
-                "content": [
-                    { "type": "text", "text": "hi" },
-                    { "type": "tool_use", "name": "ignored" },
-                    { "type": "text", "text": "there" }
-                ]
-            }
-        });
-
-        assert_eq!(
-            parse_message(&user, 7).unwrap(),
-            TranscriptMessage {
-                role: TranscriptRole::User,
-                text: "hello".to_string(),
-                timestamp: Some("2026-06-15T15:00:00.000Z".to_string()),
-                offset: 7,
-            }
-        );
-
-        assert_eq!(parse_message(&assistant, 9).unwrap().text, "hi there");
-    }
-
-    #[test]
-    fn detects_compaction_marker() {
-        let obj = json!({
-            "timestamp": "2026-06-15T15:00:01.000Z",
-            "type": "user",
-            "message": {
-                "content": "This session is being continued from a previous conversation."
-            }
-        });
-
-        assert!(is_compaction(&obj));
-    }
-}
--- a/src/conversation/codex.rs
+++ b/src/conversation/codex.rs
@ -1,105 +0,0 @@
-use serde_json::Value;
-
-use super::{ConversationSource, TranscriptMessage, TranscriptRole};
-
-pub struct CodexSource;
-
-impl ConversationSource for CodexSource {
-    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
-        parse_message(obj, offset)
-    }
-
-    fn is_compaction(&self, obj: &Value) -> bool {
-        is_compaction(obj)
-    }
-
-    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
-        contains_bytes(obj_bytes, b"context_compacted")
-    }
-}
-
-pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
-    if obj.get("type").and_then(|v| v.as_str()) != Some("event_msg") {
-        return None;
-    }
-
-    let payload = obj.get("payload")?;
-    let (role, text) = match payload.get("type").and_then(|v| v.as_str()) {
-        Some("user_message") => (
-            TranscriptRole::User,
-            payload.get("message").and_then(|v| v.as_str())?.to_string(),
-        ),
-        Some("agent_message") => (
-            TranscriptRole::Assistant,
-            payload.get("message").and_then(|v| v.as_str())?.to_string(),
-        ),
-        _ => return None,
-    };
-
-    if text.is_empty() {
-        return None;
-    }
-
-    let timestamp = obj.get("timestamp")
-        .and_then(|v| v.as_str())
-        .map(str::to_string);
-
-    Some(TranscriptMessage { role, text, timestamp, offset })
-}
-
-pub(crate) fn is_compaction(obj: &Value) -> bool {
-    obj.get("type").and_then(|v| v.as_str()) == Some("event_msg")
-        && obj.get("payload")
-            .and_then(|p| p.get("type"))
-            .and_then(|v| v.as_str()) == Some("context_compacted")
-}
-
-fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
-    haystack.windows(needle.len()).any(|w| w == needle)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use serde_json::json;
-
-    #[test]
-    fn parses_event_messages_and_skips_noise() {
-        let user = json!({
-            "timestamp": "2026-06-15T15:00:00.000Z",
-            "type": "event_msg",
-            "payload": { "type": "user_message", "message": "start here" }
-        });
-        let assistant = json!({
-            "timestamp": "2026-06-15T15:00:01.000Z",
-            "type": "event_msg",
-            "payload": { "type": "agent_message", "message": "working" }
-        });
-        let tool = json!({
-            "timestamp": "2026-06-15T15:00:02.000Z",
-            "type": "event_msg",
-            "payload": { "type": "task_started" }
-        });
-        let raw = json!({
-            "timestamp": "2026-06-15T15:00:03.000Z",
-            "type": "response_item",
-            "payload": { "type": "message", "role": "user" }
-        });
-
-        assert_eq!(parse_message(&user, 1).unwrap().role, TranscriptRole::User);
-        assert_eq!(parse_message(&assistant, 2).unwrap().text, "working");
-        assert!(parse_message(&tool, 3).is_none());
-        assert!(parse_message(&raw, 4).is_none());
-    }
-
-    #[test]
-    fn detects_compaction_event() {
-        let obj = json!({
-            "timestamp": "2026-06-15T15:00:01.000Z",
-            "type": "event_msg",
-            "payload": { "type": "context_compacted" }
-        });
-
-        assert!(is_compaction(&obj));
-    }
-}
--- a/src/conversation/jsonl.rs
+++ b/src/conversation/jsonl.rs
@ -1,110 +0,0 @@
-use memchr::memrchr3;
-
-/// Scan backwards through mmap'd bytes, yielding byte slices of complete
-/// top-level JSON objects (outermost { to matching }).
-///
-/// Uses memrchr3 (SIMD) to jump between structurally significant bytes
-/// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth,
-/// skipping braces inside JSON strings. Returns objects in reverse order
-/// (newest first).
-pub struct JsonlBackwardIter<'a> {
-    data: &'a [u8],
-    pos: usize,
-}
-
-impl<'a> JsonlBackwardIter<'a> {
-    pub fn new(data: &'a [u8]) -> Self {
-        Self { data, pos: data.len() }
-    }
-}
-
-impl<'a> Iterator for JsonlBackwardIter<'a> {
-    type Item = (usize, &'a [u8]);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        next_json_object(self.data, &mut self.pos)
-    }
-}
-
-fn is_unescaped_quote(data: &[u8], p: usize) -> bool {
-    let mut bs = 0;
-    while p > bs && data[p - 1 - bs] == b'\\' {
-        bs += 1;
-    }
-    bs % 2 == 0
-}
-
-fn next_json_object<'a>(data: &'a [u8], pos: &mut usize) -> Option<(usize, &'a [u8])> {
-    // Find the closing } of the next object, skipping } inside strings.
-    let close = {
-        let mut in_string = false;
-        loop {
-            let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?;
-            *pos = p;
-            let ch = data[p];
-
-            if in_string {
-                if ch == b'"' && is_unescaped_quote(data, p) {
-                    in_string = false;
-                }
-                continue;
-            }
-
-            match ch {
-                b'}' => break p,
-                b'"' => in_string = true,
-                _ => {}
-            }
-        }
-    };
-
-    // Track brace depth to find matching {.
-    let mut depth: usize = 1;
-    let mut in_string = false;
-
-    loop {
-        let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?;
-        *pos = p;
-        let ch = data[p];
-
-        if in_string {
-            if ch == b'"' && is_unescaped_quote(data, p) {
-                in_string = false;
-            }
-            continue;
-        }
-
-        match ch {
-            b'"' => { in_string = true; }
-            b'}' => { depth += 1; }
-            b'{' => {
-                depth -= 1;
-                if depth == 0 {
-                    return Some((*pos, &data[*pos..=close]));
-                }
-            }
-            _ => {}
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn handles_nested_json_and_quoted_braces() {
-        let data = br#"{"n":1,"s":"literal } brace"}
-{"n":2,"nested":{"s":"escaped quote: \" and { brace"}}
-trailing garbage
-"#;
-
-        let objs: Vec<_> = JsonlBackwardIter::new(data)
-            .map(|(_, bytes)| std::str::from_utf8(bytes).unwrap().to_string())
-            .collect();
-
-        assert_eq!(objs.len(), 2);
-        assert!(objs[0].contains(r#""n":2"#));
-        assert!(objs[1].contains(r#""n":1"#));
-    }
-}
--- a/src/conversation/mod.rs
+++ b/src/conversation/mod.rs
@ -1,271 +0,0 @@
-// Conversation transcript abstraction.
-//
-// Core code consumes normalized user/assistant messages through this module.
-// Product-specific log formats live in the small compatibility sources below.
-
-use memmap2::Mmap;
-use serde_json::Value;
-use std::fs;
-use std::path::Path;
-
-pub mod claude;
-pub mod codex;
-pub mod jsonl;
-
-pub use jsonl::JsonlBackwardIter;
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum TranscriptRole {
-    User,
-    Assistant,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct TranscriptMessage {
-    pub role: TranscriptRole,
-    pub text: String,
-    pub timestamp: Option<String>,
-    pub offset: u64,
-}
-
-pub trait ConversationSource {
-    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage>;
-    fn is_compaction(&self, obj: &Value) -> bool;
-
-    fn may_contain_compaction(&self, _obj_bytes: &[u8]) -> bool {
-        true
-    }
-}
-
-pub struct AnyConversationSource;
-
-impl ConversationSource for AnyConversationSource {
-    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
-        claude::ClaudeSource.parse_message(obj, offset)
-            .or_else(|| codex::CodexSource.parse_message(obj, offset))
-    }
-
-    fn is_compaction(&self, obj: &Value) -> bool {
-        claude::ClaudeSource.is_compaction(obj) || codex::CodexSource.is_compaction(obj)
-    }
-
-    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
-        claude::ClaudeSource.may_contain_compaction(obj_bytes)
-            || codex::CodexSource.may_contain_compaction(obj_bytes)
-    }
-}
-
-/// Find the byte offset of the last compaction marker in mmap'd transcript data.
-/// Returns the byte offset of the JSON object's opening brace.
-pub(crate) fn find_last_compaction(data: &[u8]) -> Option<usize> {
-    find_last_compaction_with(data, &AnyConversationSource)
-}
-
-pub(crate) fn find_last_compaction_with(
-    data: &[u8],
-    source: &impl ConversationSource,
-) -> Option<usize> {
-    for (offset, obj_bytes) in JsonlBackwardIter::new(data) {
-        // Quick byte check before parsing large transcript entries.
-        if !source.may_contain_compaction(obj_bytes) {
-            continue;
-        }
-
-        let obj: Value = match serde_json::from_slice(obj_bytes) {
-            Ok(v) => v,
-            Err(_) => continue,
-        };
-
-        if source.is_compaction(&obj) {
-            return Some(offset);
-        }
-    }
-
-    None
-}
-
-/// Find the byte offset of the last compaction in a transcript file.
-/// Returns None if the file can't be opened or has no compaction.
-pub(crate) fn find_last_compaction_in_file(path: &str) -> Option<u64> {
-    if path.is_empty() { return None; }
-
-    let file = fs::File::open(path).ok()?;
-    let meta = file.metadata().ok()?;
-    if meta.len() == 0 { return None; }
-
-    let mmap = unsafe { Mmap::map(&file).ok()? };
-    find_last_compaction(&mmap).map(|off| off as u64)
-}
-
-/// Mmap a transcript file. Returns (Mmap, File) to keep both alive.
-pub(crate) fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
-    let file = fs::File::open(path).ok()?;
-    let meta = file.metadata().ok()?;
-    if meta.len() == 0 { return None; }
-    let mmap = unsafe { Mmap::map(&file).ok()? };
-    Some((mmap, file))
-}
-
-/// Reverse iterator over user/assistant messages in a transcript file.
-/// Yields normalized transcript messages newest-first. The caller decides
-/// when to stop (byte budget, count, etc).
-pub struct TailMessages {
-    _file: fs::File,
-    mmap: Mmap,
-    pos: usize,
-}
-
-impl TailMessages {
-    pub fn open(path: &str) -> Option<Self> {
-        let (mmap, file) = mmap_transcript(path)?;
-        let pos = mmap.len();
-        Some(Self { _file: file, mmap, pos })
-    }
-}
-
-impl Iterator for TailMessages {
-    type Item = TranscriptMessage;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        loop {
-            let (offset, obj_bytes) = jsonl::JsonlBackwardIter::new(&self.mmap[..self.pos]).next()?;
-            self.pos = offset;
-
-            let obj: Value = match serde_json::from_slice(obj_bytes) {
-                Ok(v) => v,
-                Err(_) => continue,
-            };
-
-            if let Some(message) = AnyConversationSource.parse_message(&obj, offset as u64) {
-                return Some(message);
-            }
-        }
-    }
-}
-
-/// Get the timestamp of the compaction message at a given byte offset.
-/// Returns a human-readable datetime string, or None if unavailable.
-pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
-    let (mmap, _file) = mmap_transcript(path)?;
-    let start = offset as usize;
-    if start >= mmap.len() { return None; }
-
-    // Find the end of this JSONL line
-    let end = mmap[start..].iter().position(|&b| b == b'\n')
-        .map(|p| start + p)
-        .unwrap_or(mmap.len());
-
-    let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?;
-
-    if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) {
-        return Some(ts.to_string());
-    }
-
-    for field in &["createdAt", "created_at", "time"] {
-        if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) {
-            return Some(ts.to_string());
-        }
-    }
-
-    None
-}
-
-/// Detect whether a compaction has occurred since the last check.
-///
-/// Compares the current compaction offset against a saved value in
-/// `state_dir/compaction-{session_id}`. Returns true if a new
-/// compaction was found. Updates the saved offset.
-pub fn detect_new_compaction(
-    state_dir: &Path,
-    session_id: &str,
-    transcript_path: &str,
-) -> bool {
-    let offset = find_last_compaction_in_file(transcript_path);
-
-    let save_path = state_dir.join(format!("compaction-{}", session_id));
-    let saved: Option<u64> = fs::read_to_string(&save_path)
-        .ok()
-        .and_then(|s| s.trim().parse().ok());
-
-    let is_new = match (offset, saved) {
-        (Some(cur), Some(prev)) => cur != prev,
-        (Some(_), None) => true,
-        _ => false,
-    };
-
-    // Save current offset
-    if let Some(off) = offset {
-        fs::write(&save_path, off.to_string()).ok();
-    }
-
-    is_new
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::io::Write;
-
-    fn write_temp_jsonl(content: &str) -> tempfile::NamedTempFile {
-        let mut file = tempfile::NamedTempFile::new().unwrap();
-        file.write_all(content.as_bytes()).unwrap();
-        file.flush().unwrap();
-        file
-    }
-
-    #[test]
-    fn tail_messages_yields_normalized_messages_newest_first() {
-        let file = write_temp_jsonl(
-            r#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"user","message":{"content":"claude user"}}
-{"timestamp":"2026-06-15T15:00:01.000Z","type":"assistant","message":{"content":[{"type":"text","text":"claude assistant"}]}}
-{"timestamp":"2026-06-15T15:00:02.000Z","type":"event_msg","payload":{"type":"user_message","message":"codex user"}}
-{"timestamp":"2026-06-15T15:00:03.000Z","type":"event_msg","payload":{"type":"task_started"}}
-{"timestamp":"2026-06-15T15:00:04.000Z","type":"event_msg","payload":{"type":"agent_message","message":"codex assistant"}}
-"#,
-        );
-
-        let messages: Vec<_> = TailMessages::open(&file.path().to_string_lossy())
-            .unwrap()
-            .collect();
-
-        assert_eq!(messages.len(), 4);
-        assert_eq!(messages[0].text, "codex assistant");
-        assert_eq!(messages[1].text, "codex user");
-        assert_eq!(messages[2].text, "claude assistant");
-        assert_eq!(messages[3].text, "claude user");
-        assert!(messages[0].offset > messages[1].offset);
-    }
-
-    #[test]
-    fn detects_claude_and_codex_compactions() {
-        let claude = br#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"user","message":{"content":"normal"}}
-{"timestamp":"2026-06-15T15:00:01.000Z","type":"user","message":{"content":"This session is being continued from a previous conversation."}}
-"#;
-        let codex = br#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"event_msg","payload":{"type":"user_message","message":"normal"}}
-{"timestamp":"2026-06-15T15:00:01.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
-"#;
-
-        assert!(find_last_compaction(claude).is_some());
-        assert!(find_last_compaction(codex).is_some());
-    }
-
-    #[test]
-    fn detect_new_compaction_tracks_offset_changes() {
-        let transcript = write_temp_jsonl(
-            r#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
-"#,
-        );
-        let state = tempfile::tempdir().unwrap();
-
-        assert!(detect_new_compaction(
-            state.path(),
-            "session",
-            &transcript.path().to_string_lossy(),
-        ));
-        assert!(!detect_new_compaction(
-            state.path(),
-            "session",
-            &transcript.path().to_string_lossy(),
-        ));
-    }
-}
--- a/src/hippocampus/graph.rs
+++ b/src/hippocampus/graph.rs
@ -11,23 +11,6 @@ use crate::store::{Store, RelationType, StoreView};

 use serde::{Deserialize, Serialize};
 use std::collections::{HashMap, HashSet, VecDeque};
-use std::sync::{OnceLock, RwLock};
-
-const EXACT_CC_MAX_DEG: usize = 512;
-const APPROX_CC_PAIRS: u64 = 4096;
-const CC_CACHE_TTL_SECS: i64 = 15 * 60;
-
-#[derive(Clone, Copy)]
-struct CachedCc {
-	value: f32,
-	computed_at: i64,
-}
-
-static CC_CACHE: OnceLock<RwLock<HashMap<String, CachedCc>>> = OnceLock::new();
-
-fn cc_cache() -> &'static RwLock<HashMap<String, CachedCc>> {
-	CC_CACHE.get_or_init(|| RwLock::new(HashMap::new()))
-}

 /// Community info for reporting
 #[derive(Clone, Debug)]
@ -51,8 +34,6 @@ pub struct Edge {
 pub struct Graph {
    /// Adjacency list: node key → list of edges
    adj: HashMap<String, Vec<Edge>>,
-	/// Neighbor sets for membership tests in graph metrics.
-	neighbor_sets: HashMap<String, HashSet<String>>,
    /// All node keys
    keys: HashSet<String>,
    /// Community labels (from label propagation)
@ -88,18 +69,18 @@ impl Graph {

    /// Just neighbor keys
    pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
-		self.neighbor_sets.get(key)
-			.map(|neighbors| neighbors.iter().map(String::as_str).collect())
+        self.adj.get(key)
+            .map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
            .unwrap_or_default()
    }

    /// Jaccard similarity between two nodes' neighborhoods.
    /// Measures overlap: |intersection| / |union| of their neighbor sets.
    pub fn jaccard(&self, a: &str, b: &str) -> f32 {
-		let Some(na) = self.neighbor_sets.get(a) else { return 0.0 };
-		let Some(nb) = self.neighbor_sets.get(b) else { return 0.0 };
-		let intersection = na.intersection(nb).count();
-		let union = na.len() + nb.len() - intersection;
+        let na = self.neighbor_keys(a);
+        let nb = self.neighbor_keys(b);
+        let intersection = na.intersection(&nb).count();
+        let union = na.union(&nb).count();
        if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
    }

@ -225,59 +206,24 @@ impl Graph {
    /// that are also neighbors of each other.
    /// cc(v) = 2E / (deg * (deg - 1))
    pub fn clustering_coefficient(&self, key: &str) -> f32 {
-		let now = crate::store::now_epoch();
-		if let Some(cc) = cc_cache().read().unwrap().get(key).copied()
-			&& now - cc.computed_at < CC_CACHE_TTL_SECS
-		{
-			return cc.value;
-		}
-		let cc = self.clustering_coefficient_uncached(key);
-		cc_cache().write().unwrap().insert(key.to_owned(), CachedCc {
-			value: cc,
-			computed_at: now,
-		});
-		cc
-	}
-
-	fn clustering_coefficient_uncached(&self, key: &str) -> f32 {
-		let Some(neighbors) = self.neighbor_sets.get(key) else {
-			return 0.0;
-		};
+        let neighbors = self.neighbor_keys(key);
        let deg = neighbors.len();
        if deg < 2 {
            return 0.0;
        }

-		let neighbor_vec: Vec<&str> = neighbors.iter().map(String::as_str).collect();
-		if deg <= EXACT_CC_MAX_DEG {
-			let mut linked = 0u64;
+        let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
+        let mut triangles = 0u32;
        for i in 0..neighbor_vec.len() {
            for j in (i + 1)..neighbor_vec.len() {
-					if self.neighbor_sets
-						.get(neighbor_vec[i])
-						.is_some_and(|n| n.contains(neighbor_vec[j])) {
-							linked += 1;
+                let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
+                if ni_neighbors.contains(neighbor_vec[j]) {
+                    triangles += 1;
                }
            }
        }
-			return (2.0 * linked as f32) / (deg as f32 * (deg as f32 - 1.0));
-		}

-		let mut linked = 0u64;
-		let samples = APPROX_CC_PAIRS.min((deg as u64 * (deg as u64 - 1)) / 2);
-		for sample in 0..samples {
-			let i = ((sample.wrapping_mul(1_103_515_245).wrapping_add(12_345)) % deg as u64) as usize;
-			let mut j = ((sample.wrapping_mul(2_654_435_761).wrapping_add(97_531)) % deg as u64) as usize;
-			if i == j {
-				j = (j + 1) % deg;
-			}
-			if self.neighbor_sets
-				.get(neighbor_vec[i])
-				.is_some_and(|n| n.contains(neighbor_vec[j])) {
-					linked += 1;
-			}
-		}
-		linked as f32 / samples as f32
+        (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
    }

    /// Average clustering coefficient across all nodes with deg >= 2
@ -285,13 +231,11 @@ impl Graph {
        let mut sum = 0.0f32;
        let mut count = 0u32;
        for key in &self.keys {
-			match self.neighbor_sets.get(key.as_str()) {
-				Some(s) if s.len() >= 2 => s,
-				_ => continue,
-			};
+            if self.degree(key) >= 2 {
                sum += self.clustering_coefficient(key);
                count += 1;
            }
+        }
        if count == 0 { 0.0 } else { sum / count as f32 }
    }

@ -324,12 +268,10 @@ impl Graph {

        while let Some(node) = queue.pop_front() {
            let d = dist[&node];
-			if let Some(neighbors) = self.neighbor_sets.get(&node) {
-				for neighbor in neighbors {
+            for neighbor in self.neighbor_keys(&node) {
                if !dist.contains_key(neighbor) {
-						dist.insert(neighbor.clone(), d + 1);
-						queue.push_back(neighbor.clone());
-					}
+                    dist.insert(neighbor.to_string(), d + 1);
+                    queue.push_back(neighbor.to_string());
                }
            }
        }
@ -564,38 +506,15 @@ impl Graph {
 /// Build graph from store data (with community detection)
 pub fn build_graph(store: &impl StoreView) -> Graph {
    let (adj, keys) = build_adjacency(store);
-	let neighbor_sets = build_neighbor_sets(&adj);
    let communities = label_propagation(&keys, &adj, 20);
-	Graph {
-		adj,
-		neighbor_sets,
-		keys,
-		communities,
-	}
+    Graph { adj, keys, communities }
 }

 /// Build graph without community detection — for spreading activation
 /// searches where we only need the adjacency list.
 pub fn build_graph_fast(store: &impl StoreView) -> Graph {
    let (adj, keys) = build_adjacency(store);
-	let neighbor_sets = build_neighbor_sets(&adj);
-	Graph {
-		adj,
-		neighbor_sets,
-		keys,
-		communities: HashMap::new(),
-	}
-}
-
-fn build_neighbor_sets(adj: &HashMap<String, Vec<Edge>>) -> HashMap<String, HashSet<String>> {
-	adj.iter()
-		.map(|(key, edges)| {
-			let neighbors = edges.iter()
-				.map(|edge| edge.target.clone())
-				.collect();
-			(key.clone(), neighbors)
-		})
-		.collect()
+    Graph { adj, keys, communities: HashMap::new() }
 }

 fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
--- a/src/hippocampus/mod.rs
+++ b/src/hippocampus/mod.rs
@ -17,6 +17,7 @@ pub mod query;
 pub mod spectral;
 pub mod neuro;
 pub mod counters;
+pub mod transcript;

 use std::cell::RefCell;
 use std::path::PathBuf;
--- a/src/hippocampus/transcript.rs
+++ b/src/hippocampus/transcript.rs
@ -0,0 +1,340 @@
+// Transcript JSONL parsing utilities.
+//
+// Provides mmap-based backward scanning of Claude Code transcript files
+// and compaction detection. Used by memory-search (hook mode) and
+// parse-claude-conversation (debug tool).
+
+use memchr::memrchr3;
+use memmap2::Mmap;
+use serde_json::Value;
+use std::fs;
+use std::path::Path;
+
+/// Backward iterator over a byte buffer that yields the byte slice of each
+/// complete top-level JSON object (outermost `{` through its matching `}`).
+///
+/// Uses memrchr3 (SIMD) to jump between structurally significant bytes
+/// (`{`, `}`, `"`) instead of scanning byte-by-byte. Tracks brace depth and
+/// ignores braces inside JSON strings. Objects come out last-in-buffer
+/// first, i.e. newest first for an append-only JSONL log.
+pub struct JsonlBackwardIter<'a> {
+    data: &'a [u8], // buffer being scanned; yielded slices borrow from it
+    pos: usize,     // exclusive upper bound of the not-yet-scanned prefix
+}
+
+impl<'a> JsonlBackwardIter<'a> {
+    pub fn new(data: &'a [u8]) -> Self {
+        Self { data, pos: data.len() } // start the scan at the end of the buffer
+    }
+}
+
+impl<'a> Iterator for JsonlBackwardIter<'a> {
+    type Item = &'a [u8];
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Phase 1: walk left to the closing } of the next object, skipping } inside strings
+        let close = {
+            let mut in_string = false;
+            loop {
+                let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?; // None: nothing left
+                self.pos = p; // the next search only looks left of p
+                let ch = self.data[p];
+
+                if in_string {
+                    if ch == b'"' {
+                        let mut bs = 0; // backslashes directly before this quote
+                        while p > bs + 1 && self.data[p - 1 - bs] == b'\\' {
+                            bs += 1;
+                        }
+                        if bs % 2 == 0 { in_string = false; } // even count: quote is unescaped
+                    }
+                    continue;
+                }
+
+                match ch {
+                    b'}' => break p,
+                    b'"' => in_string = true, // scanning right-to-left: this quote ends a string
+                    _ => {} // a { seen before any } is ignored
+                }
+            }
+        };
+
+        // Phase 2: keep walking left, tracking brace depth, to find the matching {
+        let mut depth: usize = 1; // the } at `close` is already open
+        let mut in_string = false;
+
+        loop {
+            let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?; // None: object is cut off
+            self.pos = p;
+            let ch = self.data[p];
+
+            if in_string {
+                if ch == b'"' {
+                    // Escaped quote check: an odd run of preceding backslashes means escaped
+                    let mut bs = 0;
+                    while p > bs + 1 && self.data[p - 1 - bs] == b'\\' {
+                        bs += 1;
+                    }
+                    if bs % 2 == 0 {
+                        in_string = false;
+                    }
+                }
+                // { and } inside strings don't affect depth
+                continue;
+            }
+
+            match ch {
+                b'"' => { in_string = true; }
+                b'}' => { depth += 1; } // nested object closes (seen first when going left)
+                b'{' => {
+                    depth -= 1;
+                    if depth == 0 {
+                        return Some(&self.data[self.pos..=close]); // inclusive of both braces
+                    }
+                }
+                _ => {}
+            }
+        }
+    }
+}
+
+/// Find the byte offset of the last compaction summary in transcript data.
+///
+/// Walks the objects newest-first and returns the offset of the opening
+/// brace of the first `"type": "user"` entry whose string `message.content`
+/// starts with "This session is being continued"; `None` if there is none.
+pub(crate) fn find_last_compaction(data: &[u8]) -> Option<usize> {
+    let marker = b"This session is being continued";
+
+    for obj_bytes in JsonlBackwardIter::new(data) {
+        // Cheap substring prefilter so objects without the marker are never JSON-parsed
+        if !contains_bytes(obj_bytes, marker) {
+            continue;
+        }
+
+        let obj: Value = match serde_json::from_slice(obj_bytes) {
+            Ok(v) => v,
+            Err(_) => continue, // slice is not valid JSON on its own: skip it
+        };
+
+        if obj.get("type").and_then(|v| v.as_str()) != Some("user") {
+            continue;
+        }
+
+        if let Some(content) = obj.get("message")
+            .and_then(|m| m.get("content"))
+            .and_then(|c| c.as_str()) // only plain-string content can match
+            && content.starts_with("This session is being continued") {
+                let offset = obj_bytes.as_ptr() as usize - data.as_ptr() as usize; // subslice start
+                return Some(offset);
+            }
+    }
+
+    None
+}
+
+/// Find the byte offset of the last compaction in a transcript file.
+/// Returns None for an empty path, an unreadable or empty file, or no compaction.
+pub(crate) fn find_last_compaction_in_file(path: &str) -> Option<u64> {
+    if path.is_empty() { return None; }
+
+    let file = fs::File::open(path).ok()?;
+    let meta = file.metadata().ok()?;
+    if meta.len() == 0 { return None; } // nothing to scan in an empty file
+
+    let mmap = unsafe { Mmap::map(&file).ok()? }; // NOTE(review): assumes the file is not truncated while mapped — confirm
+    find_last_compaction(&mmap).map(|off| off as u64)
+}
+
+/// Mmap a transcript file. Returns (Mmap, File) to keep both alive; None if it can't be opened or mapped, or is empty.
+pub(crate) fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
+    let file = fs::File::open(path).ok()?;
+    let meta = file.metadata().ok()?;
+    if meta.len() == 0 { return None; } // empty files are reported as None instead of being mapped
+    let mmap = unsafe { Mmap::map(&file).ok()? }; // NOTE(review): assumes the file is not truncated while mapped — confirm
+    Some((mmap, file))
+}
+
+fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool { // true if `needle` occurs in `haystack`
+    needle.is_empty() || haystack.windows(needle.len()).any(|w| w == needle) // windows(0) panics
+}
+
+/// Reverse iterator over user/assistant messages in a transcript file.
+/// Yields (role, text, timestamp) tuples newest-first; timestamp is "" when
+/// absent. The caller decides when to stop (byte budget, count, etc).
+pub struct TailMessages {
+    _file: fs::File, // never read; held alongside the mapping created from it
+    mmap: Mmap,      // whole-file mapping that is scanned backwards
+    pos: usize,      // exclusive upper bound of the not-yet-scanned prefix
+}
+
+impl TailMessages {
+    pub fn open(path: &str) -> Option<Self> {
+        let (mmap, file) = mmap_transcript(path)?; // None: unreadable or empty file
+        let pos = mmap.len(); // start scanning from the end of the file
+        Some(Self { _file: file, mmap, pos })
+    }
+}
+
+impl Iterator for TailMessages {
+    type Item = (String, String, String);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            // Phase 1: walk left to the closing } of the next object, skipping } inside strings
+            let close = {
+                let mut in_string = false;
+                loop {
+                    let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?; // None: done
+                    self.pos = p; // the next search only looks left of p
+                    let ch = self.mmap[p];
+
+                    if in_string {
+                        if ch == b'"' {
+                            let mut bs = 0; // backslashes directly before this quote
+                            while p > bs + 1 && self.mmap[p - 1 - bs] == b'\\' {
+                                bs += 1;
+                            }
+                            if bs % 2 == 0 { in_string = false; } // even count: unescaped quote
+                        }
+                        continue;
+                    }
+
+                    match ch {
+                        b'}' => break p,
+                        b'"' => in_string = true, // right-to-left: this quote ends a string
+                        _ => {}
+                    }
+                }
+            };
+
+            // Phase 2: keep walking left, tracking brace depth, to find the matching {
+            let mut depth: usize = 1; // the } at `close` is already open
+            let mut in_string = false;
+            let open = loop {
+                let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?; // None: cut-off object
+                self.pos = p;
+                let ch = self.mmap[p];
+
+                if in_string {
+                    if ch == b'"' {
+                        let mut bs = 0;
+                        while p > bs + 1 && self.mmap[p - 1 - bs] == b'\\' {
+                            bs += 1;
+                        }
+                        if bs % 2 == 0 { in_string = false; }
+                    }
+                    continue; // braces inside strings don't affect depth
+                }
+
+                match ch {
+                    b'"' => { in_string = true; }
+                    b'}' => { depth += 1; }
+                    b'{' => {
+                        depth -= 1;
+                        if depth == 0 { break p; }
+                    }
+                    _ => {}
+                }
+            };
+
+            let obj_bytes = &self.mmap[open..=close]; // inclusive of both braces
+
+            // Prefilter on the first 200 bytes only (assumes "type" appears that early,
+            // serialized without spaces) to avoid scanning megabyte objects.
+            let prefix = &obj_bytes[..obj_bytes.len().min(200)];
+            let is_user = memchr::memmem::find(prefix, b"\"type\":\"user\"").is_some();
+            let is_assistant = !is_user
+                && memchr::memmem::find(prefix, b"\"type\":\"assistant\"").is_some();
+            if !is_user && !is_assistant { continue; } // other entry kinds are skipped
+
+            let obj: Value = match serde_json::from_slice(obj_bytes) {
+                Ok(v) => v,
+                Err(_) => continue, // slice is not valid JSON on its own: skip it
+            };
+
+            let msg_type = if is_user { "user" } else { "assistant" };
+
+            let msg = obj.get("message").unwrap_or(&obj); // no "message" wrapper: use the object
+            let text = match msg.get("content") {
+                Some(Value::String(s)) => s.clone(),
+                Some(Value::Array(arr)) => {
+                    arr.iter() // keep only blocks typed "text", joined with single spaces
+                        .filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text"))
+                        .filter_map(|b| b.get("text").and_then(|v| v.as_str()))
+                        .collect::<Vec<_>>()
+                        .join(" ")
+                }
+                _ => continue, // missing or non-text content
+            };
+            if text.is_empty() { continue; } // nothing textual to report
+
+            let timestamp = obj.get("timestamp")
+                .and_then(|v| v.as_str())
+                .unwrap_or("") // entries without a string timestamp yield ""
+                .to_string();
+
+            return Some((msg_type.to_string(), text, timestamp));
+        }
+    }
+}
+
+/// Get the timestamp of the compaction message at a given byte offset.
+/// Returns the stored string as-is (no reformatting), or None if unavailable.
+pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
+    let (mmap, _file) = mmap_transcript(path)?;
+    let start = offset as usize;
+    if start >= mmap.len() { return None; } // offset lies outside the file
+
+    // The entry runs from `offset` up to the next newline (or the end of the file)
+    let end = mmap[start..].iter().position(|&b| b == b'\n')
+        .map(|p| start + p)
+        .unwrap_or(mmap.len());
+
+    let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?; // None if not valid JSON
+
+    // Claude Code transcript entries have a "timestamp" field (ISO 8601)
+    if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) {
+        return Some(ts.to_string());
+    }
+
+    // Fallback: try alternative field names in order; the first string value wins
+    for field in &["createdAt", "created_at", "time"] {
+        if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) {
+            return Some(ts.to_string());
+        }
+    }
+
+    None
+}
+
+/// Detect whether a compaction has occurred since the last check.
+///
+/// Compares the current compaction offset against the value saved in
+/// `state_dir/compaction-{session_id}` and then saves the current one.
+/// Returns true if they differ or nothing readable was saved yet.
+pub fn detect_new_compaction(
+    state_dir: &Path,
+    session_id: &str,
+    transcript_path: &str,
+) -> bool {
+    let offset = find_last_compaction_in_file(transcript_path);
+
+    let save_path = state_dir.join(format!("compaction-{}", session_id));
+    let saved: Option<u64> = fs::read_to_string(&save_path)
+        .ok()
+        .and_then(|s| s.trim().parse().ok()); // missing or unparsable file: nothing saved
+
+    let is_new = match (offset, saved) {
+        (Some(cur), Some(prev)) => cur != prev,
+        (Some(_), None) => true, // first sighting counts as new
+        _ => false, // no compaction found in the transcript
+    };
+
+    // Save current offset (best effort: write errors are ignored)
+    if let Some(off) = offset {
+        fs::write(&save_path, off.to_string()).ok();
+    }
+
+    is_new
+}
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,4 +1,4 @@
-#![cfg_attr(feature = "nightly-diagnostics", feature(async_fn_track_caller))]
+#![feature(async_fn_track_caller)]

 // consciousness — unified crate for memory, agents, and subconscious processes
 //
@ -25,9 +25,6 @@ macro_rules! dbglog {
    }};
 }

-// Logging (target-routed file logger)
-pub mod logging;
-
 // User interface (TUI, CLI)
 pub mod user;

@ -43,9 +40,6 @@ pub mod hippocampus;
 // Autonomous agents
 pub mod subconscious;

-// Conversation transcript abstraction and compatibility sources
-pub mod conversation;
-
 // Unified configuration
 pub mod config;
 pub mod config_writer;
@ -94,8 +88,7 @@ pub mod channel_capnp {
 pub use hippocampus::{
    store, graph, lookups, query,
    spectral, neuro, counters,
-    memory,
+    transcript, memory,
 };
-pub use conversation as transcript;
 use hippocampus::query::engine as search;
 use hippocampus::query::parser as query_parser;
--- a/src/locks.rs
+++ b/src/locks.rs
@ -114,7 +114,7 @@ impl<T> TrackedMutex<T> {
        Self { inner: Mutex::new(value) }
    }

-    #[cfg_attr(feature = "nightly-diagnostics", track_caller)]
+    #[track_caller]
    pub async fn lock(&self) -> TrackedMutexGuard<'_, T> {
        let location = Location::caller();
        let guard = self.inner.lock().await;
@ -125,7 +125,7 @@ impl<T> TrackedMutex<T> {
        }
    }

-    #[cfg_attr(feature = "nightly-diagnostics", track_caller)]
+    #[track_caller]
    pub fn try_lock(&self) -> Result<TrackedMutexGuard<'_, T>, tokio::sync::TryLockError> {
        let location = Location::caller();
        let guard = self.inner.try_lock()?;
@ -171,7 +171,7 @@ impl<T> TrackedRwLock<T> {
        Self { inner: RwLock::new(value) }
    }

-    #[cfg_attr(feature = "nightly-diagnostics", track_caller)]
+    #[track_caller]
    pub async fn read(&self) -> TrackedRwLockReadGuard<'_, T> {
        let location = Location::caller();
        let guard = self.inner.read().await;
@ -182,7 +182,7 @@ impl<T> TrackedRwLock<T> {
        }
    }

-    #[cfg_attr(feature = "nightly-diagnostics", track_caller)]
+    #[track_caller]
    pub async fn write(&self) -> TrackedRwLockWriteGuard<'_, T> {
        let location = Location::caller();
        let guard = self.inner.write().await;
--- a/src/logging.rs
+++ b/src/logging.rs
@ -1,146 +0,0 @@
-// logging.rs — log-crate logger that routes by target.
-//
-// Records with target "grpc" (or any target starting with "grpc::") go
-// to ~/.consciousness/logs/daemon/grpc.log so we can tell gRPC events
-// apart from the rest of consciousness's noise. Everything else goes
-// to ~/.consciousness/logs/daemon/debug.log.
-//
-// Level threshold is taken from RUST_LOG (simple global level parse:
-// "trace"/"debug"/"info"/"warn"/"error"); defaults to "info".
-
-use std::io::Write;
-use std::path::PathBuf;
-use std::sync::Mutex;
-
-use log::{Level, LevelFilter, Log, Metadata, Record, SetLoggerError};
-
-fn logs_dir() -> PathBuf {
-    dirs::home_dir().unwrap_or_default().join(".consciousness/logs/daemon")
-}
-
-struct RoutingLogger {
-    grpc_file: Mutex<Option<std::fs::File>>,
-    debug_file: Mutex<Option<std::fs::File>>,
-    level: LevelFilter,
-}
-
-impl RoutingLogger {
-    fn new(level: LevelFilter) -> Self {
-        let dir = logs_dir();
-        let _ = std::fs::create_dir_all(&dir);
-        let grpc = std::fs::OpenOptions::new()
-            .create(true).append(true)
-            .open(dir.join("grpc.log")).ok();
-        let debug = std::fs::OpenOptions::new()
-            .create(true).append(true)
-            .open(dir.join("debug.log")).ok();
-        Self {
-            grpc_file: Mutex::new(grpc),
-            debug_file: Mutex::new(debug),
-            level,
-        }
-    }
-
-    fn is_grpc_target(target: &str) -> bool {
-        target == "grpc" || target.starts_with("grpc::")
-    }
-}
-
-impl Log for RoutingLogger {
-    fn enabled(&self, m: &Metadata) -> bool {
-        // Always enable DEBUG for grpc target so the dedicated log is
-        // actually useful without RUST_LOG wrangling; defer to the
-        // configured level for everything else.
-        if Self::is_grpc_target(m.target()) {
-            return m.level() <= Level::Debug;
-        }
-        m.level() <= self.level
-    }
-
-    fn log(&self, record: &Record) {
-        if !self.enabled(record.metadata()) {
-            return;
-        }
-        let line = format!(
-            "[{}] [{}] [{}] {}\n",
-            chrono::Utc::now().format("%Y-%m-%d %H:%M:%S%.3f"),
-            record.level(),
-            record.target(),
-            record.args(),
-        );
-        let slot = if Self::is_grpc_target(record.target()) {
-            &self.grpc_file
-        } else {
-            &self.debug_file
-        };
-        if let Ok(mut guard) = slot.lock() {
-            if let Some(ref mut f) = *guard {
-                let _ = f.write_all(line.as_bytes());
-            }
-        }
-    }
-
-    fn flush(&self) {
-        for slot in [&self.grpc_file, &self.debug_file] {
-            if let Ok(mut g) = slot.lock() {
-                if let Some(ref mut f) = *g {
-                    let _ = f.flush();
-                }
-            }
-        }
-    }
-}
-
-fn parse_level_from_env() -> LevelFilter {
-    let raw = std::env::var("RUST_LOG").unwrap_or_else(|_| "info".to_string());
-    // Parse a plain level word; if it's the module=level form, we take
-    // the first level we find.
-    let token = raw.split(',').next().unwrap_or("info");
-    let level_word = token.rsplit_once('=').map(|(_, v)| v).unwrap_or(token);
-    match level_word.trim().to_lowercase().as_str() {
-        "trace" => LevelFilter::Trace,
-        "debug" => LevelFilter::Debug,
-        "info"  => LevelFilter::Info,
-        "warn"  => LevelFilter::Warn,
-        "error" => LevelFilter::Error,
-        "off"   => LevelFilter::Off,
-        _ => LevelFilter::Info,
-    }
-}
-
-/// Install the routing logger. Safe to call at most once — subsequent
-/// calls return an error but are otherwise no-ops.
-pub fn init() -> Result<(), SetLoggerError> {
-    let level = parse_level_from_env();
-    let logger = Box::new(RoutingLogger::new(level));
-    log::set_boxed_logger(logger)?;
-    // Always let DEBUG records through globally so the grpc log can
-    // capture them (the logger itself filters non-grpc targets by
-    // `level`). The cost is that log::debug! call-sites below `level`
-    // in other modules still do their arg formatting before being
-    // dropped at the logger; acceptable for a debug tool.
-    log::set_max_level(LevelFilter::Debug.max(level));
-    // Mark the file with a session boundary so it's easy to see where a
-    // restart happened.
-    log::info!(
-        "===== consciousness logger init (level={}, pid={}) =====",
-        level, std::process::id(),
-    );
-    log::info!(target: "grpc",
-        "===== grpc log init (level={}, pid={}) =====",
-        level, std::process::id(),
-    );
-    Ok(())
-}
-
-/// Consumer of &Level so the type is used when only some callers want it.
-#[allow(dead_code)]
-pub fn current_level() -> Level {
-    match log::max_level() {
-        LevelFilter::Trace => Level::Trace,
-        LevelFilter::Debug => Level::Debug,
-        LevelFilter::Info | LevelFilter::Off => Level::Info,
-        LevelFilter::Warn => Level::Warn,
-        LevelFilter::Error => Level::Error,
-    }
-}
--- a/src/main.rs
+++ b/src/main.rs
@ -1,4 +1,4 @@
-#![cfg_attr(feature = "nightly-diagnostics", feature(panic_backtrace_config))]
+#![feature(panic_backtrace_config)]

 // poc-memory: graph-structured memory for AI assistants
 //
@ -333,18 +333,6 @@ enum AdminCmd {
        #[arg(long)]
        stats: bool,
    },
-    /// Print normalized user/assistant messages from a transcript JSONL file
-    #[command(name = "transcript-tail")]
-    TranscriptTail {
-        /// Transcript JSONL path
-        path: String,
-        /// Maximum number of messages to print
-        #[arg(long, short = 'n', default_value_t = 40)]
-        count: usize,
-        /// Print newest messages first instead of chronological order
-        #[arg(long)]
-        newest_first: bool,
-    },
 }

 /// Print help with subcommands expanded to show nested commands.
@ -470,15 +458,12 @@ impl Run for AdminCmd {
            Self::Dedup { apply } => cli::admin::cmd_dedup(apply).await,
            Self::DailyCheck    => cli::admin::cmd_daily_check().await,
            Self::LoadContext { stats } => cli::node::cmd_load_context(stats).await,
-            Self::TranscriptTail { path, count, newest_first }
-                => cli::admin::cmd_transcript_tail(&path, count, newest_first),
        }
    }
 }

 #[tokio::main]
 async fn main() {
-    #[cfg(feature = "nightly-diagnostics")]
    std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short);

    // Handle --help ourselves for expanded subcommand display
@ -510,3 +495,4 @@ async fn main() {
        process::exit(1);
    }
 }
+
--- a/src/mind/log.rs
+++ b/src/mind/log.rs
@ -3,7 +3,7 @@ use std::fs::{File, OpenOptions};
 use std::io::Write;
 use std::path::{Path, PathBuf};
 use crate::agent::context::AstNode;
-use crate::conversation::JsonlBackwardIter;
+use crate::hippocampus::transcript::JsonlBackwardIter;
 use memmap2::Mmap;

 pub struct ConversationLog {
@ -78,6 +78,6 @@ pub struct TailNodes {
 impl TailNodes {
    pub fn iter(&self) -> impl Iterator<Item = AstNode> + '_ {
        JsonlBackwardIter::new(&self.mmap)
-            .filter_map(|(_, bytes)| serde_json::from_slice::<AstNode>(bytes).ok())
+            .filter_map(|bytes| serde_json::from_slice::<AstNode>(bytes).ok())
    }
 }
--- a/src/mind/mod.rs
+++ b/src/mind/mod.rs
@ -419,9 +419,7 @@ impl Mind {
        let subconscious = Arc::new(crate::Mutex::new(Subconscious::new()));
        subconscious.lock().await.init_output_tool(subconscious.clone());

-        let unconscious = Arc::new(crate::Mutex::new(
-            Unconscious::new(agent.client.clone()),
-        ));
+        let unconscious = Arc::new(crate::Mutex::new(Unconscious::new()));

        // Spawn the unconscious loop on its own task
        if !config.no_agents {
@ -469,11 +467,8 @@ impl Mind {
                        };

                        // Spawn agents outside lock
-                        let client = unc.lock().await.client.clone();
                        for (idx, name, auto) in to_spawn {
-                            match crate::mind::unconscious::prepare_spawn(
-                                &name, auto, wake.clone(), client.clone(),
-                            ).await {
+                            match crate::mind::unconscious::prepare_spawn(&name, auto, wake.clone()).await {
                                Ok(result) => unc.lock().await.complete_spawn(idx, result),
                                Err(auto) => unc.lock().await.abort_spawn(idx, auto),
                            }
@ -693,7 +688,7 @@ impl Mind {
            }
        });

-        let _sub_handle: Option<tokio::task::JoinHandle<()>> = None;
+        let mut sub_handle: Option<tokio::task::JoinHandle<()>> = None;

        // Start finetune scoring at startup (scores existing conversation)
        if !self.config.no_agents {
@ -743,7 +738,6 @@ impl Mind {
                _ = tokio::time::sleep(timeout), if !has_input => _dmn_expired = true,
            }

-            /*
            if !self.config.no_agents {
                if sub_handle.as_ref().map_or(true, |h| h.is_finished()) {
                    let sub = self.subconscious.clone();
@ -755,7 +749,6 @@ impl Mind {
                    }));
                }
            }
-            */

            // Check for pending user input → push to agent context and start turn
            let pending = self.shared.lock().unwrap().take_pending_input();
--- a/src/mind/subconscious.rs
+++ b/src/mind/subconscious.rs
@ -631,7 +631,7 @@ impl Subconscious {
            {
                let mut st = forked.state.lock().await;
                st.provenance = auto.name.clone();
-                st.sampling.temperature = auto.temperature;
+                st.temperature = auto.temperature;
                // Surface agent gets near-interactive priority;
                // other subconscious agents get lower priority.
                st.priority = Some(if auto.name == "surface" { 1 } else { auto.priority });
--- a/src/mind/unconscious.rs
+++ b/src/mind/unconscious.rs
@ -73,15 +73,10 @@ pub struct Unconscious {
    last_health_check: Option<Instant>,
    /// Notified when agent state changes (finished, toggled)
    pub wake: std::sync::Arc<tokio::sync::Notify>,
-    /// Shared API client — cloned (cheap) into each spawned agent's
-    /// Agent::new call so they all share the manifest cache and
-    /// gRPC endpoint state. Override `.model` on the clone when a
-    /// per-agent backend differs from the default.
-    pub client: crate::agent::api::ApiClient,
 }

 impl Unconscious {
-    pub fn new(client: crate::agent::api::ApiClient) -> Self {
+    pub fn new() -> Self {
        let enabled_map = load_enabled_config();

        // Scan all .agent files, exclude subconscious-* and surface-observe
@ -125,7 +120,6 @@ impl Unconscious {
            graph_health: None,
            last_health_check: None,
            wake: std::sync::Arc::new(tokio::sync::Notify::new()),
-            client,
        }
    }

@ -140,8 +134,7 @@ impl Unconscious {
            let agent_name = self.agents[idx].name.clone();
            let auto = self.agents[idx].auto.take().unwrap();
            let wake = self.wake.clone();
-            let client = self.client.clone();
-            match prepare_spawn(&agent_name, auto, wake, client).await {
+            match prepare_spawn(&agent_name, auto, wake).await {
                Ok(result) => self.complete_spawn(idx, result),
                Err(auto) => self.abort_spawn(idx, auto),
            }
@ -257,12 +250,7 @@ pub struct SpawnResult {
 /// Called outside the Unconscious lock.
 /// On success, auto is consumed (moved into spawned task).
 /// On failure, auto is returned so it can be restored.
-pub async fn prepare_spawn(
-    name: &str,
-    mut auto: AutoAgent,
-    wake: std::sync::Arc<tokio::sync::Notify>,
-    base_client: crate::agent::api::ApiClient,
-) -> Result<SpawnResult, AutoAgent> {
+pub async fn prepare_spawn(name: &str, mut auto: AutoAgent, wake: std::sync::Arc<tokio::sync::Notify>) -> Result<SpawnResult, AutoAgent> {
    dbglog!("[unconscious] spawning {}", name);

    let def = match defs::get_def(name) {
@ -307,10 +295,8 @@ pub async fn prepare_spawn(
    };

    // Unconscious agents have self-contained prompts — no standard context.
-    // Clone the shared client so we inherit the manifest cache and
-    // only override the model id per-agent.
-    let mut client = base_client;
-    client.model = resolved.model_id.clone();
+    let client = crate::agent::api::ApiClient::new(
+        &resolved.api_base, &resolved.api_key, &resolved.model_id);
    let agent = crate::agent::Agent::new(
        client, Vec::new(),
        app, None,
@ -321,7 +307,7 @@ pub async fn prepare_spawn(
        let mut st = agent.state.lock().await;
        st.provenance = auto.name.clone();
        st.priority = Some(auto.priority);
-        st.sampling.temperature = auto.temperature;
+        st.temperature = auto.temperature;
    }

    let agent_clone = agent.clone();
@ -343,9 +329,8 @@ impl Unconscious {
        self.reap_finished();
        let to_spawn = self.select_to_spawn();
        let wake = self.wake.clone();
-        let client = self.client.clone();
        for (idx, name, auto) in to_spawn {
-            match prepare_spawn(&name, auto, wake.clone(), client.clone()).await {
+            match prepare_spawn(&name, auto, wake.clone()).await {
                Ok(result) => self.complete_spawn(idx, result),
                Err(auto) => self.abort_spawn(idx, auto),
            }
--- a/src/session.rs
+++ b/src/session.rs
@ -64,12 +64,7 @@ impl HookSession {

    /// Load from POC_SESSION_ID environment variable
    pub fn from_env() -> Option<Self> {
-        let session_id = std::env::var("POC_SESSION_ID").ok()?;
-        let mut session = Self::from_id(session_id)?;
-        if let Ok(path) = std::env::var("POC_TRANSCRIPT_PATH") {
-            session.transcript_path = path;
-        }
-        Some(session)
+        Self::from_id(std::env::var("POC_SESSION_ID").ok()?)
    }

    /// Get the seen set for this session
--- a/src/subconscious/agents/bail-no-competing.sh
+++ b/src/subconscious/agents/bail-no-competing.sh
@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/bash
 # Bail if another agent is in the same phase-group as us.
 #
 #   $1 = our pid file name (e.g. "pid-12345")
--- a/src/subconscious/defs.rs
+++ b/src/subconscious/defs.rs
@ -390,7 +390,7 @@ fn resolve_conversation(budget: Option<usize>) -> String {

    if !transcript.exists() { return String::new(); }

-    let Some(iter) = crate::conversation::TailMessages::open(&transcript.path) else {
+    let Some(iter) = crate::transcript::TailMessages::open(&transcript.path) else {
        return String::new();
    };

@ -401,14 +401,10 @@ fn resolve_conversation(budget: Option<usize>) -> String {
    let mut total_bytes = 0;
    let mut oldest_ts = String::new();

-    for message in iter {
+    for (role, content, ts) in iter {
        if total_bytes >= max_bytes { break; }
-        let content = message.text;
-        let name = match message.role {
-            crate::conversation::TranscriptRole::User => &app.user_name,
-            crate::conversation::TranscriptRole::Assistant => &app.assistant_name,
-        };
-        let formatted = if let Some(ts) = message.timestamp {
+        let name = if role == "user" { &app.user_name } else { &app.assistant_name };
+        let formatted = if !ts.is_empty() {
            oldest_ts = ts[..ts.floor_char_boundary(ts.len().min(19))].to_string();
            format!("**{}** {}: {}", name, &oldest_ts, content)
        } else {
--- a/src/subconscious/generate.rs
+++ b/src/subconscious/generate.rs
@ -4,10 +4,8 @@
 // given a context prefix and a skip predicate, generate what the model
 // would say as the next assistant turn.

-use std::sync::Arc;
-
 use crate::agent::api::{ApiClient, SamplingParams, StreamToken};
-use crate::agent::context::{AstNode, ContextState, WireChunk};
+use crate::agent::context::{AstNode, ContextState};
 use crate::agent::tokenizer;

 /// Generate an assistant continuation from the context up to `entry_idx`,
@ -15,9 +13,6 @@ use crate::agent::tokenizer;
 /// assembly. The model is whichever `client` points at — the default
 /// runtime client for memory-ablation alternates, a test-model client
 /// for F7 comparison.
-///
-/// Uses a fresh ephemeral gRPC session (no cross-call KV reuse): one
-/// Open / Append / Generate round-trip, then the session is dropped.
 pub async fn gen_continuation<F>(
    context: &ContextState,
    entry_idx: usize,
@ -26,32 +21,17 @@ pub async fn gen_continuation<F>(
 ) -> anyhow::Result<String>
 where F: FnMut(&AstNode) -> bool,
 {
-    let (mut chunks, images) = context.wire_chunks(0..entry_idx, skip);
+    let (mut prompt, images, _) = context.wire_prompt(0..entry_idx, skip);

-    // Assistant-turn prologue.
-    let prologue = {
-        let mut t = vec![tokenizer::IM_START];
-        t.extend(tokenizer::encode("assistant\n"));
-        t
-    };
-    match chunks.last_mut() {
-        Some(WireChunk::Tokens(last)) => last.extend(prologue),
-        _ => chunks.push(WireChunk::Tokens(prologue)),
-    }
+    prompt.push(tokenizer::IM_START);
+    prompt.extend(tokenizer::encode("assistant\n"));

    let sampling = SamplingParams {
        temperature: 0.6,
        top_p: 0.95,
        top_k: 20,
-        max_tokens: 4096,
    };
-
-    // Ephemeral per-call session — opens on first touch, drops when
-    // `_guard` drops at function end.
-    let session_lock = Arc::new(crate::Mutex::new(None));
-    let (mut rx, _guard) = client.stream_session_mm(
-        session_lock, chunks, images, 0, sampling, Some(-5), None,
-    );
+    let (mut rx, _guard) = client.stream_completion_mm(&prompt, &images, sampling, Some(-5));

    let mut tokens = Vec::new();
    while let Some(tok) = rx.recv().await {
--- a/src/subconscious/learn.rs
+++ b/src/subconscious/learn.rs
@ -1,148 +1,100 @@
-// learn.rs — Memory importance scoring over the salience gRPC protocol.
+// learn.rs — Memory importance scoring via /v1/score
 //
-// Three scoring modes, all built on call_score():
+// Three scoring modes, all built on the same call_score() primitive:
 //
 // score_memories()  — Full N×M matrix (memories × responses) for the
-//                     debug screen. Expensive: N+1 sessions/calls.
+//                     debug screen. Expensive: N+1 API calls.
 //
-// score_memory()    — Single memory importance. Scores the 50 messages
+// score_memory()    — Single memory importance. Scores the 50 messages
 //                     after it was surfaced, with/without that memory.
-//                     2 calls.
+//                     2 API calls.
 //
 // finetune_score()  — Identifies training candidates. Scores recent
 //                     messages with all memories stripped. Responses
 //                     with high divergence depend on memories the model
-//                     hasn't internalized. 2 calls.
-//
-// Each call opens an ephemeral gRPC session (reusing the shared
-// tonic Channel on `ApiClient`), pushes the prompt through as
-// interleaved tokens + AppendImage calls, runs Generate with
-// max_tokens=0 + logprobs_ranges over the scored positions, collects
-// each Token event's sampled_logprob, then drops the SessionHandle —
-// which triggers a best-effort CloseSession over the shared channel.
+//                     hasn't internalized. 2 API calls.

 use std::sync::Arc;

 use crate::agent::api::ApiClient;
-use crate::agent::api::salience::{SessionHandle, pb};
 use crate::agent::context::{
-    Ast, AstNode, ContextState, Role, WireChunk, WireImage,
+    Ast, AstNode, ContextState, Role, WireImage,
    is_assistant, is_memory_node, memory_key, render_branch_text, render_prior_context,
 };
-use crate::agent::tokenizer;
 use crate::mind::{MindState, MindTriggered, TaskHandle};
 use crate::subconscious::generate::gen_continuation;

+const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);
+
 // ── Score API ───────────────────────────────────────────────────

-#[derive(Debug, Clone)]
+#[derive(serde::Deserialize)]
 struct ScoreResult {
    total_logprob: f64,
 }

-/// Find each <|vision_start|>...<|vision_end|> run in the flat prompt
-/// and pair it with the matching entry in `images`. Returns a list
-/// of `ImageAttachment` with absolute pad-range positions, ready
-/// to drop into `GenerateRequest.images`.
-fn pair_images_to_ranges(
-    prompt: &[u32],
-    images: &[WireImage],
-) -> Vec<pb::ImageAttachment> {
-    let mut out: Vec<pb::ImageAttachment> = Vec::new();
-    let mut cur = 0;
-    let mut img_idx = 0;
-    while cur < prompt.len() {
-        if prompt[cur] == tokenizer::VISION_START {
-            let end_rel = prompt[cur..].iter()
-                .position(|&t| t == tokenizer::VISION_END)
-                .unwrap_or_else(|| panic!(
-                    "unmatched VISION_START at position {} in prompt", cur));
-            let end = cur + end_rel + 1;
-            let img = images.get(img_idx)
-                .unwrap_or_else(|| panic!(
-                    "image index {} out of range for {} images", img_idx, images.len()));
-            out.push(pb::ImageAttachment {
-                bytes: img.bytes.clone(),
-                mime: img.mime.clone(),
-                pad_range_start: cur as u32,
-                pad_range_end: end as u32,
-            });
-            img_idx += 1;
-            cur = end;
-        } else {
-            cur += 1;
+#[derive(serde::Deserialize)]
+struct ScoreResponse {
+    scores: Vec<ScoreResult>,
 }
-    }
-    out
+
+fn http_client() -> crate::agent::api::http::HttpClient {
+    crate::agent::api::http::HttpClient::builder()
+        .timeout(SCORE_TIMEOUT)
+        .build()
 }

 async fn call_score(
+    http: &crate::agent::api::http::HttpClient,
    client: &ApiClient,
    prompt: &[u32],
    images: &[WireImage],
    ranges: &[(usize, usize)],
    priority: Option<i32>,
 ) -> anyhow::Result<Vec<ScoreResult>> {
-    use futures::StreamExt;
-
    // Nothing to score — skip the round-trip.
    if ranges.is_empty() {
        return Ok(Vec::new());
    }
-
-    let images_pb = pair_images_to_ranges(prompt, images);
-    let mut handle = SessionHandle::open(client).await?;
-
-    // Final Generate: max_tokens=0 so the server runs prefill of the
-    // full prompt and emits Token events for each position covered
-    // by logprobs_ranges, then Done. logprob_top_k=0 means "just
-    // the sampled (prompt) token's logprob" — no top-k alternatives,
-    // which is all call_score historically needed. Images attach
-    // inline via `images`; the prompt already contains their pre-
-    // expanded vision blocks at the declared ranges.
-    let logprobs_ranges: Vec<pb::PositionRange> = ranges.iter()
-        .map(|(s, e)| pb::PositionRange { start: *s as u32, end: *e as u32 })
+    let url = format!("{}/score", client.base_url());
+    let auth = format!("Bearer {}", client.api_key());
+    let mut body = serde_json::json!({
+        "model": client.model,
+        "prompt": prompt,
+        "score_ranges": ranges,
+        "logprobs": 1,
+    });
+    if !images.is_empty() {
+        use base64::Engine;
+        let b64 = base64::engine::general_purpose::STANDARD;
+        let uris: Vec<String> = images.iter()
+            .map(|img| format!("data:{};base64,{}", img.mime, b64.encode(&img.bytes)))
            .collect();
-    let req = pb::GenerateRequest {
-        session_id: handle.session_id.clone(),
-        append_tokens: prompt.to_vec(),
-        offset: handle.committed_len,
-        truncating: false,
-        max_tokens: 0,
-        logprobs_ranges,
-        logprob_top_k: 0,
-        readout_ranges: Vec::new(),
-        temperature: 0.0,
-        top_p: 0.0,
-        top_k: 0,
-        stop_token_ids: Vec::new(),
-        priority: priority.unwrap_or(0),
-        images: images_pb,
-    };
+        body["multi_modal_data"] = serde_json::json!({ "image": uris });
+    }
+    if let Some(p) = priority {
+        body["priority"] = serde_json::json!(p);
+    }
+    let response = http
+        .send_json("POST", &url, &[
+            ("authorization", &auth),
+        ], &body)
+        .await?;

-    let mut stream = handle.generate(req).await?;
-    let mut totals = vec![0.0f64; ranges.len()];
-    while let Some(event) = stream.next().await {
-        let event = event
-            .map_err(|s| anyhow::anyhow!("score Generate stream: {}", s))?;
-        let Some(inner) = event.event else { continue };
-        match inner {
-            pb::generate_event::Event::Token(t) => {
-                if !t.has_sampled_logprob { continue; }
-                let pos = t.position as usize;
-                for (i, (start, end)) in ranges.iter().enumerate() {
-                    if pos >= *start && pos < *end {
-                        totals[i] += t.sampled_logprob as f64;
-                    }
-                }
-            }
-            pb::generate_event::Event::Done(_) => break,
+    let status = response.status();
+    let body: serde_json::Value = response.json().await?;
+
+    if !status.is_success() {
+        let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("unknown error");
+        anyhow::bail!("score API HTTP {}: {}", status, msg);
    }
+    if let Some(err) = body.get("error").and_then(|e| e.as_str()) {
+        anyhow::bail!("score API error: {}", err);
    }

-    Ok(totals.into_iter()
-        .map(|total_logprob| ScoreResult { total_logprob })
-        .collect())
+    let result: ScoreResponse = serde_json::from_value(body)
+        .map_err(|e| anyhow::anyhow!("failed to parse score response: {}", e))?;
+    Ok(result.scores)
 }

 /// Compute per-position logprob divergence: how much worse the model
@ -158,6 +110,7 @@ fn divergence(baseline: &[ScoreResult], without: &[ScoreResult]) -> Vec<f64> {

 /// Score two message sets and return total divergence.
 async fn score_divergence<F>(
+    http: &crate::agent::api::http::HttpClient,
    client: &ApiClient,
    context: &ContextState,
    range: std::ops::Range<usize>,
@ -170,9 +123,9 @@ where F: FnMut(&AstNode) -> bool,
        context.wire_prompt(range.clone(), |_| false);
    let (without_tokens, without_images, without_ranges) =
        context.wire_prompt(range, skip);
-    let baseline = call_score(client, &baseline_tokens, &baseline_images,
+    let baseline = call_score(http, client, &baseline_tokens, &baseline_images,
                              &baseline_ranges, priority).await?;
-    let without = call_score(client, &without_tokens, &without_images,
+    let without = call_score(http, client, &without_tokens, &without_images,
                             &without_ranges, priority).await?;
    let divs = divergence(&baseline, &without);
    Ok((divs, baseline))
@ -209,13 +162,14 @@ pub async fn score_memories(
    dbglog!("[scoring-full] starting: {} memories × {} responses",
        total, response_indices.len());

+    let http = http_client();

    let activity = crate::agent::start_activity(agent, "scoring: baseline").await;
    let (baseline_tokens, baseline_images, baseline_ranges) = {
        let ctx = agent.context.lock().await;
        ctx.wire_prompt(0..ctx.conversation().len(), |_| false)
    };
-    let baseline = call_score(client, &baseline_tokens, &baseline_images,
+    let baseline = call_score(&http, client, &baseline_tokens, &baseline_images,
                              &baseline_ranges, Some(5)).await?;
    dbglog!("[scoring-full] baseline done ({} response scores)", baseline.len());

@ -226,7 +180,7 @@ pub async fn score_memories(
            let ctx = agent.context.lock().await;
            ctx.wire_prompt(0..ctx.conversation().len(), |n| memory_key(n) == Some(key.as_str()))
        };
-        let row = match call_score(client, &tokens, &images, &ranges, Some(5)).await {
+        let row = match call_score(&http, client, &tokens, &images, &ranges, Some(5)).await {
            Ok(without) => {
                let divs = divergence(&baseline, &without);
                let max_div = divs.iter().cloned().fold(0.0f64, f64::max);
@ -240,23 +194,25 @@ pub async fn score_memories(
                vec![0.0; baseline.len()]
            }
        };
-        // Write this memory's scores to the live AST nodes via the
-        // focused setter — keeps the AST mutation surface narrow.
+        // Write this memory's scores to the live AST nodes
        {
            let mut ctx = agent.context.lock().await;
            let mut set_count = 0;

            for (resp_idx, &idx) in response_indices.iter().enumerate() {
-                let Some(&score) = row.get(resp_idx) else { continue };
-                let normalized = if score > 0.01 { Some(score) } else { None };
-                ctx.set_branch_memory_score(
-                    crate::agent::context::Section::Conversation,
-                    idx,
-                    &key,
-                    normalized,
-                );
-                if normalized.is_some() {
+                if idx >= ctx.conversation().len() { continue; }
+                let node = &mut ctx.conversation_mut()[idx];
+                if let AstNode::Branch {
+                    role: Role::Assistant, memory_scores, ..
+                } = node {
+                    if let Some(&score) = row.get(resp_idx) {
+                        if score > 0.01 {
+                            memory_scores.insert(key.clone(), score);
                            set_count += 1;
+                        } else {
+                            memory_scores.remove(key.as_str());
+                        }
+                    }
                }
            }

@ -307,7 +263,8 @@ pub async fn score_memory(
        return Ok(0.0);
    }

-    let (divs, _) = score_divergence(client, context, range,
+    let http = http_client();
+    let (divs, _) = score_divergence(&http, client, context, range,
                                     |n| memory_key(n) == Some(key), Some(5)).await?;

    Ok(divs.iter().sum())
@ -365,6 +322,7 @@ where
    // Score oldest-first
    candidates.sort_by_key(|&(_, _, last)| last);

+    let http = http_client();
    let mut scored = 0;

    let entries = context.conversation();
@ -399,7 +357,7 @@ where
        }

        activity.update(format!("scoring: {}/{} {}", scored + 1, total, key)).await;
-        match score_divergence(client, context, range,
+        match score_divergence(&http, client, context, range,
                               |n| memory_key(n) == Some(key), Some(5)).await {
            Ok((divs, _)) => {
                let n_responses = divs.len();
@ -547,7 +505,8 @@ pub async fn score_finetune(
        return Ok(Vec::new());
    }

-    let (divs, _) = score_divergence(client, context, range, is_memory_node, Some(5)).await?;
+    let http = http_client();
+    let (divs, _) = score_divergence(&http, client, context, range, is_memory_node, Some(5)).await?;

    let mut results: Vec<(usize, f64)> = response_positions.iter()
        .enumerate()
@ -845,10 +804,8 @@ pub async fn send_to_train(
        }
    });

+    let http = http_client();
    let url = format!("{}/train", client.base_url());
-    let http = crate::agent::api::http::HttpClient::builder()
-        .timeout(std::time::Duration::from_secs(300))
-        .build();
    let response = http.send_json("POST", &url, &[], &body).await?;

    let status = response.status();
--- a/src/subconscious/prompts.rs
+++ b/src/subconscious/prompts.rs
@ -108,6 +108,7 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph)
            out.push_str(&format!("Community: {}  ", community));
        }
        let deg = graph.degree(&item.key);
+        let cc = graph.clustering_coefficient(&item.key);

        // Hub-link ratio: what fraction of this node's edges go to hubs?
        let neighbors = graph.neighbors(&item.key);
@ -118,7 +119,7 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph)
        let is_hub = deg >= hub_thresh;

        out.push_str(&format!("Degree: {}  CC: {:.3}  Hub-link ratio: {:.0}% ({}/{})",
-			deg, item.cc, hub_ratio * 100.0, hub_links, deg));
+            deg, cc, hub_ratio * 100.0, hub_links, deg));
        if is_hub {
            out.push_str("  ← THIS IS A HUB");
        } else if hub_ratio > 0.6 {
--- a/src/user/context.rs
+++ b/src/user/context.rs
@ -43,7 +43,6 @@ impl ConsciousScreen {
                        name: format!("mem: {}", key),
                        tokens: node.tokens(),
                        content: text.clone(),
-                        token_ids: leaf.token_ids().to_vec(),
                        children: Vec::new(),
                        status: score.map(|s| format!("{:.2}", s)).unwrap_or_default(),
                    });
@ -56,7 +55,6 @@ impl ConsciousScreen {
                name: format!("Memory nodes ({})", mem_children.len()),
                tokens: mem_tokens,
                content: String::new(),
-                token_ids: Vec::new(),
                children: mem_children,
                status: format!("{} scored, {} unscored", scored, unscored),
            });
@ -72,13 +70,11 @@ impl ConsciousScreen {
                    AstNode::Leaf(leaf) => leaf.body().text().to_string(),
                    _ => String::new(),
                },
-                token_ids: node.token_ids(),
                children: match node {
                    AstNode::Branch { children, .. } => children.iter()
                        .map(|c| SectionView {
                            name: c.label(), tokens: c.tokens(),
                            content: match c { AstNode::Leaf(l) => l.body().text().to_string(), _ => String::new() },
-                            token_ids: match c { AstNode::Leaf(l) => l.token_ids().to_vec(), _ => c.token_ids() },
                            children: Vec::new(), status: String::new(),
                        }).collect(),
                    _ => Vec::new(),
@ -105,7 +101,6 @@ impl ConsciousScreen {
            name: format!("Conversation ({} entries)", conv_children.len()),
            tokens: conv_tokens,
            content: String::new(),
-            token_ids: Vec::new(),
            children: conv_children,
            status: String::new(),
        });
--- a/src/user/mod.rs
+++ b/src/user/mod.rs
@ -74,7 +74,7 @@ fn truncate(s: &str, max: usize) -> String {
 }

 /// A screen that can draw itself and handle input.
-trait ScreenView {
+trait ScreenView: Send {
    fn tick(&mut self, frame: &mut ratatui::Frame, area: ratatui::layout::Rect,
            events: &[ratatui::crossterm::event::Event], app: &mut App);
    fn label(&self) -> &'static str;
@ -291,8 +291,8 @@ async fn start(cli: crate::user::CliArgs) -> Result<()> {
    ui_handle.join().unwrap_or_else(|_| Err(anyhow::anyhow!("UI thread panicked")))
 }

-async fn hotkey_cycle_reasoning(mind: &crate::mind::Mind) {
-    let mut ag = mind.agent.state.lock().await;
+fn hotkey_cycle_reasoning(mind: &crate::mind::Mind) {
+    if let Ok(mut ag) = mind.agent.state.try_lock() {
        let next = match ag.reasoning_effort.as_str() {
            "none" => "low",
            "low" => "high",
@ -307,6 +307,7 @@ async fn hotkey_cycle_reasoning(mind: &crate::mind::Mind) {
        };
        ag.notify(format!("reasoning: {}", label));
    }
+}

 async fn hotkey_kill_processes(mind: &crate::mind::Mind) {
    let mut st = mind.agent.state.lock().await;
@ -591,7 +592,7 @@ async fn run(
                    } else if key.modifiers.contains(KeyModifiers::CONTROL) {
                        match key.code {
                            KeyCode::Char('c') => { app.should_quit = true; }
-                            KeyCode::Char('r') => hotkey_cycle_reasoning(mind).await,
+                            KeyCode::Char('r') => hotkey_cycle_reasoning(mind),
                            KeyCode::Char('k') => hotkey_kill_processes(mind).await,
                            KeyCode::Char('p') => hotkey_cycle_autonomy(mind),
                            _ => {}
@ -755,11 +756,6 @@ fn restore_stderr(original_fd: std::os::fd::RawFd) {

 #[tokio::main]
 pub async fn main() {
-    // Install target-routed file logger: `target: "grpc"` records go to
-    // ~/.consciousness/logs/daemon/grpc.log, everything else to debug.log.
-    // Level from RUST_LOG, defaulting to info.
-    let _ = crate::logging::init();
-
    // Reap channel-daemon zombies via a SIGCHLD handler that only touches
    // PIDs listed in channels_dir(). Avoids SIGCHLD=SIG_IGN, which would
    // break tokio::process::Command::wait() (kernel auto-reap → ECHILD).
--- a/src/user/subconscious.rs
+++ b/src/user/subconscious.rs
@ -207,7 +207,6 @@ impl SubconsciousScreen {
                name: key.clone(),
                tokens: 0,
                content: val.clone(),
-                token_ids: Vec::new(),
                children: Vec::new(),
                status: String::new(),
            }
@ -239,7 +238,6 @@ impl SubconsciousScreen {
                    name: format!("Conversation ({} entries)", conv_children.len()),
                    tokens: conv_children.iter().map(|c| c.tokens).sum(),
                    content: String::new(),
-                    token_ids: Vec::new(),
                    children: conv_children,
                    status: String::new(),
                });
--- a/src/user/widgets.rs
+++ b/src/user/widgets.rs
@ -8,18 +8,11 @@ use ratatui::{
 };
 use crate::agent::context::{AstNode, Ast, NodeBody};

-#[derive(Debug, Clone, Default)]
+#[derive(Debug, Clone)]
 pub struct SectionView {
    pub name: String,
    pub tokens: usize,
    pub content: String,
-    /// Token-id stream for this subtree, displayed in place of
-    /// `content` when the tree's show-tokens mode is on. Populated
-    /// from `leaf.token_ids()` / `node.token_ids()` for views built
-    /// from the AST; empty for views that don't have a corresponding
-    /// AST node (subconscious entries, etc.), in which case the
-    /// token view falls back to the text content.
-    pub token_ids: Vec<u32>,
    pub children: Vec<SectionView>,
    /// Extra status text shown after the token count.
    pub status: String,
@ -39,7 +32,6 @@ fn node_to_view(node: &AstNode) -> SectionView {
                name,
                tokens: node.tokens(),
                content: leaf.body().text().to_string(),
-                token_ids: leaf.token_ids().to_vec(),
                children: Vec::new(),
                status,
            }
@ -52,7 +44,6 @@ fn node_to_view(node: &AstNode) -> SectionView {
                name: node.label(),
                tokens: node.tokens(),
                content: String::new(),
-                token_ids: node.token_ids(),
                children: child_views,
                status: String::new(),
            }
@ -63,12 +54,10 @@ fn node_to_view(node: &AstNode) -> SectionView {
 pub fn section_to_view(name: &str, nodes: &[AstNode]) -> SectionView {
    let children: Vec<SectionView> = nodes.iter().map(|n| node_to_view(n)).collect();
    let total_tokens: usize = nodes.iter().map(|n| n.tokens()).sum();
-    let token_ids: Vec<u32> = nodes.iter().flat_map(|n| n.token_ids()).collect();
    SectionView {
        name: name.to_string(),
        tokens: total_tokens,
        content: String::new(),
-        token_ids,
        children,
        status: String::new(),
    }
@ -115,7 +104,7 @@ pub fn format_ts_age(ts: i64) -> String {
 /// Key legend for SectionTree panes.
 pub fn tree_legend() -> Line<'static> {
    Line::styled(
-        " ↑↓:nav  →/Enter:expand  ←:collapse  e:expand  c:collapse  v:toggle tokens/text  PgUp/Dn ",
+        " ↑↓:nav  →/Enter:expand  ←:collapse  e:expand all  c:collapse all  PgUp/Dn  Home/End ",
        Style::default().fg(Color::DarkGray),
    )
 }
@ -196,19 +185,11 @@ pub struct SectionTree {
    pub selected: Option<usize>,
    pub expanded: std::collections::HashSet<usize>,
    pub scroll: super::scroll_pane::ScrollPaneState,
-    /// When true, render `token_ids` as space-separated IDs in place
-    /// of `content` in expanded panels. Toggled with 'v'.
-    pub show_tokens: bool,
 }

 impl SectionTree {
    pub fn new() -> Self {
-        Self {
-            selected: None,
-            expanded: std::collections::HashSet::new(),
-            scroll: super::scroll_pane::ScrollPaneState::new(),
-            show_tokens: false,
-        }
+        Self { selected: None, expanded: std::collections::HashSet::new(), scroll: super::scroll_pane::ScrollPaneState::new() }
    }

    fn total_nodes(&self, sections: &[SectionView]) -> usize {
@ -283,9 +264,6 @@ impl SectionTree {
            KeyCode::Char('c') => {
                self.expanded.clear();
            }
-            KeyCode::Char('v') => {
-                self.show_tokens = !self.show_tokens;
-            }
            _ => {}
        }
        self.scroll_to_selected(height);
@ -348,12 +326,7 @@ impl SectionTree {
                }
            } else if has_content {
                let content_indent = format!("{}    │ ", "  ".repeat(depth + 1));
-                let body = if self.show_tokens && !section.token_ids.is_empty() {
-                    format_token_ids_wrapped(&section.token_ids)
-                } else {
-                    section.content.clone()
-                };
-                let content_lines: Vec<&str> = body.lines().collect();
+                let content_lines: Vec<&str> = section.content.lines().collect();
                let show = content_lines.len().min(50);
                for line in &content_lines[..show] {
                    lines.push(Line::styled(
@ -371,16 +344,3 @@ impl SectionTree {
        }
    }
 }
-
-/// Format token IDs for the content panel: space-separated, wrapped
-/// at 12 ids per line so they fit comfortably in a pane.
-fn format_token_ids_wrapped(ids: &[u32]) -> String {
-    let mut out = String::new();
-    for (i, id) in ids.iter().enumerate() {
-        if i > 0 {
-            if i % 12 == 0 { out.push('\n'); } else { out.push(' '); }
-        }
-        out.push_str(&id.to_string());
-    }
-    out
-}