Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

49 changed files with 1180 additions and 4680 deletions

514
Cargo.lock generated
View file

@ -165,39 +165,6 @@ dependencies = [
"tree-sitter-yaml", "tree-sitter-yaml",
] ]
[[package]]
name = "async-stream"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476"
dependencies = [
"async-stream-impl",
"futures-core",
"pin-project-lite",
]
[[package]]
name = "async-stream-impl"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "async-trait"
version = "0.1.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]] [[package]]
name = "atomic" name = "atomic"
version = "0.6.1" version = "0.6.1"
@ -241,53 +208,6 @@ dependencies = [
"fs_extra", "fs_extra",
] ]
[[package]]
name = "axum"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
dependencies = [
"async-trait",
"axum-core",
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"itoa",
"matchit",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"sync_wrapper",
"tower 0.5.3",
"tower-layer",
"tower-service",
]
[[package]]
name = "axum-core"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper",
"tower-layer",
"tower-service",
]
[[package]] [[package]]
name = "base64" name = "base64"
version = "0.13.1" version = "0.13.1"
@ -571,7 +491,6 @@ dependencies = [
"anyhow", "anyhow",
"ast-grep-core", "ast-grep-core",
"ast-grep-language", "ast-grep-language",
"async-stream",
"base64 0.22.1", "base64 0.22.1",
"bytes", "bytes",
"capnp", "capnp",
@ -599,14 +518,11 @@ dependencies = [
"notify-debouncer-mini", "notify-debouncer-mini",
"paste", "paste",
"peg", "peg",
"prost",
"protoc-bin-vendored",
"ratatui", "ratatui",
"redb", "redb",
"regex", "regex",
"rustls", "rustls",
"rustls-native-certs", "rustls-native-certs",
"rustls-pemfile",
"serde", "serde",
"serde_json", "serde_json",
"serde_urlencoded", "serde_urlencoded",
@ -615,10 +531,7 @@ dependencies = [
"tokenizers", "tokenizers",
"tokio", "tokio",
"tokio-rustls", "tokio-rustls",
"tokio-stream",
"tokio-util", "tokio-util",
"tonic",
"tonic-build",
"tui-markdown", "tui-markdown",
"tui-textarea-2", "tui-textarea-2",
"uuid", "uuid",
@ -1151,12 +1064,6 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]]
name = "fixedbitset"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
[[package]] [[package]]
name = "flate2" name = "flate2"
version = "1.1.9" version = "1.1.9"
@ -1381,31 +1288,6 @@ dependencies = [
"regex-syntax", "regex-syntax",
] ]
[[package]]
name = "h2"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
dependencies = [
"atomic-waker",
"bytes",
"fnv",
"futures-core",
"futures-sink",
"http",
"indexmap 2.14.0",
"slab",
"tokio",
"tokio-util",
"tracing",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.15.5" version = "0.15.5"
@ -1511,12 +1393,6 @@ version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
[[package]]
name = "httpdate"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]] [[package]]
name = "hyper" name = "hyper"
version = "1.9.0" version = "1.9.0"
@ -1527,11 +1403,9 @@ dependencies = [
"bytes", "bytes",
"futures-channel", "futures-channel",
"futures-core", "futures-core",
"h2",
"http", "http",
"http-body", "http-body",
"httparse", "httparse",
"httpdate",
"itoa", "itoa",
"pin-project-lite", "pin-project-lite",
"smallvec", "smallvec",
@ -1539,19 +1413,6 @@ dependencies = [
"want", "want",
] ]
[[package]]
name = "hyper-timeout"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0"
dependencies = [
"hyper",
"hyper-util",
"pin-project-lite",
"tokio",
"tower-service",
]
[[package]] [[package]]
name = "hyper-util" name = "hyper-util"
version = "0.1.20" version = "0.1.20"
@ -1559,17 +1420,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
dependencies = [ dependencies = [
"bytes", "bytes",
"futures-channel",
"futures-util",
"http", "http",
"http-body", "http-body",
"hyper", "hyper",
"libc",
"pin-project-lite", "pin-project-lite",
"socket2 0.6.3",
"tokio", "tokio",
"tower-service",
"tracing",
] ]
[[package]] [[package]]
@ -1630,16 +1485,6 @@ version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09e54e57b4c48b40f7aec75635392b12b3421fa26fe8b4332e63138ed278459c" checksum = "09e54e57b4c48b40f7aec75635392b12b3421fa26fe8b4332e63138ed278459c"
[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
]
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "2.14.0" version = "2.14.0"
@ -2013,12 +1858,6 @@ dependencies = [
"xml5ever", "xml5ever",
] ]
[[package]]
name = "matchit"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.8.0" version = "2.8.0"
@ -2049,12 +1888,6 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "mime"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]] [[package]]
name = "minimal-lexical" name = "minimal-lexical"
version = "0.2.1" version = "0.2.1"
@ -2105,12 +1938,6 @@ dependencies = [
"syn 2.0.117", "syn 2.0.117",
] ]
[[package]]
name = "multimap"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
[[package]] [[package]]
name = "new_debug_unreachable" name = "new_debug_unreachable"
version = "1.0.6" version = "1.0.6"
@ -2406,16 +2233,6 @@ dependencies = [
"sha2", "sha2",
] ]
[[package]]
name = "petgraph"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [
"fixedbitset 0.5.7",
"indexmap 2.14.0",
]
[[package]] [[package]]
name = "phf" name = "phf"
version = "0.11.3" version = "0.11.3"
@ -2468,26 +2285,6 @@ dependencies = [
"siphasher", "siphasher",
] ]
[[package]]
name = "pin-project"
version = "1.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]] [[package]]
name = "pin-project-lite" name = "pin-project-lite"
version = "0.2.17" version = "0.2.17"
@ -2507,7 +2304,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07" checksum = "740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07"
dependencies = [ dependencies = [
"base64 0.22.1", "base64 0.22.1",
"indexmap 2.14.0", "indexmap",
"quick-xml", "quick-xml",
"serde", "serde",
"time", "time",
@ -2581,122 +2378,6 @@ dependencies = [
"yansi", "yansi",
] ]
[[package]]
name = "prost"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
dependencies = [
"bytes",
"prost-derive",
]
[[package]]
name = "prost-build"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
dependencies = [
"heck",
"itertools",
"log",
"multimap",
"once_cell",
"petgraph",
"prettyplease",
"prost",
"prost-types",
"regex",
"syn 2.0.117",
"tempfile",
]
[[package]]
name = "prost-derive"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
dependencies = [
"anyhow",
"itertools",
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "prost-types"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16"
dependencies = [
"prost",
]
[[package]]
name = "protoc-bin-vendored"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1c381df33c98266b5f08186583660090a4ffa0889e76c7e9a5e175f645a67fa"
dependencies = [
"protoc-bin-vendored-linux-aarch_64",
"protoc-bin-vendored-linux-ppcle_64",
"protoc-bin-vendored-linux-s390_64",
"protoc-bin-vendored-linux-x86_32",
"protoc-bin-vendored-linux-x86_64",
"protoc-bin-vendored-macos-aarch_64",
"protoc-bin-vendored-macos-x86_64",
"protoc-bin-vendored-win32",
]
[[package]]
name = "protoc-bin-vendored-linux-aarch_64"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c350df4d49b5b9e3ca79f7e646fde2377b199e13cfa87320308397e1f37e1a4c"
[[package]]
name = "protoc-bin-vendored-linux-ppcle_64"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a55a63e6c7244f19b5c6393f025017eb5d793fd5467823a099740a7a4222440c"
[[package]]
name = "protoc-bin-vendored-linux-s390_64"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dba5565db4288e935d5330a07c264a4ee8e4a5b4a4e6f4e83fad824cc32f3b0"
[[package]]
name = "protoc-bin-vendored-linux-x86_32"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8854774b24ee28b7868cd71dccaae8e02a2365e67a4a87a6cd11ee6cdbdf9cf5"
[[package]]
name = "protoc-bin-vendored-linux-x86_64"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b38b07546580df720fa464ce124c4b03630a6fb83e05c336fea2a241df7e5d78"
[[package]]
name = "protoc-bin-vendored-macos-aarch_64"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89278a9926ce312e51f1d999fee8825d324d603213344a9a706daa009f1d8092"
[[package]]
name = "protoc-bin-vendored-macos-x86_64"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81745feda7ccfb9471d7a4de888f0652e806d5795b61480605d4943176299756"
[[package]]
name = "protoc-bin-vendored-win32"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95067976aca6421a523e491fce939a3e65249bac4b977adee0ee9771568e8aa3"
[[package]] [[package]]
name = "pulldown-cmark" name = "pulldown-cmark"
version = "0.13.3" version = "0.13.3"
@ -2752,8 +2433,6 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [ dependencies = [
"libc",
"rand_chacha 0.3.1",
"rand_core 0.6.4", "rand_core 0.6.4",
] ]
@ -2763,20 +2442,10 @@ version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [ dependencies = [
"rand_chacha 0.9.0", "rand_chacha",
"rand_core 0.9.5", "rand_core 0.9.5",
] ]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core 0.6.4",
]
[[package]] [[package]]
name = "rand_chacha" name = "rand_chacha"
version = "0.9.0" version = "0.9.0"
@ -2792,9 +2461,6 @@ name = "rand_core"
version = "0.6.4" version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom 0.2.17",
]
[[package]] [[package]]
name = "rand_core" name = "rand_core"
@ -3043,15 +2709,6 @@ dependencies = [
"security-framework", "security-framework",
] ]
[[package]]
name = "rustls-pemfile"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50"
dependencies = [
"rustls-pki-types",
]
[[package]] [[package]]
name = "rustls-pki-types" name = "rustls-pki-types"
version = "1.14.0" version = "1.14.0"
@ -3174,7 +2831,7 @@ version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
dependencies = [ dependencies = [
"indexmap 2.14.0", "indexmap",
"itoa", "itoa",
"memchr", "memchr",
"serde", "serde",
@ -3278,16 +2935,6 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"
[[package]]
name = "socket2"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "socket2" name = "socket2"
version = "0.6.3" version = "0.6.3"
@ -3402,12 +3049,6 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "sync_wrapper"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
[[package]] [[package]]
name = "syntect" name = "syntect"
version = "5.3.0" version = "5.3.0"
@ -3486,7 +3127,7 @@ dependencies = [
"fancy-regex", "fancy-regex",
"filedescriptor", "filedescriptor",
"finl_unicode", "finl_unicode",
"fixedbitset 0.4.2", "fixedbitset",
"hex", "hex",
"lazy_static", "lazy_static",
"libc", "libc",
@ -3646,7 +3287,7 @@ dependencies = [
"parking_lot", "parking_lot",
"pin-project-lite", "pin-project-lite",
"signal-hook-registry", "signal-hook-registry",
"socket2 0.6.3", "socket2",
"tokio-macros", "tokio-macros",
"windows-sys 0.61.2", "windows-sys 0.61.2",
] ]
@ -3672,17 +3313,6 @@ dependencies = [
"tokio", "tokio",
] ]
[[package]]
name = "tokio-stream"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70"
dependencies = [
"futures-core",
"pin-project-lite",
"tokio",
]
[[package]] [[package]]
name = "tokio-util" name = "tokio-util"
version = "0.7.18" version = "0.7.18"
@ -3697,130 +3327,6 @@ dependencies = [
"tokio", "tokio",
] ]
[[package]]
name = "tonic"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [
"async-stream",
"async-trait",
"axum",
"base64 0.22.1",
"bytes",
"h2",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-timeout",
"hyper-util",
"percent-encoding",
"pin-project",
"prost",
"rustls-native-certs",
"rustls-pemfile",
"socket2 0.5.10",
"tokio",
"tokio-rustls",
"tokio-stream",
"tower 0.4.13",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tonic-build"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11"
dependencies = [
"prettyplease",
"proc-macro2",
"prost-build",
"prost-types",
"quote",
"syn 2.0.117",
]
[[package]]
name = "tower"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
dependencies = [
"futures-core",
"futures-util",
"indexmap 1.9.3",
"pin-project",
"pin-project-lite",
"rand 0.8.5",
"slab",
"tokio",
"tokio-util",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tower"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
dependencies = [
"futures-core",
"futures-util",
"pin-project-lite",
"sync_wrapper",
"tower-layer",
"tower-service",
]
[[package]]
name = "tower-layer"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
[[package]]
name = "tower-service"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
[[package]]
name = "tracing"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
dependencies = [
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-attributes"
version = "0.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "tracing-core"
version = "0.1.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
dependencies = [
"once_cell",
]
[[package]] [[package]]
name = "tree-sitter" name = "tree-sitter"
version = "0.26.8" version = "0.26.8"
@ -4379,7 +3885,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"indexmap 2.14.0", "indexmap",
"wasm-encoder", "wasm-encoder",
"wasmparser", "wasmparser",
] ]
@ -4392,7 +3898,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
dependencies = [ dependencies = [
"bitflags 2.11.0", "bitflags 2.11.0",
"hashbrown 0.15.5", "hashbrown 0.15.5",
"indexmap 2.14.0", "indexmap",
"semver", "semver",
] ]
@ -4761,7 +4267,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"heck", "heck",
"indexmap 2.14.0", "indexmap",
"prettyplease", "prettyplease",
"syn 2.0.117", "syn 2.0.117",
"wasm-metadata", "wasm-metadata",
@ -4792,7 +4298,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bitflags 2.11.0", "bitflags 2.11.0",
"indexmap 2.14.0", "indexmap",
"log", "log",
"serde", "serde",
"serde_derive", "serde_derive",
@ -4811,7 +4317,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"id-arena", "id-arena",
"indexmap 2.14.0", "indexmap",
"log", "log",
"semver", "semver",
"serde", "serde",

View file

@ -18,9 +18,6 @@ name = "consciousness"
version.workspace = true version.workspace = true
edition.workspace = true edition.workspace = true
[features]
nightly-diagnostics = []
[dependencies] [dependencies]
anyhow = "1" anyhow = "1"
html2md = "0.2" html2md = "0.2"
@ -64,11 +61,6 @@ futures = "0.3"
capnp = "0.25" capnp = "0.25"
capnp-rpc = "0.25" capnp-rpc = "0.25"
tonic = { version = "0.12", features = ["tls", "tls-roots"] }
prost = "0.13"
async-stream = "0.3"
tokio-stream = "0.1"
tokenizers = "0.22" tokenizers = "0.22"
http = "1" http = "1"
@ -82,13 +74,10 @@ imagesize = "0.14"
rustls = "0.23" rustls = "0.23"
tokio-rustls = "0.26" tokio-rustls = "0.26"
rustls-native-certs = "0.8" rustls-native-certs = "0.8"
rustls-pemfile = "2"
serde_urlencoded = "0.7" serde_urlencoded = "0.7"
[build-dependencies] [build-dependencies]
capnpc = "0.25" capnpc = "0.25"
tonic-build = { version = "0.12", default-features = false, features = ["prost", "transport"] }
protoc-bin-vendored = "3"
[lib] [lib]
name = "consciousness" name = "consciousness"

View file

@ -13,21 +13,4 @@ fn main() {
.file("schema/channel.capnp") .file("schema/channel.capnp")
.run() .run()
.expect("capnp compile failed (channel.capnp)"); .expect("capnp compile failed (channel.capnp)");
// Generate salience.v1 gRPC client + message types from proto.
// Server side (python) is generated separately via grpcio-tools.
// Use vendored protoc so we don't require a system install.
let protoc = protoc_bin_vendored::protoc_bin_path()
.expect("vendored protoc not available for this platform");
// SAFETY: build script is single-threaded at this point; setting env
// before invoking tonic_build is the documented way to point it at a
// non-PATH protoc.
unsafe { std::env::set_var("PROTOC", protoc); }
tonic_build::configure()
.build_server(false)
.build_client(true)
.compile_protos(&["proto/salience.proto"], &["proto"])
.expect("tonic_build compile failed (salience.proto)");
println!("cargo:rerun-if-changed=proto/salience.proto");
} }

View file

@ -181,8 +181,6 @@ struct TelegramMessage {
chat_id: i64, chat_id: i64,
sender: String, sender: String,
text: String, text: String,
/// Absolute path to a downloaded media file (photo, etc.), if any.
media_path: Option<String>,
} }
/// Fetch and parse pending updates from Telegram via long polling. /// Fetch and parse pending updates from Telegram via long polling.
@ -208,115 +206,19 @@ async fn get_updates(
let sender = msg["from"]["first_name"].as_str().unwrap_or("unknown").to_string(); let sender = msg["from"]["first_name"].as_str().unwrap_or("unknown").to_string();
let chat_id = msg["chat"]["id"].as_i64().unwrap_or(0); let chat_id = msg["chat"]["id"].as_i64().unwrap_or(0);
// Photo: array of PhotoSize, largest is last. Download largest, if let Some(text) = msg["text"].as_str() {
// surface message with [image: <path>] marker so the multimodal messages.push(TelegramMessage {
// model can Read the image. update_id,
let (text, media_path) = if let Some(sizes) = msg["photo"].as_array() { chat_id,
let caption = msg["caption"].as_str().unwrap_or("").to_string(); sender,
let largest = sizes.last(); text: text.to_string(),
let file_id = largest });
.and_then(|s| s["file_id"].as_str()) }
.unwrap_or("");
if file_id.is_empty() {
error!("telegram photo: missing file_id in update {update_id}");
(caption, None)
} else {
// Bound the download — HttpClient::request_timeout only covers
// send_request, not body collect, so an indefinitely-slow body
// would otherwise stall every subsequent poll.
let dl = tokio::time::timeout(
std::time::Duration::from_secs(60),
download_telegram_file(client, token, file_id),
).await
.unwrap_or_else(|_| Err("download timed out after 60s".into()));
match dl {
Ok(path) => (caption, Some(path)),
Err(e) => {
error!("telegram photo download failed (file_id={file_id}): {e}");
// Surface what we have: caption plus a marker that
// a photo was sent but couldn't be fetched.
let marker = format!("[image: download failed: {e}]");
let combined = if caption.is_empty() {
marker
} else {
format!("{marker}\n{caption}")
};
(combined, None)
}
}
}
} else if let Some(text) = msg["text"].as_str() {
(text.to_string(), None)
} else {
// Other media types (voice, video, sticker, etc.) — skip for now,
// but log so we can extend later.
let kind = ["voice", "video", "sticker", "document", "audio", "animation"]
.iter()
.find(|k| !msg[**k].is_null())
.copied()
.unwrap_or("unknown");
info!("telegram: skipping non-text/photo message (kind={kind}, update_id={update_id})");
continue;
};
messages.push(TelegramMessage {
update_id,
chat_id,
sender,
text,
media_path,
});
} }
} }
Ok(messages) Ok(messages)
} }
/// Resolve a Telegram file_id to a downloadable URL path via getFile.
async fn get_file_path(
client: &HttpClient,
token: &str,
file_id: &str,
) -> Result<String, Box<dyn std::error::Error>> {
let url = format!(
"https://api.telegram.org/bot{}/getFile?file_id={}",
token, file_id,
);
let response = client.get(&url).await?;
let body = response.text().await?;
let resp: serde_json::Value = serde_json::from_str(&body)
.map_err(|e| format!("getFile JSON parse error: {e}"))?;
if !resp["ok"].as_bool().unwrap_or(false) {
return Err(format!("getFile failed: {}", resp["description"].as_str().unwrap_or("?")).into());
}
let file_path = resp["result"]["file_path"].as_str()
.ok_or("getFile: missing result.file_path")?;
Ok(file_path.to_string())
}
/// Download a Telegram file by file_id into the channel media dir.
/// Returns the absolute local path on success.
async fn download_telegram_file(
client: &HttpClient,
token: &str,
file_id: &str,
) -> Result<String, Box<dyn std::error::Error>> {
let file_path = get_file_path(client, token, file_id).await?;
let url = format!("https://api.telegram.org/file/bot{}/{}", token, file_path);
let response = client.get(&url).await?;
let status = response.status();
if !status.is_success() {
return Err(format!("file download failed: {status}").into());
}
let bytes = response.bytes().await?;
let ext = file_path.rsplit('.').next().filter(|e| !e.contains('/')).unwrap_or("dat");
let media_dir = log_dir().join("media");
std::fs::create_dir_all(&media_dir)?;
let dest = media_dir.join(format!("{file_id}.{ext}"));
std::fs::write(&dest, &bytes)?;
Ok(dest.to_string_lossy().to_string())
}
/// Send a text message to a Telegram chat. /// Send a text message to a Telegram chat.
async fn send_message( async fn send_message(
client: &HttpClient, client: &HttpClient,
@ -467,19 +369,11 @@ async fn poll_once(
let sender_lower = msg.sender.to_lowercase(); let sender_lower = msg.sender.to_lowercase();
let channel = format!("telegram.{}", sender_lower); let channel = format!("telegram.{}", sender_lower);
// If the message has media, prepend an [image: <abs_path>] marker channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &msg.text);
// so the multimodal model can Read the file directly.
let body = match &msg.media_path {
Some(path) if msg.text.is_empty() => format!("[image: {path}]"),
Some(path) => format!("[image: {path}]\n{}", msg.text),
None => msg.text.clone(),
};
channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &body);
let mut s = state.borrow_mut(); let mut s = state.borrow_mut();
s.config.chat_ids.insert(sender_lower, msg.chat_id); s.config.chat_ids.insert(sender_lower, msg.chat_id);
let line = format!("[{}] {}", msg.sender, body); let line = format!("[{}] {}", msg.sender, msg.text);
s.push_message(line, 2, &channel); s.push_message(line, 2, &channel);
} }

View file

@ -26,12 +26,10 @@ use consciousness::thalamus::channel_log::ChannelLog;
#[derive(Clone, serde::Serialize, serde::Deserialize)] #[derive(Clone, serde::Serialize, serde::Deserialize)]
struct PaneConfig { struct PaneConfig {
/// Human-readable label: becomes the channel name "tmux.<label>", /// Human-readable label, becomes the channel name "tmux.<label>"
/// and the tmux pane title / window name the live pane id is
/// resolved from. The pane id is deliberately not stored — it is
/// ephemeral (recycled across pane and tmux-server restarts), so it
/// is looked up fresh on every connect attempt.
label: String, label: String,
/// Tmux pane ID, e.g. "%5"
pane_id: String,
} }
#[derive(Clone, serde::Serialize, serde::Deserialize)] #[derive(Clone, serde::Serialize, serde::Deserialize)]
@ -88,9 +86,11 @@ impl State {
} }
} }
/// Whether a pane with this label is registered. /// Get pane_id for a label
fn has_pane(&self, label: &str) -> bool { fn get_pane(&self, label: &str) -> Option<&str> {
self.config.panes.iter().any(|p| p.label == label) self.config.panes.iter()
.find(|p| p.label == label)
.map(|p| p.pane_id.as_str())
} }
/// Check if a pane is connected /// Check if a pane is connected
@ -103,124 +103,98 @@ impl State {
self.connected.insert(label.to_string(), connected); self.connected.insert(label.to_string(), connected);
} }
/// Register a pane and persist. /// Add a pane and persist
fn add_pane(&mut self, label: String) { fn add_pane(&mut self, label: String, pane_id: String) {
if !self.config.panes.iter().any(|p| p.label == label) { if !self.config.panes.iter().any(|p| p.label == label) {
self.config.panes.push(PaneConfig { label }); self.config.panes.push(PaneConfig { label, pane_id });
save_config(&self.config); save_config(&self.config);
} }
} }
/// Unregister a pane and persist. Returns whether it was registered. /// Remove a pane and persist
fn remove_pane(&mut self, label: &str) -> bool { fn remove_pane(&mut self, label: &str) -> Option<String> {
if let Some(idx) = self.config.panes.iter().position(|p| p.label == label) { if let Some(idx) = self.config.panes.iter().position(|p| p.label == label) {
self.config.panes.remove(idx); let pane = self.config.panes.remove(idx);
self.connected.remove(label); self.connected.remove(label);
save_config(&self.config); save_config(&self.config);
true Some(pane.pane_id)
} else { } else {
false None
} }
} }
} }
// ── Pipe-Pane Reader ────────────────────────────────────────── // ── Pipe-Pane Reader ──────────────────────────────────────────
/// Wait between connect attempts for a pane that is not yet reachable. /// Set up pipe-pane for a single pane, reading output into the channel log.
const RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_secs(2); async fn pipe_pane_reader(state: SharedState, pane: PaneConfig) {
/// Keep a pane streamed into its channel log for as long as it stays
/// registered. The pane id is resolved fresh by label on every connect
/// attempt — tmux pane ids are ephemeral, so the label (pane title /
/// window name) is the durable identity. Retries until the pane exists
/// and pipe-pane succeeds, and reconnects the same way if the pipe
/// later drops. Returns once close() unregisters the pane.
async fn pipe_pane_reader(state: SharedState, label: String) {
let pipe_dir = dirs::home_dir() let pipe_dir = dirs::home_dir()
.unwrap_or_default() .unwrap_or_default()
.join(".consciousness/channels/tmux-pipes"); .join(".consciousness/channels/tmux-pipes");
std::fs::create_dir_all(&pipe_dir).ok(); std::fs::create_dir_all(&pipe_dir).ok();
let pipe_path = pipe_dir.join(format!("{}.pipe", label));
let channel_key = format!("tmux.{}", label);
loop { let pipe_path = pipe_dir.join(format!("{}.pipe", pane.label));
if !state.borrow().has_pane(&label) { let _ = std::fs::remove_file(&pipe_path);
return;
}
connect_and_stream(&state, &label, &pipe_path, &channel_key).await; // Create a named pipe (FIFO)
state.borrow_mut().set_connected(&label, false);
if !state.borrow().has_pane(&label) {
return;
}
tokio::time::sleep(RETRY_INTERVAL).await;
}
}
/// One connect attempt: resolve the pane's live id by label, point its
/// output at the FIFO with pipe-pane, and stream lines into the channel
/// log. Returns on the first failure, or when the stream ends.
async fn connect_and_stream(
state: &SharedState,
label: &str,
pipe_path: &std::path::Path,
channel_key: &str,
) {
let pane_id = match find_pane_by_name(label) {
Some(id) => id,
None => return,
};
// Fresh FIFO for this attempt.
let _ = std::fs::remove_file(pipe_path);
unsafe { unsafe {
let c_path = std::ffi::CString::new(pipe_path.to_str().unwrap()).unwrap(); let c_path = std::ffi::CString::new(pipe_path.to_str().unwrap()).unwrap();
libc::mkfifo(c_path.as_ptr(), 0o644); libc::mkfifo(c_path.as_ptr(), 0o644);
} }
// Point the pane's output at our FIFO. // Tell tmux to pipe this pane's output to our FIFO
let pipe_cmd = format!("cat >> {}", pipe_path.to_string_lossy()); let pipe_path_str = pipe_path.to_string_lossy().to_string();
match std::process::Command::new("tmux") let result = std::process::Command::new("tmux")
.args(["pipe-pane", "-t", &pane_id, &pipe_cmd]) .args(["pipe-pane", "-t", &pane.pane_id, &format!("cat >> {}", pipe_path_str)])
.output() .output();
{
Ok(o) if o.status.success() => {} match result {
Ok(o) => { Ok(output) if output.status.success() => {
warn!("pipe-pane failed for {} ({}): {}", label, pane_id, info!("pipe-pane set up for {} ({})", pane.label, pane.pane_id);
String::from_utf8_lossy(&o.stderr)); }
Ok(output) => {
error!("pipe-pane failed for {}: {}", pane.label,
String::from_utf8_lossy(&output.stderr));
state.borrow_mut().set_connected(&pane.label, false);
return; return;
} }
Err(e) => { Err(e) => {
error!("running tmux pipe-pane for {}: {}", label, e); error!("failed to run tmux pipe-pane for {}: {}", pane.label, e);
state.borrow_mut().set_connected(&pane.label, false);
return; return;
} }
} }
let file = match tokio::fs::File::open(pipe_path).await { // Open the FIFO and read lines
let file = match tokio::fs::File::open(&pipe_path).await {
Ok(f) => f, Ok(f) => f,
Err(e) => { Err(e) => {
warn!("opening pipe for {}: {}", label, e); error!("failed to open pipe for {}: {}", pane.label, e);
state.borrow_mut().set_connected(&pane.label, false);
return; return;
} }
}; };
info!("connected channel tmux.{} (pane {})", label, pane_id); // Mark as connected once pipe is open
state.borrow_mut().set_connected(label, true); state.borrow_mut().set_connected(&pane.label, true);
let reader = tokio::io::BufReader::new(file);
let mut lines = reader.lines();
let channel_key = format!("tmux.{}", pane.label);
let mut lines = tokio::io::BufReader::new(file).lines();
while let Ok(Some(line)) = lines.next_line().await { while let Ok(Some(line)) = lines.next_line().await {
if line.trim().is_empty() { if line.trim().is_empty() {
continue; continue;
} }
let mut s = state.borrow_mut(); let mut s = state.borrow_mut();
s.channel_logs let log = s.channel_logs
.entry(channel_key.to_string()) .entry(channel_key.clone())
.or_insert_with(ChannelLog::new) .or_insert_with(ChannelLog::new);
.push(line); log.push(line);
} }
warn!("pipe-pane stream ended for {}", label); warn!("pipe-pane reader ended for {}", pane.label);
state.borrow_mut().set_connected(&pane.label, false);
} }
// ── ChannelServer Implementation ─────────────────────────────── // ── ChannelServer Implementation ───────────────────────────────
@ -270,10 +244,10 @@ impl channel_server::Server for ChannelServerImpl {
let channel = pry!(pry!(params.get_channel()).to_str()).to_string(); let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
let message = pry!(pry!(params.get_message()).to_str()).to_string(); let message = pry!(pry!(params.get_message()).to_str()).to_string();
// Send to tmux pane via send-keys — resolve the live pane id by // Send to tmux pane via send-keys
// label (it is not stored).
let label = channel.strip_prefix("tmux.").unwrap_or(&channel); let label = channel.strip_prefix("tmux.").unwrap_or(&channel);
if let Some(pane_id) = find_pane_by_name(label) { let pane_id = self.state.borrow().get_pane(label).map(String::from);
if let Some(pane_id) = pane_id {
let _ = std::process::Command::new("tmux") let _ = std::process::Command::new("tmux")
.args(["send-keys", "-t", &pane_id, &message, "Enter"]) .args(["send-keys", "-t", &pane_id, &message, "Enter"])
.output(); .output();
@ -328,22 +302,28 @@ impl channel_server::Server for ChannelServerImpl {
let params = pry!(params.get()); let params = pry!(params.get());
let label = pry!(pry!(params.get_label()).to_str()).to_string(); let label = pry!(pry!(params.get_label()).to_str()).to_string();
// Already registered — nothing to do. // Check if already open
if self.state.borrow().has_pane(&label) { if self.state.borrow().get_pane(&label).is_some() {
return std::future::ready(Ok(())); return std::future::ready(Ok(()));
} }
info!("opening channel tmux.{}", label); // Find the tmux pane by name (window or pane title)
let pane_id = match find_pane_by_name(&label) {
Some(id) => id,
None => return std::future::ready(Err(capnp::Error::failed(
format!("no tmux pane named '{}'", label)))),
};
// Register the label and persist. The pane id is not stored — info!("opening channel tmux.{} (pane {})", label, pane_id);
// the reader resolves it by label on every connect attempt, so
// this succeeds even if the pane does not exist yet; the reader
// connects once it appears.
self.state.borrow_mut().add_pane(label.clone());
// Register in state and persist
self.state.borrow_mut().add_pane(label.clone(), pane_id.clone());
// Start pipe-pane reader
let pane = PaneConfig { label, pane_id };
let reader_state = self.state.clone(); let reader_state = self.state.clone();
tokio::task::spawn_local(async move { tokio::task::spawn_local(async move {
pipe_pane_reader(reader_state, label).await; pipe_pane_reader(reader_state, pane).await;
}); });
std::future::ready(Ok(())) std::future::ready(Ok(()))
@ -359,18 +339,14 @@ impl channel_server::Server for ChannelServerImpl {
let label = channel.strip_prefix("tmux.").unwrap_or(&channel).to_string(); let label = channel.strip_prefix("tmux.").unwrap_or(&channel).to_string();
let mut s = self.state.borrow_mut(); let mut s = self.state.borrow_mut();
if s.remove_pane(&label) { if let Some(pane_id) = s.remove_pane(&label) {
info!("closing channel tmux.{}", label); info!("closing channel tmux.{}", label);
s.channel_logs.remove(&format!("tmux.{}", label)); s.channel_logs.remove(&format!("tmux.{}", label));
// Stop piping if the pane is still around (if it is gone the // Disconnect pipe-pane
// pipe is already dead). The reader then sees the pane let _ = std::process::Command::new("tmux")
// unregistered and exits. .args(["pipe-pane", "-t", &pane_id])
if let Some(pane_id) = find_pane_by_name(&label) { .output();
let _ = std::process::Command::new("tmux")
.args(["pipe-pane", "-t", &pane_id])
.output();
}
} }
std::future::ready(Ok(())) std::future::ready(Ok(()))
@ -421,13 +397,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
tokio::task::LocalSet::new() tokio::task::LocalSet::new()
.run_until(async move { .run_until(async move {
// Start a pipe-pane reader for each configured pane; each // Start a pipe-pane reader for each configured pane
// resolves its live pane id by label and retries until
// connected.
for pane in state.borrow().config.panes.clone() { for pane in state.borrow().config.panes.clone() {
let reader_state = state.clone(); let reader_state = state.clone();
tokio::task::spawn_local(async move { tokio::task::spawn_local(async move {
pipe_pane_reader(reader_state, pane.label).await; pipe_pane_reader(reader_state, pane).await;
}); });
} }

27
flake.lock generated
View file

@ -1,27 +0,0 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1781074563,
"narHash": "sha256-md8WlXOlfnIeHeOScMTTHFyf2d6iaTwPl2apR5EQ3P4=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "9ae611a455b90cf061d8f332b977e387bda8e1ca",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

View file

@ -1,42 +0,0 @@
# Nix flake that exposes one development shell per supported system.
{
description = "Development shell for consciousness";
# Single input: nixpkgs, tracking the nixos-unstable branch.
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
};
outputs = { nixpkgs, ... }:
let
# Systems a dev shell is generated for.
systems = [
"x86_64-linux"
"aarch64-linux"
];
# Builds an attribute set keyed by system name from a per-system function.
forAllSystems = nixpkgs.lib.genAttrs systems;
in
{
devShells = forAllSystems (system:
let
# nixpkgs package set instantiated for this system.
pkgs = import nixpkgs { inherit system; };
in
{
default = pkgs.mkShell {
packages = with pkgs; [
# Rust toolchain and editor/lint tooling.
cargo
rustc
rustfmt
clippy
rust-analyzer
# Non-Rust tools made available in the shell.
capnproto
pkg-config
jq
sqlite
python3
];
# Passed through to the shell environment; enables Rust backtraces.
RUST_BACKTRACE = "1";
};
});
};
}

View file

@ -1,276 +0,0 @@
// salience.proto — stateful generation + per-token concept readout over gRPC.
//
// Shape:
// - One server-streaming RPC (Generate) for inference. Every other
// operation is unary. This is the minimum streaming we need —
// tokens arrive one at a time with optional readouts / logprobs —
// and keeping everything else unary makes the client dramatically
// simpler than a single bidi state machine did.
//
// - Server-side sessions hold the token list and image binaries.
// Sessions exist for bandwidth: at 200K tokens we'd otherwise
// re-ship ~800KB every turn, which hurts badly over a WAN link.
// vLLM's prefix cache holds the KV; the session just gives the
// client a handle so it can send deltas.
//
// - The client is the source of truth for prompt content. The server
// is the source of truth for image token expansion (how many
// IMAGE_PAD tokens an image becomes under this model). The client
// writes the pre-expanded <|vision_start|> + IMAGE_PAD×N +
// <|vision_end|> block into append_tokens and declares its range in
// GenerateRequest.images; the server validates N against the vision
// encoder and rejects a mismatch with INVALID_ARGUMENT.
//
// - Every mutation carries (offset, truncating): the client's view of
// the server's current length, plus whether the client is deliberately
// rewriting history. Server validates on each call and rejects drift.
// No silent divergence, no migration bugs.
//
// - Errors use gRPC status codes. NOT_FOUND for missing sessions,
// FAILED_PRECONDITION for offset drift or image-block splits,
// RESOURCE_EXHAUSTED for context overflow, ABORTED for "session busy".
//
// Not in v1:
// - Authentication beyond a shared bearer token in gRPC metadata.
// - Multi-tenant session namespacing.
// - Sampling traces beyond top-k logprobs.
syntax = "proto3";
package salience.v1;
// ============================================================
// Service
// ============================================================
// Session-based inference service. Generate is the only streaming RPC
// (server-streaming); every other RPC is unary.
service Salience {
// Create a fresh session. Client uses session_id on every subsequent
// RPC until CloseSession or TTL eviction (default 30 min idle). To
// refresh TTL across a long pause, issue a no-op Generate (empty
// append_tokens, max_tokens=0, no ranges).
rpc OpenSession(OpenSessionRequest) returns (OpenSessionResponse);
// Release the session's tokens + images. Idempotent.
rpc CloseSession(CloseSessionRequest) returns (CloseSessionResponse);
// Branch a session at a given token position. The new session
// inherits tokens [0, at_position) and any images whose vision
// block lies fully in that range. Rejected with FAILED_PRECONDITION
// if at_position falls inside an image block (client picks a clean
// boundary).
rpc ForkSession(ForkSessionRequest) returns (ForkSessionResponse);
// Prefill + optionally decode. Images are attached inline via
// `GenerateRequest.images`; the client writes its own pre-expanded
// <|vision_start|> + N*<|image_pad|> + <|vision_end|> runs into
// `append_tokens` and declares each run's range in `images[i]`.
// Server validates run length against the actual vision-encoder
// feature count and returns INVALID_ARGUMENT on mismatch. Stream
// yields Token events (with optional readouts / logprobs per
// position) followed by a terminating Done.
rpc Generate(GenerateRequest) returns (stream GenerateEvent);
// Readout manifest for the currently-loaded model — concept names,
// layer indices, tensor dtype. Stateless; fetch once at client
// startup and cache.
rpc GetReadoutManifest(GetReadoutManifestRequest) returns (ReadoutManifest);
// Dump the full token stream of a session. Debug-only: used by the
// client to verify its local accounting against the server's
// session.tokens byte-for-byte when divergence is suspected. Not
// cheap — copies the whole sequence across the wire.
rpc DumpSession(DumpSessionRequest) returns (DumpSessionResponse);
}
// ============================================================
// Lifecycle
// ============================================================
// Request for OpenSession.
message OpenSessionRequest {
// Model identifier, must match vLLM's served model. The server
// only has one model loaded; this is a safety check on what the
// client thinks it's talking to.
// NOTE(review): the status code returned on a mismatch is not stated
// here — confirm against the server.
string model = 1;
}
// Response for OpenSession.
message OpenSessionResponse {
// Handle the client passes as session_id on every subsequent RPC.
string session_id = 1;
// Presumably the served model's maximum context length in tokens —
// TODO confirm against the server.
uint32 max_model_len = 2;
}
// Request for CloseSession.
message CloseSessionRequest {
// Session whose tokens + images should be released.
string session_id = 1;
}
// Response for CloseSession. Carries no fields.
message CloseSessionResponse {}
// Request for ForkSession: branch `session_id` at `at_position`.
message ForkSessionRequest {
string session_id = 1; // source session
uint32 at_position = 2; // new session inherits tokens [0, at_position)
}
// Response for ForkSession.
message ForkSessionResponse {
string session_id = 1; // new session
}
// ============================================================
// Inference
// ============================================================
// One image attached to a Generate call. The client is responsible
// for writing the expanded placeholder run (VISION_START +
// N*IMAGE_PAD + VISION_END) into `GenerateRequest.append_tokens` at
// positions [pad_range_start, pad_range_end) and pairing it with
// the corresponding `ImageAttachment` entry. Server validates that
// the declared range's pad count matches what the vision encoder
// produces, and returns INVALID_ARGUMENT if they disagree.
message ImageAttachment {
// Image bytes (PNG / JPEG / WebP / …).
bytes bytes = 1;
// MIME type, e.g. "image/png".
string mime = 2;
// Absolute token positions (in `session.tokens` AFTER `append_tokens`
// is applied) spanning the full vision block `[vision_start,
// pad*N, vision_end]`. end is exclusive, so end - start == N + 2.
uint32 pad_range_start = 3;
uint32 pad_range_end = 4;
}
// Request for Generate: append tokens to a session, prefill, and
// optionally decode up to `max_tokens` new tokens.
message GenerateRequest {
string session_id = 1;
// Tokens to append before prefill. May be empty. Client writes the
// full vision block (VISION_START + N*IMAGE_PAD + VISION_END) for
// any newly-attached image directly into this stream; each such
// block must be paired with a matching entry in `images`. The
// server validates that the declared ranges all point at IMAGE_PAD
// runs and that each run's length matches what the vision encoder
// produces for the corresponding image.
repeated uint32 append_tokens = 2;
// Client's view of session.tokens length at the time of the call.
// Must equal server's actual length, OR be strictly less when
// truncating=true (server rewinds before appending). Any other
// mismatch is FAILED_PRECONDITION.
uint32 offset = 3;
bool truncating = 4;
// Decode budget. 0 = prefill only (no decode, emit Token events
// for positions covered by logprobs_ranges / readout_ranges, then
// Done; replaces the old /score endpoint). >0 = decode up to this
// many tokens, stopping early on EOS / stop_token_ids.
uint32 max_tokens = 5;
// Position ranges (absolute, within the session's post-append
// token list) at which to emit logprobs on Token events. Empty =
// no logprobs. `logprob_top_k > 0` returns the top-k alternative
// tokens at each covered position; `logprob_top_k == 0` returns
// only the sampled-token's logprob.
repeated PositionRange logprobs_ranges = 6;
uint32 logprob_top_k = 7;
// Position ranges at which to emit concept-readout vectors. Empty
// = no readouts. Logical shape per position is
// [n_layers][n_concepts] — see GetReadoutManifest.
repeated PositionRange readout_ranges = 8;
// Sampling parameters. Meaningful only when max_tokens > 0.
float temperature = 9; // default 1.0 when zero
float top_p = 10; // default 1.0 when zero
uint32 top_k = 11; // default 0 (disabled)
repeated uint32 stop_token_ids = 12;
// vLLM scheduler priority (0 = interactive, 10 = batch).
int32 priority = 13;
// Images newly attached on this call. Each entry describes one
// image's binary bytes, its mime type, and the exact token-position
// range of its pre-expanded placeholder run inside `session.tokens`
// after `append_tokens` is applied. See `ImageAttachment`.
repeated ImageAttachment images = 14;
}
// Half-open range [start, end) of absolute token positions.
message PositionRange {
uint32 start = 1; // inclusive
uint32 end = 2; // exclusive
}
// One item on the Generate response stream: either a Token event or
// the GenerateDone that terminates the stream.
message GenerateEvent {
oneof event {
Token token = 1;
GenerateDone done = 2;
}
}
// Per-position event on the Generate stream, with optional readout
// and logprob payloads.
message Token {
// Token id at this position. For prefill this is the prompt token;
// for decode it's the sampled token.
uint32 id = 1;
// Absolute position in the session's token list.
uint32 position = 2;
// True for prefill positions, false for decode.
bool is_prefill = 3;
// Concept readout at this position. Empty if the position wasn't
// covered by readout_ranges.
// NOTE(review): the flattening order of the logical
// [n_layers][n_concepts] shape is not stated here — confirm.
repeated float readout = 4 [packed = true];
// Top-k alternative tokens' logprobs at this position — populated
// when the position is covered by logprobs_ranges and
// logprob_top_k > 0.
repeated TokenLogprob logprobs = 5;
// Logprob of the token at `position` (the prompt token for
// prefill, the sampled token for decode). Populated when the
// position is covered by logprobs_ranges.
float sampled_logprob = 6;
// Presumably distinguishes "sampled_logprob is populated" from an
// unset 0.0 default — TODO confirm against the server.
bool has_sampled_logprob = 7;
}
// One (token id, logprob) pair; used for the entries of Token.logprobs.
message TokenLogprob {
uint32 id = 1;
float logprob = 2;
}
// Terminating event of a Generate stream: token counts plus the reason
// generation stopped.
message GenerateDone {
// NOTE(review): exact accounting (e.g. whether prompt_tokens counts the
// whole session or only this call's append) is not stated here — confirm.
uint32 prompt_tokens = 1;
uint32 completion_tokens = 2;
uint32 total_tokens = 3;
enum FinishReason {
FINISH_REASON_UNSPECIFIED = 0;
FINISH_REASON_EOS = 1; // emitted EOS / stop token
FINISH_REASON_LENGTH = 2; // hit max_tokens
FINISH_REASON_CANCELLED = 3; // client cancelled
// NOTE(review): GenerateRequest carries stop_token_ids but no
// stop-string field — confirm this value is still reachable.
FINISH_REASON_STOP_STRING = 4; // matched a stop string
}
FinishReason finish_reason = 4;
}
// ============================================================
// Readout manifest
// ============================================================
// Request for GetReadoutManifest. Carries no fields.
message GetReadoutManifestRequest {}
// Description of the readout produced by the currently-loaded model.
message ReadoutManifest {
// Concept names.
repeated string concepts = 1;
// Layer indices.
repeated uint32 layers = 2;
// Presumably the model's hidden dimension — TODO confirm.
uint32 hidden_size = 3;
// Tensor dtype.
string dtype = 4;
}
// ============================================================
// Debug
// ============================================================
// Request for DumpSession (debug-only).
message DumpSessionRequest {
// Session whose token stream should be dumped.
string session_id = 1;
}
// Response for DumpSession.
message DumpSessionResponse {
// The full session.tokens sequence, verbatim.
repeated uint32 tokens = 1 [packed = true];
}

View file

@ -1,327 +0,0 @@
"""Quantize Qwen3.6-27B (multimodal) to FP8 for vLLM serving.
Why this exists
---------------
The earlier `quantize_qwen3_6.py` (in shell history, never committed)
loaded the model with `AutoModelForCausalLM`, which silently strips
the multimodal arch. Result: an FP8 checkpoint with no vision tower
weights at all. vLLM happily instantiated the vision tower from the
config and ran it with default/uninitialized weights, producing
gibberish image features and `!!!!!!`-style output. We chased that
through the protocol layer for a long time before tracing it back
to the quant. This script avoids that trap by loading via the
config-declared class explicitly.
Recipe
------
FP8_DYNAMIC (per-channel weight scales, per-token dynamic activation
scales, both E4M3) for Linear weights, with an `ignore` list derived
from Unsloth's UD-Q8_K_XL (`unsloth/Qwen3.6-27B-GGUF`). Their
sensitivity sweep flagged specific layers as quantization-fragile;
we honor those layer indices even though their algorithm is
GGUF-native Q8_K and ours is FP8 — sensitivity is a layer property,
not an algorithm property.
vLLM fusion constraint
~~~~~~~~~~~~~~~~~~~~~~
vLLM's Qwen3.5/3.6 model code fuses sub-modules at load time:
    qkv_proj      ← q_proj, k_proj, v_proj
    gate_up_proj  ← gate_proj, up_proj
    in_proj_qkvz  ← in_proj_qkv, in_proj_z
    in_proj_ba    ← in_proj_b, in_proj_a
compressed_tensors rejects checkpoints where sub-modules of a fused
layer have different quantization schemes. Our ignore list is shaped
around this — within any fused layer, all components share a scheme.
That's the reason `in_proj_qkv` is ignored even though Unsloth's
sweep doesn't single it out, and the reason late-stack attn override
covers q/k/v rather than just q/k.
MTP merge
---------
`Qwen3_5ForConditionalGeneration` doesn't expose the MTP submodule,
so `oneshot()` produces a checkpoint with the 15 `mtp.*` tensors
silently dropped. After quantization we read the MTP weights back
out of the upstream cached snapshot and splice them into the saved
safetensors at BF16. They're small (~850 MB) so quantizing them
isn't worth the calibration risk; speculative-decoding code paths
in vLLM expect the MTP head present.
Output
------
`OUTPUT_DIR` gets the FP8 model.safetensors + config + processor +
recipe.yaml. Vision tower stays BF16 (in `ignore`); LM Linears go
to FP8; norms, SSM internals (not Linear), and MTP tensors stay
BF16 untouched.
Verification at end: re-opens the saved safetensors and asserts
- vision .weight tensors present (>= 150; full count is 167)
- lm_head + embed_tokens at fp16/bf16 (NOT FP8)
- a sampled FP8'd Linear actually has float8 dtype
- 15 mtp.* tensors present
Run
---
~/vllm-venv/bin/python quantize_qwen3_6_mm.py
"""
from __future__ import annotations
import glob
import json
import sys
from pathlib import Path
import torch
from huggingface_hub import snapshot_download
from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier
from safetensors import safe_open
from safetensors.torch import save_file
from transformers import AutoProcessor
from transformers.models.qwen3_5.modeling_qwen3_5 import (
Qwen3_5ForConditionalGeneration,
)
# Hugging Face model id to quantize.
MODEL = "Qwen/Qwen3.6-27B"
# Directory the quantized checkpoint, config and processor are written to.
# NOTE(review): hard-coded absolute path under one user's home directory;
# adjust before running on another machine.
OUTPUT_DIR = "/home/ubuntu/amygdala-training/Qwen3.6-27B-FP8-mm"
# Layers Unsloth's UD-Q8_K_XL keeps at F16 (perplexity-sensitive
# in their sweep). Late-stack clustering is consistent with the
# general finding that errors near the output propagate directly
# to logits.
LATE_FFN_LAYERS = (50, 51, 59, 62, 63)
LATE_ATTN_LAYERS = (51, 59, 63)
# Build the ignore regex list. Note: llmcompressor matches these
# patterns against MODULE names (no `.weight` suffix) when walking
# `named_modules()` for `targets=["Linear"]`. The first pass of
# this script used `\.weight$` patterns and silently quantized
# lm_head + every linear_attn projection — verified post-hoc by
# inspecting the saved safetensors. Patterns now anchor on `$`
# at the module name.
IGNORE_PATTERNS: list[str] = [
# Original recipe: lm_head and embeddings always full-precision.
# (embed_tokens is an Embedding, not a Linear, so it's already
# ignored by `targets=["Linear"]`. Pattern kept as belt-and-
# suspenders in case future llmcompressor versions widen the
# target set.)
"re:lm_head$",
"re:.*embed_tokens$",
# Vision tower — entire `model.visual.*` subtree (vision
# transformer blocks + merger + patch_embed + pos_embed).
# Unsloth ships the vision tower as a separate `mmproj-BF16.gguf`
# for GGUF consumers; in our single-file FP8 setup we just leave
# them at BF16.
"re:model\\.visual\\..*",
# MTP (multi-token prediction) module — Unsloth's GGUF doesn't
# carry MTP weights so we have no precision signal from them;
# safest to keep BF16.
"re:mtp\\..*",
# Linear-attention block — keep ENTIRELY at BF16. vLLM fuses
# `in_proj_qkv` and `in_proj_z` into a single `in_proj_qkvz`
# layer, and compressed_tensors rejects mixed schemes within a
# fused layer. Unsloth's recipe keeps z, a, b, out at F16/F32
# (gate/SSM internals are quantization-fragile in the GatedDeltaNet
# update), so the principled choice is to also keep `in_proj_qkv`
# at BF16 rather than FP8'ing the gate to match. We give up ~1 GB
# of FP8 coverage; in exchange we follow Unsloth's quality intent
# and load cleanly under vLLM. (`in_proj_a` + `in_proj_b` are
# likewise fused as `in_proj_ba` — both ignored, consistent.)
"re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_qkv$",
"re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_z$",
"re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_a$",
"re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_b$",
"re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.out_proj$",
# Per-layer high-precision MLP (Unsloth flagged exactly these
# late-stack indices in their UD-Q8_K_XL sensitivity sweep, all
# three of {gate, up, down} per layer). vLLM fuses gate+up into
# `gate_up_proj`; ignoring both keeps the fused layer consistent.
# `down_proj` is its own (non-fused) layer.
"re:model\\.language_model\\.layers\\.("
+ "|".join(str(n) for n in LATE_FFN_LAYERS)
+ ")\\.mlp\\.(down|gate|up)_proj$",
# Per-layer high-precision attention q/k/v (Unsloth's sweep upgrades
# only q and k; we extend to v because vLLM fuses q/k/v into
# `qkv_proj` and rejects mixed schemes). `o_proj` is its own
# non-fused layer and stays at FP8.
"re:model\\.language_model\\.layers\\.("
+ "|".join(str(n) for n in LATE_ATTN_LAYERS)
+ ")\\.self_attn\\.(q|k|v)_proj$",
]
def main() -> None:
    """Run the whole pipeline: load, quantize, save, merge MTP, verify.

    The checkpoint is loaded through ``Qwen3_5ForConditionalGeneration``
    (not an auto class), every ``Linear`` module not matched by
    ``IGNORE_PATTERNS`` is quantized with the ``FP8_DYNAMIC`` scheme, and
    the model plus processor are written to ``OUTPUT_DIR``. The MTP merge
    and the verification pass then run against that directory, in that
    order.
    """
    print(
        f"Loading {MODEL} as multimodal (Qwen3_5ForConditionalGeneration)...",
        flush=True,
    )
    load_kwargs = dict(
        dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
    model = Qwen3_5ForConditionalGeneration.from_pretrained(MODEL, **load_kwargs)
    print(f" loaded: {model.__class__.__name__}", flush=True)

    print("Loading processor (text + image preprocessing)...", flush=True)
    processor = AutoProcessor.from_pretrained(MODEL, trust_remote_code=True)

    print("Running FP8_DYNAMIC oneshot quantization...", flush=True)
    pattern_count = len(IGNORE_PATTERNS)
    print(f" ignore list: {pattern_count} patterns", flush=True)
    quant_recipe = QuantizationModifier(
        targets=["Linear"],
        scheme="FP8_DYNAMIC",
        ignore=IGNORE_PATTERNS,
    )
    oneshot(model=model, recipe=quant_recipe, output_dir=OUTPUT_DIR)
    processor.save_pretrained(OUTPUT_DIR)
    print(f" wrote model + processor to {OUTPUT_DIR}", flush=True)

    # Post-processing runs against the directory just written; the merge
    # must come first because verification counts the mtp.* tensors.
    for post_step in (merge_mtp, verify_output):
        post_step(OUTPUT_DIR)
def merge_mtp(out_dir: str) -> None:
    """Add the upstream ``mtp.*`` tensors to the quantized checkpoint.

    The quantized output lacks the MTP tensors, so they are read from the
    upstream snapshot of ``MODEL`` (located through ``snapshot_download``)
    and written into the single ``*.safetensors`` file in ``out_dir``
    alongside the tensors already there. The file's existing safetensors
    metadata is passed through unchanged.

    The merged file is written under a ``.new`` name and then moved over
    the original, so an interrupted write leaves the original in place.

    Args:
        out_dir: Directory holding exactly one ``*.safetensors`` file.

    Exits the process via ``sys.exit`` if ``out_dir`` does not hold exactly
    one safetensors file, or if an upstream ``mtp.*`` key already exists in
    the quantized checkpoint.
    """
    print("\nMerging upstream MTP tensors...", flush=True)

    snapshot_root = Path(
        snapshot_download(
            MODEL,
            allow_patterns=[
                "model.safetensors.index.json",
                "model-*-of-*.safetensors",
            ],
        )
    )
    with open(snapshot_root / "model.safetensors.index.json") as index_file:
        weight_map = json.load(index_file)["weight_map"]

    # Only open the shards that actually contain MTP tensors.
    shard_names = set()
    for tensor_name, shard_name in weight_map.items():
        if tensor_name.startswith("mtp."):
            shard_names.add(shard_name)
    mtp_shards = sorted(shard_names)
    print(f" MTP tensors live in shards: {mtp_shards}", flush=True)

    mtp_tensors: dict[str, torch.Tensor] = {}
    for shard_name in mtp_shards:
        with safe_open(snapshot_root / shard_name, framework="pt") as shard:
            mtp_tensors.update(
                (name, shard.get_tensor(name).contiguous())
                for name in shard.keys()
                if name.startswith("mtp.")
            )

    total_bytes = 0
    for tensor in mtp_tensors.values():
        total_bytes += tensor.numel() * tensor.element_size()
    print(
        f" loaded {len(mtp_tensors)} mtp tensors ({total_bytes/1e6:.1f} MB)",
        flush=True,
    )

    candidates = sorted(Path(out_dir).glob("*.safetensors"))
    if len(candidates) != 1:
        sys.exit(f"FAIL: expected single safetensors shard, got {candidates}")
    (checkpoint_path,) = candidates

    with safe_open(checkpoint_path, framework="pt") as checkpoint:
        metadata = checkpoint.metadata() or {}
        all_tensors = {}
        for name in checkpoint.keys():
            all_tensors[name] = checkpoint.get_tensor(name)

    clashing = sorted(all_tensors.keys() & mtp_tensors.keys())
    if clashing:
        sys.exit(f"FAIL: MTP key collision with FP8 output: {clashing[:5]}")
    all_tensors.update(mtp_tensors)

    # Write next to the original, then swap it in.
    tmp_path = checkpoint_path.with_name(f"{checkpoint_path.name}.new")
    print(
        f" rewriting {checkpoint_path.name} ({len(all_tensors)} tensors)...",
        flush=True,
    )
    save_file(all_tensors, str(tmp_path), metadata=metadata)
    tmp_path.replace(checkpoint_path)
    print(" done", flush=True)
def verify_output(out_dir: str) -> None:
    """Re-open the saved checkpoint and fail if the recipe did not land.

    Scans every ``*.safetensors`` file in ``out_dir`` and exits the process
    (``sys.exit`` with a ``FAIL: ...`` message) unless all of these hold:

    - at least 150 ``model.visual.*`` ``.weight`` tensors exist and none of
      them has a float8 dtype;
    - ``lm_head.weight`` exists and is not float8;
    - no ``embed_tokens.weight`` tensor is float8;
    - at least one ``language_model.layers`` ``.weight`` tensor is float8;
    - exactly 15 ``mtp.*`` tensors are present.

    Dtypes are only checked for "float8 or not"; the function does not
    assert a specific non-FP8 dtype.

    Args:
        out_dir: Directory containing the saved safetensors file(s).
    """
    print(f"\nVerifying {out_dir}...", flush=True)
    files = sorted(glob.glob(f"{out_dir}/*.safetensors"))
    if not files:
        sys.exit(f"FAIL: no safetensors in {out_dir}")

    vision_keys: list[tuple[str, str]] = []
    embed_keys: list[tuple[str, str]] = []
    fp8_sample: tuple[str, str] | None = None
    lm_head_dtype: str | None = None
    mtp_keys: list[str] = []

    for fp in files:
        with safe_open(fp, framework="pt") as f:
            for k in f.keys():
                if k.startswith("mtp."):
                    mtp_keys.append(k)
                # Some FP8 quants write a sibling `_scale` / `_zero_point`;
                # we just care about the .weight tensors.
                if not k.endswith(".weight"):
                    continue
                t = f.get_tensor(k)
                dtype = str(t.dtype).replace("torch.", "")
                if "model.visual." in k:
                    vision_keys.append((k, dtype))
                if k == "lm_head.weight":
                    lm_head_dtype = dtype
                if k.endswith("embed_tokens.weight"):
                    embed_keys.append((k, dtype))
                if (fp8_sample is None
                        and "float8" in dtype
                        and "language_model.layers" in k):
                    fp8_sample = (k, dtype)

    # Qwen3.6-27B has 167 vision `.weight` tensors (333 vision tensors
    # total, the rest are `.bias` and per-block norms). 150 is a
    # sanity floor that catches "vision tower didn't make it through"
    # without being brittle to minor arch revisions.
    if len(vision_keys) < 150:
        sys.exit(f"FAIL: only {len(vision_keys)} vision tensors found "
                 f"(expected >= 150). Vision tower didn't make it "
                 f"through the quant.")
    bad_vision = [(k, d) for k, d in vision_keys if "float8" in d]
    if bad_vision:
        sys.exit(f"FAIL: vision weights got quantized to FP8: "
                 f"{bad_vision[:3]}...")

    if lm_head_dtype is None:
        sys.exit("FAIL: lm_head.weight not found in output.")
    if "float8" in lm_head_dtype:
        sys.exit(f"FAIL: lm_head.weight is FP8 ({lm_head_dtype}); "
                 f"should be BF16/FP16.")

    # The embedding must stay full-precision too. Only an FP8 embedding is
    # treated as a failure; a missing key is not, so the check does not
    # depend on the exact prefix the embedding is saved under.
    bad_embed = [(k, d) for k, d in embed_keys if "float8" in d]
    if bad_embed:
        sys.exit(f"FAIL: embed_tokens got quantized to FP8: "
                 f"{bad_embed[:3]}; should be BF16/FP16.")

    if fp8_sample is None:
        sys.exit("FAIL: no FP8 weights found in language_model.layers — "
                 "the recipe didn't quantize anything.")

    # Upstream Qwen3.6-27B has exactly 15 mtp.* tensors (1 fused
    # transformer block + projection + norms). merge_mtp() should
    # have spliced all of them in.
    if len(mtp_keys) != 15:
        sys.exit(f"FAIL: expected 15 mtp.* tensors, found "
                 f"{len(mtp_keys)}. merge_mtp() missed some.")

    # Every success line carries the same " ✓ " prefix.
    print(f" ✓ {len(vision_keys)} vision tensors at "
          f"{vision_keys[0][1]} (not FP8)")
    print(f" ✓ lm_head.weight at {lm_head_dtype} (not FP8)")
    for embed_name, embed_dtype in embed_keys:
        print(f" ✓ {embed_name} at {embed_dtype} (not FP8)")
    print(f" ✓ FP8 sample: {fp8_sample[0]} = {fp8_sample[1]}")
    print(f" ✓ {len(mtp_keys)} mtp.* tensors present")
    print("DONE")
if __name__ == "__main__":
main()

View file

@ -100,7 +100,7 @@ impl HttpClient {
.map_err(|e| anyhow::anyhow!("invalid server name: {e}"))?; .map_err(|e| anyhow::anyhow!("invalid server name: {e}"))?;
let connector = tokio_rustls::TlsConnector::from(self.tls.clone()); let connector = tokio_rustls::TlsConnector::from(self.tls.clone());
let tls = connector.connect(server_name.to_owned(), tcp).await let tls = connector.connect(server_name.to_owned(), tcp).await
.map_err(|e| anyhow::anyhow!("TLS handshake to {host}: {e}"))?; .context("TLS handshake")?;
TokioIo::new(Box::new(tls) as Box<dyn IoStream>) TokioIo::new(Box::new(tls) as Box<dyn IoStream>)
} else { } else {
TokioIo::new(Box::new(tcp) as Box<dyn IoStream>) TokioIo::new(Box::new(tcp) as Box<dyn IoStream>)
@ -154,14 +154,6 @@ impl HttpResponse {
Ok(String::from_utf8_lossy(&bytes).into_owned()) Ok(String::from_utf8_lossy(&bytes).into_owned())
} }
/// Consume the response and return its whole body as raw bytes
/// (intended for binary downloads).
///
/// Fails with the context "reading response body" if collecting the
/// body stream returns an error.
pub async fn bytes(self) -> Result<Bytes> {
    let collected = self
        .body
        .collect()
        .await
        .context("reading response body")?;
    Ok(collected.to_bytes())
}
/// Read the entire body and deserialize as JSON. /// Read the entire body and deserialize as JSON.
pub async fn json<T: serde::de::DeserializeOwned>(self) -> Result<T> { pub async fn json<T: serde::de::DeserializeOwned>(self) -> Result<T> {
let bytes = self.body.collect().await let bytes = self.body.collect().await
@ -198,7 +190,6 @@ impl HttpClientBuilder {
} }
pub fn build(self) -> HttpClient { pub fn build(self) -> HttpClient {
install_rustls_crypto_provider();
let certs = rustls_native_certs::load_native_certs() let certs = rustls_native_certs::load_native_certs()
.certs.into_iter() .certs.into_iter()
.collect::<Vec<_>>(); .collect::<Vec<_>>();
@ -206,13 +197,6 @@ impl HttpClientBuilder {
for cert in certs { for cert in certs {
root_store.add(cert).ok(); root_store.add(cert).ok();
} }
// Also trust any `.pem` files under `~/.consciousness/certs/` —
// self-signed server certs for our own vllm hosts live there.
// Drop a new `<host>.pem` in the dir to trust a new server; no
// code change needed.
for cert in load_user_certs() {
root_store.add(cert).ok();
}
let tls = Arc::new( let tls = Arc::new(
ClientConfig::builder() ClientConfig::builder()
.with_root_certificates(root_store) .with_root_certificates(root_store)
@ -226,65 +210,6 @@ impl HttpClientBuilder {
} }
} }
/// Register `ring` as the process-wide rustls crypto provider, at most
/// once per process.
///
/// rustls 0.23 doesn't pick a provider automatically when multiple
/// features could supply one (e.g. when tonic pulls in both ring and
/// aws-lc-rs via transitive deps). The `OnceLock` guard makes repeated
/// calls no-ops, so any number of callers may invoke this.
fn install_rustls_crypto_provider() {
    static INSTALLED: std::sync::OnceLock<()> = std::sync::OnceLock::new();

    fn install() {
        // The result is deliberately discarded: an error from
        // `install_default` is not treated as a failure here.
        let _ = rustls::crypto::ring::default_provider().install_default();
    }

    INSTALLED.get_or_init(install);
}
/// List the `.pem` files directly under `~/.consciousness/certs/`.
///
/// Returns an empty list when the home directory cannot be determined
/// or the directory cannot be read; unreadable directory entries are
/// skipped. Shared by both cert loaders below so the HTTP and gRPC
/// paths always look at the same set of files.
fn user_cert_pem_paths() -> Vec<std::path::PathBuf> {
    let Some(home) = dirs::home_dir() else { return Vec::new() };
    let dir = home.join(".consciousness").join("certs");
    let Ok(entries) = std::fs::read_dir(&dir) else { return Vec::new() };
    entries
        .flatten()
        .map(|entry| entry.path())
        .filter(|path| path.extension().and_then(|e| e.to_str()) == Some("pem"))
        .collect()
}

/// Load the concatenated PEM bytes of every `.pem` file under
/// `~/.consciousness/certs/` — suitable for passing to a tonic
/// `ClientTlsConfig::ca_certificate(Certificate::from_pem(...))` call
/// so gRPC connections trust the same self-signed servers the HTTP
/// path does.
///
/// Files that cannot be read are skipped. A newline is appended after
/// any file that does not already end with one, so consecutive PEM
/// blocks never run together.
pub(crate) fn load_user_certs_pem_bytes() -> Vec<u8> {
    let mut out = Vec::new();
    for path in user_cert_pem_paths() {
        let Ok(bytes) = std::fs::read(&path) else { continue };
        out.extend_from_slice(&bytes);
        if !bytes.ends_with(b"\n") {
            out.push(b'\n');
        }
    }
    out
}

/// Load every `.pem` file under `~/.consciousness/certs/` as a DER
/// certificate and return them. Silent on missing dir, missing files,
/// or parse errors — those are "no extra certs trusted" rather than
/// hard failures, to keep startup robust.
fn load_user_certs() -> Vec<rustls::pki_types::CertificateDer<'static>> {
    let mut out = Vec::new();
    for path in user_cert_pem_paths() {
        let Ok(bytes) = std::fs::read(&path) else { continue };
        // `flatten()` drops PEM items that fail to parse.
        out.extend(rustls_pemfile::certs(&mut bytes.as_slice()).flatten());
    }
    out
}
/// Trait alias for streams that work with hyper's IO adapter. /// Trait alias for streams that work with hyper's IO adapter.
trait IoStream: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static {} trait IoStream: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static {}
impl<T: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static> IoStream for T {} impl<T: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static> IoStream for T {}

View file

@ -7,14 +7,13 @@
// Set POC_DEBUG=1 for verbose per-turn logging. // Set POC_DEBUG=1 for verbose per-turn logging.
pub mod http; pub mod http;
pub mod salience;
use std::time::Duration; use std::time::{Duration, Instant};
use anyhow::Result; use anyhow::Result;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use serde::Deserialize; use serde::Deserialize;
use http::HttpClient; use http::{HttpClient, HttpResponse};
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
pub struct Usage { pub struct Usage {
@ -38,21 +37,6 @@ pub struct ReadoutManifest {
/// from pairing with the manifest fetched at startup. /// from pairing with the manifest fetched at startup.
pub type TokenReadout = Vec<Vec<f32>>; pub type TokenReadout = Vec<Vec<f32>>;
/// Client-side sampling state. Mirrors the wire-level fields in
/// `GenerateRequest` (proto flattened its `SamplingParams` submessage
/// in so the server handler reads them directly), but stays as a
/// grouped struct on the client because UI / config / tests pass
/// these around together.
#[derive(Clone, Copy)]
pub struct SamplingParams {
/// Copied into the `temperature` field of the outgoing `GenerateRequest`.
pub temperature: f32,
/// Copied into the `top_p` field of the outgoing `GenerateRequest`.
pub top_p: f32,
/// Copied into the `top_k` field of the outgoing `GenerateRequest`.
pub top_k: u32,
/// Decode budget. 0 = prefill only; >0 = decode up to this many
/// tokens, stopping early on EOS / stop_token_ids.
pub max_tokens: u32,
}
/// A JoinHandle that aborts its task when dropped. /// A JoinHandle that aborts its task when dropped.
pub(crate) struct AbortOnDrop(tokio::task::JoinHandle<()>); pub(crate) struct AbortOnDrop(tokio::task::JoinHandle<()>);
@ -62,6 +46,13 @@ impl Drop for AbortOnDrop {
} }
} }
/// Sampling parameters for model generation.
///
/// `Copy`, so it is passed by value into the streaming task. Each field
/// is sent under the same name in the completions request JSON.
#[derive(Clone, Copy)]
pub(crate) struct SamplingParams {
/// Sent as `temperature` in the completions request.
pub temperature: f32,
/// Sent as `top_p` in the completions request.
pub top_p: f32,
/// Sent as `top_k` in the completions request.
pub top_k: u32,
}
// ───────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────
// Stream events — yielded by backends, consumed by the runner // Stream events — yielded by backends, consumed by the runner
@ -83,17 +74,6 @@ pub struct ApiClient {
api_key: String, api_key: String,
pub model: String, pub model: String,
base_url: String, base_url: String,
/// Cached readout manifest — fetched once per process and shared
/// across ApiClient clones (every Agent/fork gets the same cell).
/// `None` after fetch means the server has readout disabled (404).
manifest: std::sync::Arc<tokio::sync::OnceCell<Option<ReadoutManifest>>>,
/// Shared tonic Channel to the salience gRPC endpoint. Opened on
/// first use and reused across every SessionHandle / RPC call
/// derived from this ApiClient. tonic multiplexes concurrent
/// requests over the HTTP/2 connection automatically.
salience_channel: std::sync::Arc<
tokio::sync::OnceCell<tonic::transport::Channel>
>,
} }
impl ApiClient { impl ApiClient {
@ -108,69 +88,33 @@ impl ApiClient {
api_key: api_key.to_string(), api_key: api_key.to_string(),
model: model.to_string(), model: model.to_string(),
base_url: base_url.trim_end_matches('/').to_string(), base_url: base_url.trim_end_matches('/').to_string(),
manifest: std::sync::Arc::new(tokio::sync::OnceCell::new()),
salience_channel: std::sync::Arc::new(tokio::sync::OnceCell::new()),
} }
} }
/// Return a `SalienceClient` on the shared gRPC channel — opens pub(crate) fn stream_completion_mm(
/// the channel on first call and reuses it thereafter across
/// every ApiClient clone. All scoring / inference / session
/// RPCs flow through this single multiplexed HTTP/2 connection.
///
/// Bumps tonic's default 4 MiB encode/decode caps to 64 MiB on
/// every client. Multimodal Generate requests carry pre-encoded
/// image bytes inline (Qwen3.6's 768×768 patches at high res
/// land around 58 MiB per turn), and Done events with full
/// per-token readout vectors can also exceed 4 MiB on long runs.
pub async fn salience_client(&self) -> Result<
    salience::pb::salience_client::SalienceClient<tonic::transport::Channel>
> {
    // Message-size cap applied to both directions of every client
    // handed out (raised from tonic's 4 MiB default).
    const MAX_GRPC_MESSAGE_BYTES: usize = 64 * 1024 * 1024;

    // The cell is shared across ApiClient clones, so the connect
    // closure only runs for the first caller; everyone else gets the
    // already-open channel.
    let channel = self
        .salience_channel
        .get_or_try_init(|| async {
            let grpc_url = salience::derive_grpc_url(&self.base_url);
            log::debug!(target: "grpc",
                "opening shared salience channel: http_base={} -> grpc_url={}",
                self.base_url, grpc_url);
            salience::connect_channel(&grpc_url).await
        })
        .await?;

    let sized_client = salience::pb::salience_client::SalienceClient::new(channel.clone())
        .max_decoding_message_size(MAX_GRPC_MESSAGE_BYTES)
        .max_encoding_message_size(MAX_GRPC_MESSAGE_BYTES);
    Ok(sized_client)
}
/// Stream generation via a gRPC session. Walks the prompt chunks
/// comparing against the session's `committed_len`, sends the
/// delta as interleaved `AppendImage` + intermediate
/// `Generate(max_tokens=0)` (for text runs separating images) +
/// a final `Generate(max_tokens=sampling.max_tokens, ...)` whose
/// Token events stream back through the channel.
///
/// On any gRPC error the session is dropped; the next call
/// reopens fresh. Happy-path ordering: Token* Done. Error paths
/// emit `StreamToken::Error` and close.
pub(crate) fn stream_session_mm(
&self, &self,
session_lock: std::sync::Arc<crate::Mutex<Option<salience::SessionHandle>>>, prompt_tokens: &[u32],
chunks: Vec<super::context::WireChunk>, images: &[super::context::WireImage],
images: Vec<super::context::WireImage>,
match_upto: u32,
sampling: SamplingParams, sampling: SamplingParams,
priority: Option<i32>, priority: Option<i32>,
readout_shape: Option<(u32, u32)>,
) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) { ) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) {
let (tx, rx) = mpsc::unbounded_channel(); let (tx, rx) = mpsc::unbounded_channel();
let client = self.clone(); let client = self.client.clone();
let api_key = self.api_key.clone();
let model = self.model.clone();
let prompt_tokens = prompt_tokens.to_vec();
let images: Vec<(Vec<u8>, String)> = images.iter()
.map(|i| (i.bytes.clone(), i.mime.clone()))
.collect();
let base_url = self.base_url.clone();
let handle = tokio::spawn(async move { let handle = tokio::spawn(async move {
let result = run_session_generate( let result = stream_completions(
session_lock, &client, chunks, images, match_upto, sampling, &client, &base_url, &api_key, &model,
priority, readout_shape, &tx, &prompt_tokens, &images, &tx, sampling, priority,
).await; ).await;
if let Err(e) = result { if let Err(e) = result {
log::warn!(target: "grpc", let _ = tx.send(StreamToken::Error(e.to_string()));
"stream_session_mm error, forwarding to UI: {:#}", e);
let _ = tx.send(StreamToken::Error(format!("{:#}", e)));
} }
}); });
@ -184,243 +128,386 @@ impl ApiClient {
/// readout is enabled on the server, `Ok(None)` on 404 (disabled), /// readout is enabled on the server, `Ok(None)` on 404 (disabled),
/// or an error on any other failure. /// or an error on any other failure.
/// ///
/// First call performs the HTTP fetch; subsequent calls (including /// Call once at startup and cache the result; the manifest doesn't
/// across ApiClient clones sharing the same cell) return the /// change during a server run.
/// cached result. The manifest doesn't change during a server run.
pub fn model_str(&self) -> &str { &self.model }
pub async fn fetch_readout_manifest(&self) -> Result<Option<ReadoutManifest>> { pub async fn fetch_readout_manifest(&self) -> Result<Option<ReadoutManifest>> {
let manifest = self.manifest.get_or_try_init(|| async { let url = format!("{}/readout/manifest", self.base_url);
let url = format!("{}/readout/manifest", self.base_url); let auth = format!("Bearer {}", self.api_key);
let auth = format!("Bearer {}", self.api_key); let response = self
let response = self .client
.client .get_with_headers(&url, &[("Authorization", &auth)])
.get_with_headers(&url, &[("Authorization", &auth)]) .await
.await .map_err(|e| anyhow::anyhow!("readout manifest fetch ({}): {}", url, e))?;
.map_err(|e| anyhow::anyhow!("readout manifest fetch ({}): {}", url, e))?; let status = response.status();
let status = response.status(); if status.as_u16() == 404 {
if status.as_u16() == 404 { return Ok(None);
return Ok::<_, anyhow::Error>(None); }
} if !status.is_success() {
if !status.is_success() { let body = response.text().await.unwrap_or_default();
let body = response.text().await.unwrap_or_default(); let n = body.floor_char_boundary(body.len().min(500));
let n = body.floor_char_boundary(body.len().min(500)); anyhow::bail!("readout manifest HTTP {} ({}): {}", status, url, &body[..n]);
anyhow::bail!("readout manifest HTTP {} ({}): {}", status, url, &body[..n]); }
} Ok(Some(response.json().await?))
Ok(Some(response.json().await?))
}).await?;
Ok(manifest.clone())
} }
} }
/// Body of the gRPC-path streaming task. Walks the wire chunks async fn stream_completions(
/// against the session's `committed_len`, sends the delta via client: &HttpClient,
/// AppendImage / intermediate prefill-only Generates / final decode base_url: &str,
/// Generate, and translates the final Generate's Token events into api_key: &str,
/// StreamTokens on `tx`. On success the session handle is returned model: &str,
/// to `session_lock` with an updated `committed_len`; on error the prompt_tokens: &[u32],
/// handle is dropped so the next call reopens. images: &[(Vec<u8>, String)],
async fn run_session_generate( tx: &mpsc::UnboundedSender<StreamToken>,
session_lock: std::sync::Arc<crate::Mutex<Option<salience::SessionHandle>>>,
client: &ApiClient,
chunks: Vec<super::context::WireChunk>,
images: Vec<super::context::WireImage>,
match_upto: u32,
sampling: SamplingParams, sampling: SamplingParams,
priority: Option<i32>, priority: Option<i32>,
readout_shape: Option<(u32, u32)>, ) -> anyhow::Result<()> {
tx: &mpsc::UnboundedSender<StreamToken>, let mut request = serde_json::json!({
) -> Result<()> { "model": model,
use std::time::Instant; "prompt": prompt_tokens,
use futures::StreamExt; "max_tokens": 16384,
use super::context::WireChunk; "temperature": sampling.temperature,
use salience::pb; "top_p": sampling.top_p,
"top_k": sampling.top_k,
let mut handle: salience::SessionHandle = { "stream": true,
let mut guard = session_lock.lock().await; "return_token_ids": true,
match guard.take() { "skip_special_tokens": false,
Some(h) => h, "stop_token_ids": [super::tokenizer::IM_END],
None => { });
drop(guard); if !images.is_empty() {
log::debug!(target: "grpc", "run_session_generate: opening new session"); use base64::Engine;
salience::SessionHandle::open(client).await? let b64 = base64::engine::general_purpose::STANDARD;
} let uris: Vec<String> = images.iter()
} .map(|(bytes, mime)| format!("data:{};base64,{}", mime, b64.encode(bytes)))
}; .collect();
request["multi_modal_data"] = serde_json::json!({ "image": uris });
// If the client believes the match extends only up to `match_upto` }
// but the server has more, we need to rewind. For v1 the match is if let Some(p) = priority {
// either whole or broken — `match_upto` is always 0 on any mutation request["priority"] = serde_json::json!(p);
// — so the cheapest correct recovery is to drop the session and
// open a fresh one.
if match_upto < handle.committed_len {
log::warn!(target: "grpc",
"session rewind: match_upto={} < committed_len={} — reopening session (resending {} bytes)",
match_upto, handle.committed_len, handle.committed_len - match_upto);
drop(handle);
handle = salience::SessionHandle::open(client).await?;
} }
// Walk chunks at byte-level, taking everything past `match_upto` let url = format!("{}/completions", base_url);
// as the delta. Token chunks can be split mid-way; images live let debug_label = format!("{} prompt tokens, model={}", prompt_tokens.len(), model);
// inline in the token stream, so there's no separate image-chunk
// case anymore. let mut response = send_and_check(
let mut acc: u32 = 0; client, &url, &request,
let mut pending: Vec<u32> = Vec::new(); ("Authorization", &format!("Bearer {}", api_key)),
for chunk in chunks.iter() { &[], &debug_label, None,
match chunk { ).await?;
WireChunk::Tokens(t) => {
let len = t.len() as u32; let mut reader = SseReader::new();
let chunk_end = acc + len; let mut usage = None;
if chunk_end <= match_upto {
acc = chunk_end; while let Some(event) = reader.next_event(&mut response).await? {
} else if acc < match_upto { if let Some(err_msg) = event["error"]["message"].as_str() {
let skip = (match_upto - acc) as usize; anyhow::bail!("API error in stream: {}", err_msg);
pending.extend_from_slice(&t[skip..]); }
acc = chunk_end;
} else { if let Some(u) = event["usage"].as_object() {
pending.extend_from_slice(t); if let Ok(u) = serde_json::from_value::<Usage>(serde_json::Value::Object(u.clone())) {
acc = chunk_end; usage = Some(u);
}
} }
} }
}
// Filter images to those entirely past `match_upto` — anything let choices = match event["choices"].as_array() {
// before is on the server already (prior turn), anything Some(c) => c,
// straddling is a hard divergence (image partially-sent shouldn't None => continue,
// happen with our atomic AppendImage history; with images-inline
// it can only happen if mark_dirty cleared match_upto mid-block,
// which the AST mutators prevent).
let mut new_images: Vec<pb::ImageAttachment> = Vec::new();
for img in &images {
if img.pad_end <= match_upto {
continue; // already sent on a prior turn
}
if img.pad_start < match_upto {
anyhow::bail!(
"session divergence: image at [{},{}) straddles match_upto={}",
img.pad_start, img.pad_end, match_upto,
);
}
new_images.push(pb::ImageAttachment {
bytes: img.bytes.clone(),
mime: img.mime.clone(),
pad_range_start: img.pad_start,
pad_range_end: img.pad_end,
});
}
// Final Generate: pending holds any trailing text; decode up to
// sampling.max_tokens. Request readouts on all decode positions
// via a catch-all range ending at u32::MAX — decode never
// reaches it.
let prompt_len_after_append = handle.committed_len + pending.len() as u32;
let readout_ranges = if readout_shape.is_some() {
vec![pb::PositionRange {
start: prompt_len_after_append,
end: u32::MAX,
}]
} else {
Vec::new()
};
let req = pb::GenerateRequest {
session_id: handle.session_id.clone(),
append_tokens: pending,
offset: handle.committed_len,
truncating: false,
max_tokens: sampling.max_tokens,
logprobs_ranges: Vec::new(),
logprob_top_k: 0,
readout_ranges,
temperature: sampling.temperature,
top_p: sampling.top_p,
top_k: sampling.top_k,
stop_token_ids: Vec::new(),
priority: priority.unwrap_or(0),
images: new_images,
};
let session_id_for_log = handle.session_id.clone();
let t_generate = Instant::now();
log::debug!(target: "grpc",
"session {} Generate: offset={} append={} max_tokens={} priority={}",
session_id_for_log, req.offset, req.append_tokens.len(),
req.max_tokens, req.priority);
let mut stream = handle.generate(req).await?;
let (n_layers, n_concepts) = readout_shape.unwrap_or((0, 0));
let mut session_terminated = false;
let mut first_token_at: Option<Instant> = None;
while let Some(event) = stream.next().await {
let event = match event {
Ok(e) => e,
Err(status) => {
log::warn!(target: "grpc",
"session {} Generate stream error: {} — dropping session",
session_id_for_log, status);
session_terminated = true;
let _ = tx.send(StreamToken::Error(format!(
"Generate stream error: {}", status,
)));
break;
}
}; };
let Some(inner) = event.event else { continue };
match inner { for choice in choices {
pb::generate_event::Event::Token(t) => { // `readout`, if present, is a nested list
if t.is_prefill { continue; } // `[num_tokens][n_layers][n_concepts]`. Parse it once per
if first_token_at.is_none() { // chunk and pair rows with token ids by index — the rows
log::debug!(target: "grpc", // are in the same order as `token_ids`.
"session {} first decode token at {:?}", let readouts: Option<Vec<TokenReadout>> = choice["readout"]
session_id_for_log, t_generate.elapsed()); .as_array()
first_token_at = Some(Instant::now()); .map(|outer| {
} outer.iter().filter_map(|per_token| {
let readout = if t.readout.is_empty() { per_token.as_array().map(|layers| {
None layers.iter().filter_map(|per_layer| {
} else if n_layers == 0 || n_concepts == 0 { per_layer.as_array().map(|vals| {
None vals.iter()
} else { .filter_map(|v| v.as_f64().map(|f| f as f32))
let expected = (n_layers as usize) * (n_concepts as usize); .collect::<Vec<f32>>()
if t.readout.len() != expected { })
log::warn!(target: "grpc", }).collect::<Vec<Vec<f32>>>()
"readout shape mismatch: expected {}*{}={}, got {}", })
n_layers, n_concepts, expected, t.readout.len()); }).collect()
None
} else {
let n = n_concepts as usize;
let mut layers: Vec<Vec<f32>> = Vec::with_capacity(n_layers as usize);
for l in 0..(n_layers as usize) {
layers.push(t.readout[l * n..(l + 1) * n].to_vec());
}
Some(layers)
}
};
if tx.send(StreamToken::Token { id: t.id, readout }).is_err() {
break;
}
}
pb::generate_event::Event::Done(d) => {
log::debug!(target: "grpc",
"session {} Done: prompt={} completion={} total={} reason={:?} elapsed={:?}",
session_id_for_log, d.prompt_tokens, d.completion_tokens,
d.total_tokens, d.finish_reason, t_generate.elapsed());
handle.committed_len = d.total_tokens;
let usage = Some(Usage {
prompt_tokens: d.prompt_tokens,
completion_tokens: d.completion_tokens,
total_tokens: d.total_tokens,
}); });
let _ = tx.send(StreamToken::Done { usage });
if let Some(ids) = choice["token_ids"].as_array() {
for (i, id_val) in ids.iter().enumerate() {
if let Some(id) = id_val.as_u64() {
let readout = readouts
.as_ref()
.and_then(|r| r.get(i).cloned());
let _ = tx.send(StreamToken::Token {
id: id as u32,
readout,
});
}
}
} else if let Some(text) = choice["text"].as_str() {
// Fallback: provider didn't return token_ids, encode locally.
// No readout available in this path — the encoder may
// produce a different token count than the server did.
if !text.is_empty() {
for id in super::tokenizer::encode(text) {
let _ = tx.send(StreamToken::Token { id, readout: None });
}
}
} }
} }
} }
if !session_terminated { let _ = tx.send(StreamToken::Done { usage });
let mut guard = session_lock.lock().await;
*guard = Some(handle);
}
Ok(()) Ok(())
} }
/// POST `body` as JSON to `url` and hand back the response only if its
/// status is a success.
///
/// * `auth_header` is sent first, followed by `extra_headers`.
/// * `debug_label` is only used in the `POC_DEBUG` request log line.
/// * `request_json`, when provided, is written to
///   `~/.consciousness/logs/failed-requests/<timestamp>.json` if the
///   server answers with a non-success status.
///
/// # Errors
///
/// Transport failures are reported as `"<cause> (<url>): <message>"`,
/// where the cause is derived from the underlying error text. A
/// non-success status yields `"HTTP <status> (<url>): <body>"` with the
/// body cut to at most 1000 bytes on a character boundary.
pub(crate) async fn send_and_check(
    client: &HttpClient,
    url: &str,
    body: &impl serde::Serialize,
    auth_header: (&str, &str),
    extra_headers: &[(&str, &str)],
    debug_label: &str,
    request_json: Option<&str>,
) -> Result<HttpResponse> {
    let verbose = std::env::var("POC_DEBUG").is_ok();
    let started = Instant::now();

    if verbose {
        // Serialized only to report its size; a serialization failure
        // is reported as a zero-sized payload.
        let payload_bytes = match serde_json::to_string(body) {
            Ok(serialized) => serialized.len(),
            Err(_) => 0,
        };
        dbglog!(
            "request: {}K payload, {}",
            payload_bytes / 1024, debug_label,
        );
    }

    let mut all_headers: Vec<(&str, &str)> = Vec::with_capacity(1 + extra_headers.len());
    all_headers.push(auth_header);
    all_headers.extend_from_slice(extra_headers);

    let response = match client.send_json("POST", url, &all_headers, body).await {
        Ok(resp) => resp,
        Err(e) => {
            // Classify the failure from the error text so the caller
            // sees a short cause ahead of the full message.
            let msg = e.to_string();
            let connect_failed = msg.contains("connect timeout") || msg.contains("TCP connect");
            let cause = if connect_failed {
                "connection refused"
            } else if msg.contains("request timeout") {
                "request timed out"
            } else {
                "request error"
            };
            return Err(anyhow::anyhow!("{} ({}): {}", cause, url, msg));
        }
    };

    let status = response.status();
    let elapsed = started.elapsed();

    if verbose {
        const LOGGED_HEADERS: [&str; 3] = [
            "x-ratelimit-remaining",
            "x-ratelimit-limit",
            "x-request-id",
        ];
        for name in LOGGED_HEADERS {
            if let Some(val) = response.header(name) {
                dbglog!("header {}: {}", name, val);
            }
        }
    }

    if !status.is_success() {
        let text = response.text().await.unwrap_or_default();

        // Log line carries at most 500 bytes of the body.
        let log_end = text.floor_char_boundary(text.len().min(500));
        dbglog!(
            "HTTP {} after {:.1}s ({}): {}",
            status,
            elapsed.as_secs_f64(),
            url,
            &text[..log_end]
        );

        if let Some(json) = request_json {
            // Best effort: directory-creation and write failures are ignored.
            let log_dir = dirs::home_dir()
                .unwrap_or_default()
                .join(".consciousness/logs/failed-requests");
            let _ = std::fs::create_dir_all(&log_dir);
            let stamp = chrono::Local::now().format("%Y%m%dT%H%M%S");
            let path = log_dir.join(format!("{}.json", stamp));
            if std::fs::write(&path, json).is_ok() {
                dbglog!(
                    "saved failed request to {} (HTTP {})", path.display(), status
                );
            }
        }

        // The returned error carries up to 1000 bytes of the body.
        let err_end = text.floor_char_boundary(text.len().min(1000));
        anyhow::bail!("HTTP {} ({}): {}", status, url, &text[..err_end]);
    }

    if verbose {
        dbglog!(
            "connected in {:.1}s (HTTP {})",
            elapsed.as_secs_f64(),
            status.as_u16()
        );
    }

    Ok(response)
}
/// SSE stream reader. Handles the generic SSE plumbing shared by both
/// backends: chunk reading with timeout, line buffering, `data:` prefix
/// stripping, `[DONE]` detection, JSON parsing, and parse error diagnostics.
/// Yields parsed events as serde_json::Value — each backend handles its
/// own event types.
pub(crate) struct SseReader {
/// Text received from the response that has not yet been consumed as
/// complete lines.
line_buf: String,
/// Maximum time to wait for a single chunk before the read is
/// reported as a stream timeout.
chunk_timeout: Duration,
/// When this reader was constructed; used for elapsed-time diagnostics.
pub stream_start: Instant,
/// Number of chunks read from the response so far.
pub chunks_received: u64,
/// Number of `data: ` lines handed to the JSON parser so far.
pub sse_lines_parsed: u64,
/// Number of `data: ` lines that failed to parse as JSON.
pub sse_parse_errors: u64,
/// True when the `POC_DEBUG` environment variable is set; makes every
/// parse error get logged instead of only the first.
debug: bool,
/// Set once a `data: [DONE]` line has been seen.
done: bool,
/// Serialized request payload — saved to disk on errors for replay debugging.
pub(crate) request_json: Option<String>,
}
impl SseReader {
    /// Create a reader with an empty buffer and zeroed counters.
    ///
    /// The per-chunk timeout comes from the `api_stream_timeout_secs`
    /// config value; verbose parse-error logging is enabled when the
    /// `POC_DEBUG` environment variable is set.
    pub(crate) fn new() -> Self {
        Self {
            line_buf: String::new(),
            chunk_timeout: Duration::from_secs(crate::config::get().api_stream_timeout_secs),
            stream_start: Instant::now(),
            chunks_received: 0,
            sse_lines_parsed: 0,
            sse_parse_errors: 0,
            debug: std::env::var("POC_DEBUG").is_ok(),
            done: false,
            request_json: None,
        }
    }

    /// Save the request payload to disk for replay debugging.
    ///
    /// Does nothing when no payload is attached. Directory-creation and
    /// write failures are ignored — this is best-effort diagnostics.
    fn save_failed_request(&self, reason: &str) {
        let Some(ref json) = self.request_json else { return };
        let log_dir = dirs::home_dir()
            .unwrap_or_default()
            .join(".consciousness/logs/failed-requests");
        let _ = std::fs::create_dir_all(&log_dir);
        let ts = chrono::Local::now().format("%Y%m%dT%H%M%S");
        let path = log_dir.join(format!("{}.json", ts));
        if std::fs::write(&path, json).is_ok() {
            dbglog!(
                "saved failed request to {} ({})", path.display(), reason
            );
        }
    }

    /// Describe the unconsumed buffer for error diagnostics: its byte
    /// length followed by at most the first 500 bytes.
    ///
    /// The cut is moved back to a character boundary so a multi-byte
    /// UTF-8 character straddling byte 500 cannot cause a slicing panic.
    fn buf_preview(&self) -> String {
        if self.line_buf.is_empty() {
            "(empty)".to_string()
        } else {
            let cut = self.line_buf.floor_char_boundary(self.line_buf.len().min(500));
            format!("{}B: {}", self.line_buf.len(), &self.line_buf[..cut])
        }
    }

    /// Read the next SSE event from the response stream.
    /// Returns Ok(Some(value)) for each parsed data line,
    /// Ok(None) when the stream ends or [DONE] is received.
    ///
    /// Lines that do not start with `data: ` (blank lines, `event:`
    /// lines, anything else) are skipped. Lines whose payload is not
    /// valid JSON are counted, logged (first occurrence, or every one
    /// when debugging) and skipped.
    ///
    /// # Errors
    ///
    /// Returns an error if reading a chunk fails or if no chunk arrives
    /// within the configured chunk timeout. In both cases the attached
    /// request payload, if any, is saved to disk first.
    pub(crate) async fn next_event(
        &mut self,
        response: &mut HttpResponse,
    ) -> Result<Option<serde_json::Value>> {
        loop {
            // Drain complete lines from the buffer before reading more chunks.
            while let Some(newline_pos) = self.line_buf.find('\n') {
                // Remove the line (including its '\n') in place rather
                // than reallocating the remainder of the buffer.
                let raw: String = self.line_buf.drain(..=newline_pos).collect();
                let line = raw.trim();

                if line == "data: [DONE]" {
                    self.done = true;
                    return Ok(None);
                }

                let Some(json_str) = line.strip_prefix("data: ") else {
                    continue;
                };
                self.sse_lines_parsed += 1;

                match serde_json::from_str(json_str) {
                    Ok(v) => return Ok(Some(v)),
                    Err(e) => {
                        self.sse_parse_errors += 1;
                        if self.sse_parse_errors == 1 || self.debug {
                            let preview = if json_str.len() > 200 {
                                // Cut on a character boundary; a raw
                                // `[..200]` panics inside a multi-byte char.
                                let cut = json_str.floor_char_boundary(200);
                                format!("{}...", &json_str[..cut])
                            } else {
                                json_str.to_string()
                            };
                            dbglog!(
                                "SSE parse error (#{}) {}: {}",
                                self.sse_parse_errors, e, preview
                            );
                        }
                    }
                }
            }

            if self.done {
                return Ok(None);
            }

            // Read more data from the response stream.
            match tokio::time::timeout(self.chunk_timeout, response.chunk()).await {
                Ok(Ok(Some(chunk))) => {
                    self.chunks_received += 1;
                    // NOTE(review): decoding each chunk separately turns a
                    // multi-byte character split across two chunks into
                    // replacement characters. Fixing that needs a byte
                    // buffer on the struct, so it is left as-is here.
                    self.line_buf.push_str(&String::from_utf8_lossy(&chunk));
                }
                Ok(Ok(None)) => return Ok(None),
                Ok(Err(e)) => {
                    let msg = format!(
                        "stream error after {} chunks, {:.1}s, {} sse lines: {} | buf: {}",
                        self.chunks_received,
                        self.stream_start.elapsed().as_secs_f64(),
                        self.sse_lines_parsed,
                        e, self.buf_preview(),
                    );
                    dbglog!("{}", msg);
                    self.save_failed_request(&msg);
                    return Err(e.into());
                }
                Err(_) => {
                    let msg = format!(
                        "stream timeout: {}s, {} chunks, {} sse lines, {:.1}s elapsed | buf: {}",
                        self.chunk_timeout.as_secs(),
                        self.chunks_received,
                        self.sse_lines_parsed,
                        self.stream_start.elapsed().as_secs_f64(),
                        self.buf_preview(),
                    );
                    dbglog!("{}", msg);
                    self.save_failed_request(&msg);
                    anyhow::bail!(
                        "stream timeout: no data for {}s ({} chunks received)",
                        self.chunk_timeout.as_secs(),
                        self.chunks_received
                    );
                }
            }
        }
    }
}

View file

@ -1,279 +0,0 @@
// agent/api/salience.rs — gRPC client bindings for salience.v1.
//
// Thin wrapper around the tonic-generated types. Every RPC except
// Generate is unary; Generate is server-streaming. Free functions
// (open/close session) wrap the lifecycle RPCs; `SessionHandle` just
// carries the id + connection params so later RPCs can reuse them.
//
// The old bidi Session() API is gone — see git history for its shape.
#![allow(clippy::enum_variant_names)]
use anyhow::{Context, Result};
use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint};
/// Generated prost + tonic types for salience.v1. Call sites use
/// `pb::OpenSessionRequest`, `pb::Token`, etc.
pub mod pb {
// Expands to the code generated for the `salience.v1` proto package.
tonic::include_proto!("salience.v1");
}
/// The generated salience client specialised to a tonic transport
/// `Channel`.
pub type SalienceClient = pb::salience_client::SalienceClient<Channel>;
/// Connect a gRPC channel to the salience server at `base_url`
/// (e.g. `https://host:8443`), enabling TLS when the URL is `https://`.
///
/// For TLS connections the system's native roots are trusted, plus any
/// user-provided CA certs under `~/.consciousness/certs/` (for
/// self-signed server certs).
///
/// The raw `Channel` is returned so callers (`ApiClient::salience_client`)
/// can cache it and clone a `SalienceClient` per request without
/// reopening the TCP/TLS connection; tonic multiplexes RPCs over the
/// shared channel automatically.
///
/// # Errors
///
/// Fails if `base_url` is not a valid endpoint, if the TLS
/// configuration is rejected, or if the connection attempt fails.
pub async fn connect_channel(base_url: &str) -> Result<Channel> {
    let connect_timeout = std::time::Duration::from_secs(30);
    let request_timeout = std::time::Duration::from_secs(600);

    let plain = Endpoint::from_shared(base_url.to_string())
        .with_context(|| format!("invalid salience endpoint: {}", base_url))?
        .connect_timeout(connect_timeout)
        .timeout(request_timeout);

    let endpoint = if base_url.starts_with("https://") {
        let extra_roots = super::http::load_user_certs_pem_bytes();
        let native_only = ClientTlsConfig::new().with_native_roots();
        // Only add a CA bundle when there is something in it.
        let tls = if extra_roots.is_empty() {
            native_only
        } else {
            native_only.ca_certificate(Certificate::from_pem(extra_roots))
        };
        plain
            .tls_config(tls)
            .with_context(|| "configuring tonic TLS")?
    } else {
        plain
    };

    endpoint
        .connect()
        .await
        .with_context(|| format!("failed to connect to salience server at {}", base_url))
}
/// Derive the gRPC base URL from the HTTP completions base URL.
///
/// vLLM's salience gRPC server listens on a different port (8443) from
/// the HTTP endpoint (8000) and accepts no path component. Given an
/// HTTP base like `https://host:8000/v1`, produce `https://host:8443`.
///
/// Rules:
/// * Trailing slashes are removed.
/// * When the URL has a scheme (`…://`), everything from the first `/`
///   after the authority onward is dropped.
/// * The port is rewritten only when the authority ends in exactly
///   `:8000`. Other ports — including ones that merely start with
///   `8000`, such as `:80001` — and any `:8000` appearing inside the
///   host (e.g. an IPv6 literal) are left untouched.
/// * Input without a scheme keeps its path; only an exact `:8000` port
///   before the first `/` is rewritten.
pub fn derive_grpc_url(http_base: &str) -> String {
    const HTTP_PORT: &str = ":8000";
    const GRPC_PORT: &str = ":8443";

    let trimmed = http_base.trim_end_matches('/');
    let scheme_end = trimmed.find("://").map(|i| i + 3);
    let authority_start = scheme_end.unwrap_or(0);
    let authority_end = trimmed[authority_start..]
        .find('/')
        .map_or(trimmed.len(), |i| authority_start + i);

    let (origin, tail) = trimmed.split_at(authority_end);
    // The path is only stripped for URLs that carry a scheme.
    let tail = if scheme_end.is_some() { "" } else { tail };

    match origin.strip_suffix(HTTP_PORT) {
        Some(host) => format!("{host}{GRPC_PORT}{tail}"),
        None => format!("{origin}{tail}"),
    }
}
/// Add an `authorization: Bearer <api_key>` metadata entry to a tonic
/// request.
///
/// The request is left untouched when `api_key` is empty, or when the
/// resulting header text cannot be parsed into a metadata value.
pub fn with_auth<T>(req: &mut tonic::Request<T>, api_key: &str) {
    if !api_key.is_empty() {
        if let Ok(val) = format!("Bearer {}", api_key).parse() {
            req.metadata_mut().insert("authorization", val);
        }
    }
}
/// Handle to a server-side session. Carries the id + an `ApiClient`
/// clone (which holds the shared tonic Channel) so subsequent
/// per-session RPCs go over the process-global connection.
/// `committed_len` tracks the server's current session.tokens length
/// so the client can submit deltas with the right `offset`.
///
/// Dropping the handle fires a best-effort `CloseSession` RPC for
/// `session_id` (see the `Drop` impl).
pub struct SessionHandle {
/// Session id returned by the `OpenSession` RPC. Taken (left empty)
/// when the handle is dropped.
pub session_id: String,
/// `max_model_len` as reported in the `OpenSession` response.
/// NOTE(review): presumably the model's maximum context length in
/// tokens — confirm against the proto definition.
pub max_model_len: u32,
/// Starts at 0 in `open`; `prefill_only` overwrites it with the
/// `total_tokens` of each `Done` event and uses it as the next
/// request's `offset`. Callers of `generate` are expected to advance
/// it themselves.
pub committed_len: u32,
/// Clone of the `ApiClient` passed to `open`; every RPC on this
/// handle obtains its salience client and API key from it.
client: super::ApiClient,
}
impl SessionHandle {
    /// Open a new server-side session via the `OpenSession` RPC.
    ///
    /// Sends the client's model name with the bearer token attached and
    /// wraps the returned `session_id` / `max_model_len` in a handle
    /// whose `committed_len` starts at 0.
    ///
    /// # Errors
    /// Fails if the salience client cannot be obtained or the RPC fails.
    pub async fn open(client: &super::ApiClient) -> Result<Self> {
        let t0 = std::time::Instant::now();
        log::debug!(target: "grpc", "OpenSession rpc: start");
        let mut c = client.salience_client().await?;
        let mut req = tonic::Request::new(pb::OpenSessionRequest {
            model: client.model.clone(),
        });
        with_auth(&mut req, client.api_key());
        let resp = c
            .open_session(req)
            .await
            .with_context(|| "OpenSession RPC failed")?
            .into_inner();
        log::debug!(target: "grpc",
            "OpenSession rpc: done session_id={} max_model_len={} elapsed={:?}",
            resp.session_id, resp.max_model_len, t0.elapsed());
        Ok(Self {
            session_id: resp.session_id,
            max_model_len: resp.max_model_len,
            committed_len: 0,
            client: client.clone(),
        })
    }

    /// The `ApiClient` this session issues its RPCs through.
    pub fn client(&self) -> &super::ApiClient {
        &self.client
    }

    /// Debug-only: fetch the server's full session.tokens. Used to
    /// verify client-side accounting byte-for-byte when divergence
    /// is suspected. Not cheap on large sessions.
    ///
    /// # Errors
    /// Fails if the salience client cannot be obtained or the
    /// `DumpSession` RPC fails.
    pub async fn dump_tokens(&self) -> Result<Vec<u32>> {
        let mut c = self.client.salience_client().await?;
        let mut req = tonic::Request::new(pb::DumpSessionRequest {
            session_id: self.session_id.clone(),
        });
        with_auth(&mut req, self.client.api_key());
        let resp = c
            .dump_session(req)
            .await
            .with_context(|| "DumpSession RPC failed")?
            .into_inner();
        Ok(resp.tokens)
    }

    /// Open a gRPC Generate stream with the given request. Caller
    /// iterates the returned stream of GenerateEvents; the handle's
    /// `committed_len` should be advanced by the caller on Done based
    /// on the Done event's `total_tokens` field.
    ///
    /// This method does not touch `committed_len` itself (it takes
    /// `&self`).
    ///
    /// # Errors
    /// Fails if the salience client cannot be obtained or the stream
    /// cannot be opened.
    pub async fn generate(
        &self,
        req: pb::GenerateRequest,
    ) -> Result<tonic::Streaming<pb::GenerateEvent>> {
        let t0 = std::time::Instant::now();
        log::debug!(target: "grpc",
            "Generate rpc: open-stream session={} offset={} append={} max_tokens={}",
            self.session_id, req.offset, req.append_tokens.len(), req.max_tokens);
        let mut c = self.client.salience_client().await?;
        let mut req = tonic::Request::new(req);
        with_auth(&mut req, self.client.api_key());
        let resp = c
            .generate(req)
            .await
            .with_context(|| "Generate RPC failed")?;
        log::debug!(target: "grpc",
            "Generate rpc: stream opened session={} open-latency={:?}",
            self.session_id, t0.elapsed());
        Ok(resp.into_inner())
    }

    /// Run a prefill-only Generate (max_tokens=0) that appends the
    /// given tokens to the session. No decode, no Token events — the
    /// server just extends session.tokens and runs prefill to warm
    /// the KV cache. Used to interleave text runs between AppendImage
    /// calls, and by score paths that want prompt_logprobs without a
    /// decode step.
    ///
    /// The request is submitted at `offset = committed_len`; on each
    /// `Done` event `committed_len` is replaced by the event's
    /// `total_tokens`.
    ///
    /// # Errors
    /// Fails if the stream cannot be opened, if any stream item is an
    /// error status, or if the stream ends without a `Done` event. In
    /// the last case `committed_len` was never advanced, so reporting
    /// success would make the next request use a stale `offset`.
    pub async fn prefill_only(&mut self, tokens: Vec<u32>) -> Result<()> {
        use futures::StreamExt;
        let req = pb::GenerateRequest {
            session_id: self.session_id.clone(),
            append_tokens: tokens,
            offset: self.committed_len,
            truncating: false,
            max_tokens: 0,
            logprobs_ranges: Vec::new(),
            logprob_top_k: 0,
            readout_ranges: Vec::new(),
            temperature: 0.0,
            top_p: 0.0,
            top_k: 0,
            stop_token_ids: Vec::new(),
            priority: 0,
            images: Vec::new(),
        };
        let mut stream = self.generate(req).await?;
        let mut saw_done = false;
        while let Some(event) = stream.next().await {
            let event = event.map_err(|s| anyhow::anyhow!("prefill Generate stream: {}", s))?;
            if let Some(pb::generate_event::Event::Done(d)) = event.event {
                self.committed_len = d.total_tokens;
                saw_done = true;
            }
        }
        if !saw_done {
            // Without Done we have no authoritative length from the
            // server; surface it instead of silently desyncing.
            return Err(anyhow::anyhow!(
                "prefill Generate stream closed without Done event (session {})",
                self.session_id
            ));
        }
        Ok(())
    }
}
/// On drop, issue a fire-and-forget `CloseSession` from a detached tokio
/// task so the server can release the session before its TTL eviction.
/// Best-effort only: an already-empty session id is ignored, and when
/// there is no current tokio runtime the close is skipped and the
/// server's 30min TTL reaps the session eventually.
impl Drop for SessionHandle {
    fn drop(&mut self) {
        if self.session_id.is_empty() {
            return;
        }

        // Move the id out so the detached task owns it.
        let id = std::mem::take(&mut self.session_id);
        let api = self.client.clone();

        match tokio::runtime::Handle::try_current() {
            Ok(handle) => {
                handle.spawn(async move {
                    let mut stub = match api.salience_client().await {
                        Ok(stub) => stub,
                        Err(_) => return,
                    };
                    let mut close = tonic::Request::new(pb::CloseSessionRequest {
                        session_id: id.clone(),
                    });
                    with_auth(&mut close, api.api_key());
                    if let Err(e) = stub.close_session(close).await {
                        log::debug!(target: "grpc",
                            "CloseSession on drop failed for {}: {:#}",
                            id, e);
                    }
                });
            }
            Err(_) => {
                log::debug!(target: "grpc",
                    "SessionHandle drop outside tokio runtime, session {} leaks to TTL",
                    id);
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile-time smoke test for the generated proto types: if
    /// build.rs stops regenerating against the proto, the struct
    /// literals below stop compiling.
    #[test]
    fn generated_types_compile() {
        let _open = pb::OpenSessionRequest {
            model: "qwen3-vl".into(),
        };

        let top = pb::TokenLogprob {
            id: 1,
            logprob: -0.5,
        };
        let _tok = pb::Token {
            id: 42,
            position: 0,
            is_prefill: false,
            readout: vec![0.1, 0.2, 0.3],
            logprobs: vec![top],
            sampled_logprob: -0.1,
            has_sampled_logprob: true,
        };

        let done = pb::GenerateDone {
            prompt_tokens: 10,
            completion_tokens: 20,
            total_tokens: 30,
            finish_reason: pb::generate_done::FinishReason::Eos as i32,
        };
        let _evt = pb::GenerateEvent {
            event: Some(pb::generate_event::Event::Done(done)),
        };
    }

    /// Table of (HTTP base, expected gRPC base) pairs.
    #[test]
    fn derive_grpc_url_cases() {
        let cases = [
            ("https://host:8000/v1", "https://host:8443"),
            ("https://host:8000/", "https://host:8443"),
            ("https://host:9000/v1", "https://host:9000"),
        ];
        for &(http_base, expected) in &cases {
            assert_eq!(derive_grpc_url(http_base), expected);
        }
    }
}

View file

@ -125,19 +125,7 @@ impl<'de> Deserialize<'de> for NodeLeaf {
body: NodeBody, body: NodeBody,
timestamp: DateTime<Utc>, timestamp: DateTime<Utc>,
} }
let mut raw = Raw::deserialize(deserializer)?; let raw = Raw::deserialize(deserializer)?;
// Heal pre-refactor logs: Image leaves used to be deserialized
// with token_count=0 (server-authoritative count was applied
// after AppendImage). With pads now expanded client-side at
// construction, recompute from the persisted dimensions if
// the stored count is 0.
if let NodeBody::Image { orig_height, orig_width, token_count, .. }
= &mut raw.body
{
if *token_count == 0 {
*token_count = qwen3_image_token_count(*orig_height, *orig_width);
}
}
let token_ids = raw.body.compute_token_ids(); let token_ids = raw.body.compute_token_ids();
Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp }) Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp })
} }
@ -155,44 +143,18 @@ pub enum AstNode {
/// Maps memory key → divergence score for this response. /// Maps memory key → divergence score for this response.
#[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")] #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
memory_scores: std::collections::BTreeMap<String, f64>, memory_scores: std::collections::BTreeMap<String, f64>,
/// Cached token stream for the subtree. When `Some`, wire-out
/// uses these bytes verbatim and skips recursion into children.
/// Populated by the response parser from the server's exact
/// stream; also computable from children as a fallback. Cleared
/// on any edit to a descendant. Not serialized — transient.
#[serde(skip, default)]
token_ids: Option<Vec<u32>>,
}, },
} }
/// The context window: four sections as Vec<AstNode>. /// The context window: four sections as Vec<AstNode>.
/// /// All mutation goes through ContextState methods to maintain the invariant
/// All mutation MUST go through `ContextState`'s public methods. Two /// that token_ids on every leaf matches its rendered text.
/// invariants ride on this:
/// 1. Every `Leaf.token_ids` matches its `body.compute_token_ids()`.
/// 2. For every `Branch { token_ids: Some(cached), .. }`, the cached
/// token stream matches what `wire_into` would produce by walking
/// `children` from scratch. Any mutation that touches a Branch's
/// children — directly or via a descendant — must clear the
/// Branch's `token_ids` so it gets recomputed on next wire-out.
///
/// The `&mut Vec<AstNode>` escape hatches are intentionally NOT
/// exposed; if you find yourself wanting one, add a focused method
/// here that maintains the invariants.
pub struct ContextState { pub struct ContextState {
system: Vec<AstNode>, system: Vec<AstNode>,
identity: Vec<AstNode>, identity: Vec<AstNode>,
journal: Vec<AstNode>, journal: Vec<AstNode>,
conversation: Vec<AstNode>, conversation: Vec<AstNode>,
pub conversation_log: Option<crate::mind::log::ConversationLog>, pub conversation_log: Option<crate::mind::log::ConversationLog>,
/// Length of the session's token stream on the server, as of the
/// last Done event. Updated by the grpc layer.
server_committed_len: u32,
/// Prefix length of our walk that still matches the server's
/// session.tokens byte-for-byte. When < `server_committed_len`
/// the session needs rewinding (truncating=true at this offset).
/// Reset to 0 on any mutation that could have changed sent bytes.
client_match_upto: u32,
} }
impl Clone for ContextState { impl Clone for ContextState {
@ -203,8 +165,6 @@ impl Clone for ContextState {
journal: self.journal.clone(), journal: self.journal.clone(),
conversation: self.conversation.clone(), conversation: self.conversation.clone(),
conversation_log: None, // forked contexts don't log conversation_log: None, // forked contexts don't log
server_committed_len: self.server_committed_len,
client_match_upto: self.client_match_upto,
} }
} }
} }
@ -241,10 +201,6 @@ pub struct ResponseParser {
think_buf: String, think_buf: String,
in_tool_call: bool, in_tool_call: bool,
tool_call_buf: String, tool_call_buf: String,
/// Raw generated token IDs, in arrival order. Combined with the
/// prologue at `finish` to stamp the Branch's authoritative
/// token cache — the bytes the server has for this branch.
generated_tokens: Vec<u32>,
} }
impl Role { impl Role {
@ -356,16 +312,6 @@ impl NodeLeaf {
pub fn token_ids(&self) -> &[u32] { &self.token_ids } pub fn token_ids(&self) -> &[u32] { &self.token_ids }
pub fn tokens(&self) -> usize { self.token_ids.len() } pub fn tokens(&self) -> usize { self.token_ids.len() }
pub fn timestamp(&self) -> DateTime<Utc> { self.timestamp } pub fn timestamp(&self) -> DateTime<Utc> { self.timestamp }
/// If this is an Image leaf, update its IMAGE_PAD count to `n` and
/// recompute cached `token_ids`. No-op on non-Image leaves —
/// callers know the body shape via `body()`.
pub fn set_image_token_count(&mut self, n: u32) {
if let NodeBody::Image { token_count, .. } = &mut self.body {
*token_count = n;
self.token_ids = self.body.compute_token_ids();
}
}
} }
impl AstNode { impl AstNode {
@ -414,9 +360,6 @@ impl AstNode {
orig_height: u32, orig_height: u32,
orig_width: u32, orig_width: u32,
) -> Self { ) -> Self {
// Pad count is computed eagerly from dimensions — no more
// "unknown until server responds" shape. Server validates
// on the Generate call; mismatches fail loud.
let token_count = qwen3_image_token_count(orig_height, orig_width); let token_count = qwen3_image_token_count(orig_height, orig_width);
Self::Leaf(NodeLeaf::new(NodeBody::Image { Self::Leaf(NodeLeaf::new(NodeBody::Image {
bytes, bytes,
@ -430,13 +373,7 @@ impl AstNode {
// -- Branch constructors -------------------------------------------------- // -- Branch constructors --------------------------------------------------
pub fn branch(role: Role, children: Vec<AstNode>) -> Self { pub fn branch(role: Role, children: Vec<AstNode>) -> Self {
Self::Branch { Self::Branch { role, children, timestamp: Utc::now(), memory_scores: Default::default() }
role,
children,
timestamp: Utc::now(),
memory_scores: Default::default(),
token_ids: None,
}
} }
pub fn system_msg(text: impl Into<String>) -> Self { pub fn system_msg(text: impl Into<String>) -> Self {
@ -445,7 +382,6 @@ impl AstNode {
children: vec![Self::content(text)], children: vec![Self::content(text)],
timestamp: Utc::now(), timestamp: Utc::now(),
memory_scores: Default::default(), memory_scores: Default::default(),
token_ids: None,
} }
} }
@ -455,7 +391,6 @@ impl AstNode {
children: vec![Self::content(text)], children: vec![Self::content(text)],
timestamp: Utc::now(), timestamp: Utc::now(),
memory_scores: Default::default(), memory_scores: Default::default(),
token_ids: None,
} }
} }
@ -467,12 +402,11 @@ impl AstNode {
let token_ids = leaf.body.compute_token_ids(); let token_ids = leaf.body.compute_token_ids();
Self::Leaf(NodeLeaf { token_ids, ..leaf }) Self::Leaf(NodeLeaf { token_ids, ..leaf })
} }
Self::Branch { role, children, timestamp, memory_scores, .. } => Self::Branch { Self::Branch { role, children, timestamp, memory_scores } => Self::Branch {
role, role,
children: children.into_iter().map(|c| c.retokenize()).collect(), children: children.into_iter().map(|c| c.retokenize()).collect(),
timestamp, timestamp,
memory_scores, memory_scores,
token_ids: None,
}, },
} }
} }
@ -549,10 +483,7 @@ impl AstNode {
fn token_ids_into(&self, out: &mut Vec<u32>) { fn token_ids_into(&self, out: &mut Vec<u32>) {
match self { match self {
Self::Leaf(leaf) => out.extend_from_slice(&leaf.token_ids), Self::Leaf(leaf) => out.extend_from_slice(&leaf.token_ids),
Self::Branch { token_ids: Some(cached), .. } => { Self::Branch { role, children, .. } => {
out.extend_from_slice(cached);
}
Self::Branch { role, children, token_ids: None, .. } => {
out.push(tokenizer::IM_START); out.push(tokenizer::IM_START);
out.extend(tokenizer::encode(&format!("{}\n", role.as_str()))); out.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
for child in children { for child in children {
@ -581,8 +512,7 @@ impl Ast for AstNode {
fn tokens(&self) -> usize { fn tokens(&self) -> usize {
match self { match self {
Self::Leaf(leaf) => leaf.tokens(), Self::Leaf(leaf) => leaf.tokens(),
Self::Branch { token_ids: Some(cached), .. } => cached.len(), Self::Branch { role, children, .. } => {
Self::Branch { role, children, token_ids: None, .. } => {
1 + role_header_tokens(*role) 1 + role_header_tokens(*role)
+ children.iter().map(|c| c.tokens()).sum::<usize>() + children.iter().map(|c| c.tokens()).sum::<usize>()
+ 1 + newline_tokens() + 1 + newline_tokens()
@ -736,7 +666,6 @@ impl ResponseParser {
think_buf: String::new(), think_buf: String::new(),
in_tool_call: false, in_tool_call: false,
tool_call_buf: String::new(), tool_call_buf: String::new(),
generated_tokens: Vec::new(),
} }
} }
@ -755,34 +684,18 @@ impl ResponseParser {
let handle = tokio::spawn(async move { let handle = tokio::spawn(async move {
let mut parser = self; let mut parser = self;
let agent_name = agent.state.lock().await.provenance.clone(); let agent_name = agent.state.lock().await.provenance.clone();
eprintln!(
"[agent:{agent_name}] parser task start branch_idx={} in_think={}",
parser.branch_idx, parser.in_think,
);
let log_path = format!("/tmp/poc-{}.log", agent_name); let log_path = format!("/tmp/poc-{}.log", agent_name);
let mut log_file = std::fs::OpenOptions::new() let mut log_file = std::fs::OpenOptions::new()
.create(true).append(true).open(&log_path).ok(); .create(true).append(true).open(&log_path).ok();
let mut full_text = String::new(); let mut full_text = String::new();
let mut token_count: usize = 0;
while let Some(event) = stream.recv().await { while let Some(event) = stream.recv().await {
match event { match event {
super::api::StreamToken::Token { id, readout } => { super::api::StreamToken::Token { id, readout } => {
token_count += 1;
if token_count == 1 {
eprintln!("[agent:{agent_name}] parser first token id={}", id);
} else if token_count % 256 == 0 {
eprintln!(
"[agent:{agent_name}] parser token_count={} chars={}",
token_count,
full_text.len(),
);
}
if let Some(r) = readout { if let Some(r) = readout {
if let Ok(mut buf) = agent.readout.lock() { if let Ok(mut buf) = agent.readout.lock() {
buf.push(id, r); buf.push(id, r);
} }
} }
parser.generated_tokens.push(id);
let text = super::tokenizer::decode(&[id]); let text = super::tokenizer::decode(&[id]);
full_text.push_str(&text); full_text.push_str(&text);
let mut ctx = agent.context.lock().await; let mut ctx = agent.context.lock().await;
@ -801,12 +714,6 @@ impl ResponseParser {
} }
} }
super::api::StreamToken::Done { usage } => { super::api::StreamToken::Done { usage } => {
eprintln!(
"[agent:{agent_name}] parser done token_count={} chars={} usage={:?}",
token_count,
full_text.len(),
usage,
);
if let Some(ref mut f) = log_file { if let Some(ref mut f) = log_file {
use std::io::Write; use std::io::Write;
let ctx = agent.context.lock().await; let ctx = agent.context.lock().await;
@ -823,31 +730,19 @@ impl ResponseParser {
let _ = writeln!(f, " unparsed text: {}", &full_text[..end]); let _ = writeln!(f, " unparsed text: {}", &full_text[..end]);
} }
} }
if let Some(ref u) = usage { if let Some(u) = usage {
agent.state.lock().await.last_prompt_tokens = u.prompt_tokens; agent.state.lock().await.last_prompt_tokens = u.prompt_tokens;
} }
let mut ctx = agent.context.lock().await; let mut ctx = agent.context.lock().await;
parser.finish(&mut ctx); parser.finish(&mut ctx);
if let Some(u) = usage {
ctx.note_session_synced(u.total_tokens);
}
return Ok(()); return Ok(());
} }
super::api::StreamToken::Error(e) => { super::api::StreamToken::Error(e) => {
eprintln!("[agent:{agent_name}] parser stream error: {}", e);
return Err(anyhow::anyhow!("{}", e)); return Err(anyhow::anyhow!("{}", e));
} }
} }
} }
eprintln!( Ok(())
"[agent:{agent_name}] parser stream closed without done token_count={} chars={}",
token_count,
full_text.len(),
);
Err(anyhow::anyhow!(
"stream closed without Done event after {} tokens",
token_count,
))
}); });
(rx, handle) (rx, handle)
} }
@ -928,7 +823,7 @@ impl ResponseParser {
} }
fn push_child(&self, ctx: &mut ContextState, child: AstNode) { fn push_child(&self, ctx: &mut ContextState, child: AstNode) {
ctx.push_child_raw(Section::Conversation, self.branch_idx, child); ctx.push_child(Section::Conversation, self.branch_idx, child);
} }
fn flush_content(&mut self, ctx: &mut ContextState) { fn flush_content(&mut self, ctx: &mut ContextState) {
@ -942,69 +837,10 @@ impl ResponseParser {
} }
pub fn finish(mut self, ctx: &mut ContextState) { pub fn finish(mut self, ctx: &mut ContextState) {
// Salvage any in-flight tag accumulators if the stream ended if !self.buf.is_empty() {
// before the close tag arrived (max_tokens, premature EOS,
// server-side cancel). Without this, an unterminated
// <think>...</think> drops all of self.think_buf and only the
// trailing rolling window in self.buf survives — observed as
// "responses cut off, only the last ~8 characters come
// through" because drain_safe keeps `close_tag.len()` bytes
// (8 for `</think>`) at the tail of buf.
if self.in_think {
if !self.buf.is_empty() {
self.think_buf.push_str(&std::mem::take(&mut self.buf));
}
let text = std::mem::take(&mut self.think_buf).trim().to_string();
if !text.is_empty() {
self.push_child(ctx, AstNode::thinking(text));
}
self.in_think = false;
} else if self.in_tool_call {
if !self.buf.is_empty() {
self.tool_call_buf.push_str(&std::mem::take(&mut self.buf));
}
let body = std::mem::take(&mut self.tool_call_buf);
match parse_tool_call_body(&body) {
Some((name, args)) => {
self.flush_content(ctx);
self.push_child(ctx, AstNode::tool_call(&name, &args));
}
None => {
// Body's likely incomplete (no `</tool_call>` ever
// arrived). Wrap as content with the open tag so the
// model can see its own truncated attempt next turn
// rather than losing it silently.
self.content_parts.push(format!("<tool_call>\n{}", body));
}
}
self.in_tool_call = false;
} else if !self.buf.is_empty() {
self.content_parts.push(std::mem::take(&mut self.buf)); self.content_parts.push(std::mem::take(&mut self.buf));
} }
self.flush_content(ctx); self.flush_content(ctx);
// Stamp the authoritative token cache onto the branch.
// Layout mirrors the full chat-template rendering of a
// message block:
//
// IM_START + "assistant\n" [+ "<think>\n"] (prologue — what we sent)
// + generated_tokens (what the server generated, ends in IM_END)
// + "\n" (trailing newline — template-required)
//
// Server only has through the IM_END (model stops on it,
// doesn't emit "\n"). Match-upto lands inside the cache
// right after IM_END; the chunk-walk's straddle path picks
// up the trailing "\n" as the head of the next turn's delta.
// The "\n" between turns matters: without it Qwen sees
// `<|im_end|><|im_start|>` back-to-back (no newline) and
// responds with garbage.
let prologue_text = if self.in_think { "assistant\n<think>\n" } else { "assistant\n" };
let mut cache = Vec::with_capacity(1 + self.generated_tokens.len() + 8);
cache.push(tokenizer::IM_START);
cache.extend(tokenizer::encode(prologue_text));
cache.extend(self.generated_tokens);
cache.extend(tokenizer::encode("\n"));
ctx.set_branch_cache(Section::Conversation, self.branch_idx, cache);
} }
} }
@ -1016,77 +852,20 @@ impl ContextState {
journal: Vec::new(), journal: Vec::new(),
conversation: Vec::new(), conversation: Vec::new(),
conversation_log: None, conversation_log: None,
server_committed_len: 0,
client_match_upto: 0,
} }
} }
// -- Server sync tracking -------------------------------------------------
/// Length of the session's token stream on the server. Updated by
/// the grpc layer from Generate Done events.
pub fn server_committed_len(&self) -> u32 { self.server_committed_len }
/// Prefix of our walk we still believe matches the server
/// byte-for-byte. If less than `server_committed_len`, the next
/// Generate must send `truncating=true` at this offset.
pub fn client_match_upto(&self) -> u32 { self.client_match_upto }
/// Called by the grpc layer after a successful Generate Done:
/// records both the server's new length and the fact that we
/// match up to it (we just sent everything).
pub fn note_session_synced(&mut self, total_tokens: u32) {
self.server_committed_len = total_tokens;
self.client_match_upto = total_tokens;
}
/// Reset match-upto to 0. Called from every mutation that could
/// have touched a region the server already has. For now,
/// conservatively drops alignment entirely — finer-grained
/// tracking (match-upto at the mutated node's offset) is a
/// future optimization.
fn mark_dirty(&mut self) {
self.client_match_upto = 0;
}
// -- Read access ---------------------------------------------------------- // -- Read access ----------------------------------------------------------
pub fn system(&self) -> &[AstNode] { &self.system } pub fn system(&self) -> &[AstNode] { &self.system }
pub fn identity(&self) -> &[AstNode] { &self.identity } pub fn identity(&self) -> &[AstNode] { &self.identity }
pub fn journal(&self) -> &[AstNode] { &self.journal } pub fn journal(&self) -> &[AstNode] { &self.journal }
pub fn conversation(&self) -> &[AstNode] { &self.conversation } pub fn conversation(&self) -> &[AstNode] { &self.conversation }
pub fn conversation_mut(&mut self) -> &mut Vec<AstNode> { &mut self.conversation }
/// Set or clear a single `memory_scores` entry on an Assistant
/// Branch. Used by the full-matrix scorer to attribute per-memory
/// divergence onto the response. `score = None` removes the key;
/// `Some(s)` inserts/overwrites.
///
/// Doesn't affect the Branch's token cache: `memory_scores` is a
/// serialized-but-non-tokenizing annotation. No-op (with a debug
/// log) if the index points to a Leaf or a non-Assistant Branch —
/// callers are typically iterating on stale indices and we'd
/// rather skip than panic.
pub fn set_branch_memory_score(
&mut self,
section: Section,
index: usize,
key: &str,
score: Option<f64>,
) {
let nodes = self.section_mut(section);
let Some(node) = nodes.get_mut(index) else { return };
let AstNode::Branch { role: Role::Assistant, memory_scores, .. } = node
else { return };
match score {
Some(s) => { memory_scores.insert(key.to_string(), s); }
None => { memory_scores.remove(key); }
}
}
pub fn sections(&self) -> [&Vec<AstNode>; 4] { pub fn sections(&self) -> [&Vec<AstNode>; 4] {
[&self.system, &self.identity, &self.journal, &self.conversation] [&self.system, &self.identity, &self.journal, &self.conversation]
} }
} }
impl Ast for ContextState { impl Ast for ContextState {
@ -1119,63 +898,30 @@ impl Ast for ContextState {
} }
/// An image collected from the AST for a request body. The AST stores /// An image collected from the AST for a request body. The AST stores
/// Image metadata collected during `wire_chunks` — the binary + /// the pre-expanded token form (N image_pads) for accurate budget
/// mime plus the absolute token-position range of the image's /// accounting; the wire form collapses each Image to a single
/// pre-expanded placeholder run in the full wire stream. Sent /// `<|image_pad|>` between vision bookends and ships the bytes
/// alongside `append_tokens` in `GenerateRequest` so the server /// separately as multi_modal_data.
/// can attach vision features to the declared positions. Positions
/// are absolute within the full wire walk starting at offset 0,
/// i.e. the same coordinate system as `session.tokens` on the
/// server once the walk has been applied.
#[derive(Clone)]
pub struct WireImage { pub struct WireImage {
pub bytes: Vec<u8>, pub bytes: Vec<u8>,
pub mime: String, pub mime: String,
pub pad_start: u32,
pub pad_end: u32,
}
/// One piece of the wire stream for the gRPC session path. Since
/// images now live inline in the token stream (pre-expanded at AST
/// construction time), there's only one variant — a run of tokens.
/// The parallel `Vec<WireImage>` returned by `wire_chunks` gives the
/// binary + position metadata for each embedded image.
#[derive(Clone)]
pub enum WireChunk {
Tokens(Vec<u32>),
} }
fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>) { fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>) {
match node { match node {
AstNode::Leaf(leaf) => match leaf.body() { AstNode::Leaf(leaf) => match leaf.body() {
NodeBody::Image { bytes, mime, .. } => { NodeBody::Image { bytes, mime, .. } => {
// The Image leaf's token_ids is already tokens.push(tokenizer::VISION_START);
// [VISION_START, IMAGE_PAD * N, VISION_END]. Inline tokens.push(tokenizer::IMAGE_PAD);
// those into the token stream and record the pad-run tokens.push(tokenizer::VISION_END);
// range so the server can attach features to the
// declared positions.
let pad_start = tokens.len() as u32;
tokens.extend_from_slice(leaf.token_ids());
let pad_end = tokens.len() as u32;
images.push(WireImage { images.push(WireImage {
bytes: bytes.clone(), bytes: bytes.clone(),
mime: mime.clone(), mime: mime.clone(),
pad_start,
pad_end,
}); });
} }
_ => tokens.extend_from_slice(leaf.token_ids()), _ => tokens.extend_from_slice(leaf.token_ids()),
}, },
AstNode::Branch { token_ids: Some(cached), children, .. } => { AstNode::Branch { role, children, .. } => {
// Cached branches still need their image children paired
// up with the vision-block ranges embedded in the cached
// token stream — the cache captures vision tokens but not
// the matching bytes/mime.
let base = tokens.len() as u32;
tokens.extend_from_slice(cached);
pair_cached_images(cached, children, base, images);
}
AstNode::Branch { role, children, token_ids: None, .. } => {
tokens.push(tokenizer::IM_START); tokens.push(tokenizer::IM_START);
tokens.extend(tokenizer::encode(&format!("{}\n", role.as_str()))); tokens.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
for c in children { for c in children {
@ -1187,101 +933,6 @@ fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>)
} }
} }
/// Depth-first iterator over Image leaves under a slice of AST nodes.
/// Yields `(bytes, mime)` borrows in document order; doesn't allocate
/// per yield (only a stack of pending nodes).
struct ImageLeaves<'a> {
stack: Vec<&'a AstNode>,
}
impl<'a> ImageLeaves<'a> {
fn new(nodes: &'a [AstNode]) -> Self {
let mut stack = Vec::with_capacity(nodes.len());
stack.extend(nodes.iter().rev());
Self { stack }
}
}
impl<'a> Iterator for ImageLeaves<'a> {
type Item = (&'a [u8], &'a str);
fn next(&mut self) -> Option<Self::Item> {
while let Some(node) = self.stack.pop() {
match node {
AstNode::Leaf(leaf) => {
if let NodeBody::Image { bytes, mime, .. } = leaf.body() {
return Some((bytes, mime));
}
}
AstNode::Branch { children, .. } => {
self.stack.extend(children.iter().rev());
}
}
}
None
}
}
/// Iterator over `(start, end)` token-offset pairs for each
/// `VISION_START..VISION_END` block in a token slice. Panics on an
/// unmatched VISION_START — that's an upstream tokenization bug
/// worth a loud failure.
fn vision_blocks(cached: &[u32]) -> impl Iterator<Item = (usize, usize)> + '_ {
let mut cur = 0;
std::iter::from_fn(move || {
while cur < cached.len() {
if cached[cur] == tokenizer::VISION_START {
let start = cur;
let end_rel = cached[cur..].iter()
.position(|&t| t == tokenizer::VISION_END)
.unwrap_or_else(|| panic!(
"unmatched VISION_START at offset {} in cached branch",
start));
let end = cur + end_rel + 1;
cur = end;
return Some((start, end));
}
cur += 1;
}
None
})
}
/// For a Branch whose `token_ids` are cached and may contain inlined
/// vision blocks (`VISION_START + IMAGE_PAD*N + VISION_END`), recover
/// the matching image bytes/mime from the children and emit one
/// `WireImage` per vision block with the absolute pad offsets in the
/// parent token stream.
///
/// The cache stores tokens but not image payloads; the AST stores
/// image payloads in the children but not their post-cache positions.
/// Pair them by zipping the two iterators; mismatched counts panic
/// loudly because that's an AST/cache invariant violation that
/// would otherwise mis-pair images on the wire.
fn pair_cached_images(
cached: &[u32],
children: &[AstNode],
base_offset: u32,
images: &mut Vec<WireImage>,
) {
let mut blocks = vision_blocks(cached);
let mut leaves = ImageLeaves::new(children);
loop {
match (blocks.next(), leaves.next()) {
(Some((s, e)), Some((bytes, mime))) => images.push(WireImage {
bytes: bytes.to_vec(),
mime: mime.to_string(),
pad_start: base_offset + s as u32,
pad_end: base_offset + e as u32,
}),
(None, None) => break,
(Some(_), None) => panic!(
"cached branch has more vision blocks than image children"),
(None, Some(_)) => panic!(
"cached branch has fewer vision blocks than image children"),
}
}
}
pub fn memory_key(node: &AstNode) -> Option<&str> { pub fn memory_key(node: &AstNode) -> Option<&str> {
match node { match node {
AstNode::Leaf(leaf) => match leaf.body() { AstNode::Leaf(leaf) => match leaf.body() {
@ -1391,92 +1042,6 @@ impl ContextState {
} }
(tokens, images, assistant_ranges) (tokens, images, assistant_ranges)
} }
/// Assemble the wire token stream for the gRPC session path.
///
/// Returns `(chunks, images)`:
/// * `chunks` carries the complete token stream. Image leaves are written
///   inline using their pre-expanded vision block
///   (`VISION_START + IMAGE_PAD*N + VISION_END`), so the result is either
///   empty (no tokens at all) or a single `WireChunk::Tokens`. The list
///   shape is kept so callers can insert breakpoints later.
/// * `images` holds one `WireImage` per embedded image: its bytes, its
///   mime type, and the stream offsets just before (`pad_start`) and just
///   after (`pad_end`) the leaf's tokens were appended.
///
/// Sections are walked in the order system, identity, journal,
/// conversation. `conv_range` selects the conversation slice and `skip`
/// is consulted, in walk order, for identity and conversation nodes only;
/// nodes for which it returns `true` are omitted. Both mirror
/// `wire_prompt`.
///
/// Indexing the conversation with `conv_range` panics if the range is out
/// of bounds.
pub fn wire_chunks<F>(
    &self,
    conv_range: std::ops::Range<usize>,
    mut skip: F,
) -> (Vec<WireChunk>, Vec<WireImage>)
where F: FnMut(&AstNode) -> bool,
{
    /// Append `node`'s wire tokens to `stream`, recording any images
    /// encountered (with their stream offsets) in `images`.
    fn emit(
        node: &AstNode,
        stream: &mut Vec<u32>,
        images: &mut Vec<WireImage>,
    ) {
        match node {
            AstNode::Leaf(leaf) => {
                // Every leaf contributes its token_ids verbatim. For an
                // image leaf those are the pre-expanded vision block, and
                // the span they occupy is recorded next to the bytes.
                let start = stream.len() as u32;
                stream.extend_from_slice(leaf.token_ids());
                if let NodeBody::Image { bytes, mime, .. } = leaf.body() {
                    let end = stream.len() as u32;
                    images.push(WireImage {
                        bytes: bytes.clone(),
                        mime: mime.clone(),
                        pad_start: start,
                        pad_end: end,
                    });
                }
            }
            AstNode::Branch { role, children, token_ids, .. } => match token_ids {
                Some(cached) => {
                    // The cache holds the vision tokens but not the image
                    // bytes; the children are walked to recover them
                    // (same approach as wire_into's cached arm).
                    let base = stream.len() as u32;
                    stream.extend_from_slice(cached);
                    pair_cached_images(cached, children, base, images);
                }
                None => {
                    // No cache: render the turn envelope around the
                    // recursively rendered children.
                    stream.push(tokenizer::IM_START);
                    let header = format!("{}\n", role.as_str());
                    stream.extend(tokenizer::encode(&header));
                    for child in children {
                        emit(child, stream, images);
                    }
                    stream.push(tokenizer::IM_END);
                    stream.extend(tokenizer::encode("\n"));
                }
            },
        }
    }

    let mut stream: Vec<u32> = Vec::new();
    let mut images: Vec<WireImage> = Vec::new();

    for node in self.system() {
        emit(node, &mut stream, &mut images);
    }
    for node in self.identity() {
        if !skip(node) {
            emit(node, &mut stream, &mut images);
        }
    }
    for node in self.journal() {
        emit(node, &mut stream, &mut images);
    }
    for node in &self.conversation()[conv_range] {
        if !skip(node) {
            emit(node, &mut stream, &mut images);
        }
    }

    // An empty stream produces no chunk at all rather than an empty one.
    let mut chunks = Vec::new();
    if !stream.is_empty() {
        chunks.push(WireChunk::Tokens(stream));
    }
    (chunks, images)
}
} }
impl ContextState { impl ContextState {
@ -1496,27 +1061,17 @@ impl ContextState {
dbglog!("warning: log: {:#}", e); dbglog!("warning: log: {:#}", e);
} }
} }
// Conversation appends always go to the tail — past committed —
// so they don't break the match. Any other section mutates a
// region the server may already have, so drop alignment.
if section != Section::Conversation {
self.mark_dirty();
}
self.section_mut(section).push(node); self.section_mut(section).push(node);
} }
/// Push without logging. /// Push without logging.
pub fn push_no_log(&mut self, section: Section, node: AstNode) { pub fn push_no_log(&mut self, section: Section, node: AstNode) {
if section != Section::Conversation {
self.mark_dirty();
}
self.section_mut(section).push(node); self.section_mut(section).push(node);
} }
/// Replace the body of a leaf at `index` in `section`. /// Replace the body of a leaf at `index` in `section`.
/// Re-tokenizes to maintain the invariant. /// Re-tokenizes to maintain the invariant.
pub fn set_message(&mut self, section: Section, index: usize, body: NodeBody) { pub fn set_message(&mut self, section: Section, index: usize, body: NodeBody) {
self.mark_dirty();
let nodes = self.section_mut(section); let nodes = self.section_mut(section);
let node = &mut nodes[index]; let node = &mut nodes[index];
match node { match node {
@ -1542,12 +1097,10 @@ impl ContextState {
} }
pub fn del(&mut self, section: Section, index: usize) -> AstNode { pub fn del(&mut self, section: Section, index: usize) -> AstNode {
self.mark_dirty();
self.section_mut(section).remove(index) self.section_mut(section).remove(index)
} }
pub fn clear(&mut self, section: Section) { pub fn clear(&mut self, section: Section) {
self.mark_dirty();
self.section_mut(section).clear(); self.section_mut(section).clear();
} }
@ -1568,7 +1121,6 @@ impl ContextState {
/// are > 50% of conversation tokens) or oldest conversation entry. /// are > 50% of conversation tokens) or oldest conversation entry.
/// Phase 3: Snap to user message boundary at start. /// Phase 3: Snap to user message boundary at start.
pub fn trim_conversation(&mut self) { pub fn trim_conversation(&mut self) {
self.mark_dirty();
let max_tokens = context_budget_tokens(); let max_tokens = context_budget_tokens();
let fixed = self.system.iter().map(|n| n.tokens()).sum::<usize>() let fixed = self.system.iter().map(|n| n.tokens()).sum::<usize>()
+ self.identity.iter().map(|n| n.tokens()).sum::<usize>() + self.identity.iter().map(|n| n.tokens()).sum::<usize>()
@ -1645,49 +1197,11 @@ impl ContextState {
} }
/// Push a child node into a branch at `index` in `section`. /// Push a child node into a branch at `index` in `section`.
/// Clears the branch's cached token stream — wire-out will recompute
/// from children until the cache is repopulated. If the cache was
/// populated (server had these bytes), drops session alignment.
pub fn push_child(&mut self, section: Section, index: usize, child: AstNode) { pub fn push_child(&mut self, section: Section, index: usize, child: AstNode) {
let node = &mut self.section_mut(section)[index];
let was_cached = matches!(node, AstNode::Branch { token_ids: Some(_), .. });
match node {
AstNode::Branch { children, token_ids, .. } => {
children.push(child);
*token_ids = None;
}
AstNode::Leaf(_) => panic!("push_child on leaf node"),
}
if was_cached {
self.mark_dirty();
}
}
/// Like `push_child` but preserves the branch's cached token stream.
/// Used by the response parser, which is simultaneously populating
/// the cache from the authoritative server stream and pushing the
/// parsed-out children — the two stay consistent by construction.
/// Module-private: callers outside `context.rs` must go through
/// `push_child` so the invariant is maintained.
fn push_child_raw(&mut self, section: Section, index: usize, child: AstNode) {
let node = &mut self.section_mut(section)[index]; let node = &mut self.section_mut(section)[index];
match node { match node {
AstNode::Branch { children, .. } => children.push(child), AstNode::Branch { children, .. } => children.push(child),
AstNode::Leaf(_) => panic!("push_child_raw on leaf node"), AstNode::Leaf(_) => panic!("push_child on leaf node"),
}
}
/// Stamp a verbatim token cache onto the branch at `index` in
/// `section`. Used by the response parser to record the server's
/// authoritative token stream for the just-finished turn.
/// Module-private: the cache is an invariant-load-bearing piece
/// of state, populated only by code that holds the server's
/// ground truth.
fn set_branch_cache(&mut self, section: Section, index: usize, tokens: Vec<u32>) {
let node = &mut self.section_mut(section)[index];
match node {
AstNode::Branch { token_ids, .. } => *token_ids = Some(tokens),
AstNode::Leaf(_) => panic!("set_branch_cache on leaf node"),
} }
} }
@ -1711,14 +1225,6 @@ impl ContextState {
// to at request time. Constants come from Qwen3.5-27B's preprocessor_config. // to at request time. Constants come from Qwen3.5-27B's preprocessor_config.
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Production client-side computation of image-token expansion. With
// the delta-session protocol, the client writes the pre-expanded
// vision block (VISION_START + N*IMAGE_PAD + VISION_END) directly
// into the token stream at Image-leaf construction time, and tells
// the server where each image's pad run lives via
// GenerateRequest.images. Server validates that this N matches
// what the vision encoder actually produces and rejects on
// mismatch — so drift here fails loudly, not silently.
const QWEN3_PATCH_SIZE: u32 = 16; const QWEN3_PATCH_SIZE: u32 = 16;
const QWEN3_MERGE_SIZE: u32 = 2; const QWEN3_MERGE_SIZE: u32 = 2;
const QWEN3_MIN_PIXELS: u64 = 65_536; const QWEN3_MIN_PIXELS: u64 = 65_536;
@ -1752,10 +1258,11 @@ fn smart_resize(h: u32, w: u32, factor: u32, min_pixels: u64, max_pixels: u64) -
} }
} }
/// How many `<|image_pad|>` tokens the Qwen3-VL vision encoder will /// Compute how many `<|image_pad|>` tokens vLLM will emit for an image of
/// produce for an image of the given dimensions. Server verifies /// the given dimensions. Matches Qwen3VL's feature-size calculation exactly:
/// this count against its own encoder run and rejects on mismatch. /// (grid_h * grid_w) / merge_size^2
pub fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 { /// where (grid_h, grid_w) = resized dims / patch_size.
fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 {
let factor = QWEN3_PATCH_SIZE * QWEN3_MERGE_SIZE; let factor = QWEN3_PATCH_SIZE * QWEN3_MERGE_SIZE;
let (rh, rw) = smart_resize(orig_h, orig_w, factor, QWEN3_MIN_PIXELS, QWEN3_MAX_PIXELS); let (rh, rw) = smart_resize(orig_h, orig_w, factor, QWEN3_MIN_PIXELS, QWEN3_MAX_PIXELS);
(rh / QWEN3_PATCH_SIZE) * (rw / QWEN3_PATCH_SIZE) / (QWEN3_MERGE_SIZE * QWEN3_MERGE_SIZE) (rh / QWEN3_PATCH_SIZE) * (rw / QWEN3_PATCH_SIZE) / (QWEN3_MERGE_SIZE * QWEN3_MERGE_SIZE)
@ -2206,34 +1713,29 @@ mod tests {
} }
#[test] #[test]
fn test_wire_prompt_preserves_expanded_image_pads() { fn test_wire_prompt_collapses_image_pads() {
let mut ctx = ContextState::new(); let mut ctx = ContextState::new();
ctx.push_no_log(Section::Conversation, AstNode::branch(Role::User, vec![ ctx.push_no_log(Section::Conversation, AstNode::branch(Role::User, vec![
AstNode::content("look:"), AstNode::content("look:"),
AstNode::image(vec![0xDE, 0xAD], "image/png", 512, 512), AstNode::image(vec![0xDE, 0xAD], "image/png", 512, 512),
])); ]));
// AST side and wire side should both carry N image_pads + bookends — // AST side: N image_pads + bookends, full budget accounting.
// server's session.tokens length must match what vLLM's engine will
// actually process. Binary image bytes are shipped separately in
// multi_modal_data via the WireImage list.
let n_expected = qwen3_image_token_count(512, 512) as usize;
let full = ctx.token_ids(); let full = ctx.token_ids();
let n_image_pads_full = full.iter() let n_image_pads_full = full.iter()
.filter(|&&t| t == tokenizer::IMAGE_PAD).count(); .filter(|&&t| t == tokenizer::IMAGE_PAD).count();
assert_eq!(n_image_pads_full, n_expected); assert_eq!(n_image_pads_full, qwen3_image_token_count(512, 512) as usize);
// Wire side: single image_pad, bytes moved to images list.
let (wire, images, _) = ctx.wire_prompt(0..ctx.conversation().len(), |_| false); let (wire, images, _) = ctx.wire_prompt(0..ctx.conversation().len(), |_| false);
let n_image_pads_wire = wire.iter() let n_image_pads_wire = wire.iter()
.filter(|&&t| t == tokenizer::IMAGE_PAD).count(); .filter(|&&t| t == tokenizer::IMAGE_PAD).count();
assert_eq!(n_image_pads_wire, n_expected); assert_eq!(n_image_pads_wire, 1);
assert_eq!(images.len(), 1); assert_eq!(images.len(), 1);
assert_eq!(images[0].bytes, vec![0xDE, 0xAD]); assert_eq!(images[0].bytes, vec![0xDE, 0xAD]);
assert_eq!(images[0].mime, "image/png"); assert_eq!(images[0].mime, "image/png");
// One pair of vision_start/vision_end bookends around the N pads. // vision_start/vision_end bookends are preserved in wire form.
assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_START).count(), 1); assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_START).count(), 1);
assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_END).count(), 1); assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_END).count(), 1);
} }

View file

@ -17,7 +17,6 @@ pub mod api;
pub mod context; pub mod context;
pub mod oneshot; pub mod oneshot;
pub mod readout; pub mod readout;
pub mod salience;
pub mod tokenizer; pub mod tokenizer;
pub mod tools; pub mod tools;
@ -29,11 +28,6 @@ use context::{AstNode, ContextState, Section, Ast, PendingToolCall, ResponsePars
use crate::mind::log::ConversationLog; use crate::mind::log::ConversationLog;
/// Write a trace line for `agent` to stderr, prefixed with the agent's
/// provenance string.
///
/// The provenance is cloned out of the agent's state inside its own
/// scope, so the state lock is released before the line is printed.
async fn agent_trace(agent: &Arc<Agent>, msg: String) {
    let provenance = {
        let state = agent.state.lock().await;
        state.provenance.clone()
    };
    eprintln!("[agent:{provenance}] {msg}");
}
// --- Activity tracking (RAII guards) --- // --- Activity tracking (RAII guards) ---
pub struct ActivityEntry { pub struct ActivityEntry {
@ -154,14 +148,6 @@ pub struct Agent {
/// token handler, read by UI screens (amygdala). Manifest is /// token handler, read by UI screens (amygdala). Manifest is
/// `None` when the server has readout disabled. /// `None` when the server has readout disabled.
pub readout: readout::SharedReadoutBuffer, pub readout: readout::SharedReadoutBuffer,
/// Long-lived gRPC session to the salience server, lazily opened
/// on first use. Tracks appended tokens so subsequent turns send
/// only the delta (prefix-cache reuse). None when not yet opened
/// or when the session has died and needs reopening.
///
/// Arc-wrapped so the spawned streaming task can share ownership
/// (the task outlives the call site).
pub grpc_session: std::sync::Arc<crate::Mutex<Option<api::salience::SessionHandle>>>,
} }
/// Mutable agent state — behind its own mutex. /// Mutable agent state — behind its own mutex.
@ -182,7 +168,9 @@ pub struct AgentState {
pub think_native: bool, pub think_native: bool,
/// Tool-based thinking — add a "think" tool for structured reasoning. /// Tool-based thinking — add a "think" tool for structured reasoning.
pub think_tool: bool, pub think_tool: bool,
pub sampling: api::SamplingParams, pub temperature: f32,
pub top_p: f32,
pub top_k: u32,
pub activities: Vec<ActivityEntry>, pub activities: Vec<ActivityEntry>,
next_activity_id: u64, next_activity_id: u64,
pub pending_yield: bool, pub pending_yield: bool,
@ -236,7 +224,6 @@ impl Agent {
session_id, session_id,
context: crate::Mutex::new(context), context: crate::Mutex::new(context),
readout, readout,
grpc_session: std::sync::Arc::new(crate::Mutex::new(None)),
state: crate::Mutex::new(AgentState { state: crate::Mutex::new(AgentState {
tools: agent_tools, tools: agent_tools,
mcp_tools: McpToolAccess::All, mcp_tools: McpToolAccess::All,
@ -244,12 +231,9 @@ impl Agent {
reasoning_effort: "none".to_string(), reasoning_effort: "none".to_string(),
think_native: true, think_native: true,
think_tool: false, think_tool: false,
sampling: api::SamplingParams { temperature: 0.6,
temperature: 0.6, top_p: 0.95,
top_p: 0.95, top_k: 20,
top_k: 20,
max_tokens: 4096,
},
activities: Vec::new(), activities: Vec::new(),
next_activity_id: 0, next_activity_id: 0,
pending_yield: false, pending_yield: false,
@ -308,9 +292,6 @@ impl Agent {
// shouldn't bleed into the main emotional readout even // shouldn't bleed into the main emotional readout even
// though they hit the same vLLM server. // though they hit the same vLLM server.
readout: readout::new_shared(), readout: readout::new_shared(),
// Forks get their own session — can't share a bidi stream,
// and forks have different conversation tails anyway.
grpc_session: std::sync::Arc::new(crate::Mutex::new(None)),
state: crate::Mutex::new(AgentState { state: crate::Mutex::new(AgentState {
tools, tools,
mcp_tools: McpToolAccess::None, mcp_tools: McpToolAccess::None,
@ -318,7 +299,9 @@ impl Agent {
reasoning_effort: "none".to_string(), reasoning_effort: "none".to_string(),
think_native: st.think_native, think_native: st.think_native,
think_tool: st.think_tool, think_tool: st.think_tool,
sampling: st.sampling, temperature: st.temperature,
top_p: st.top_p,
top_k: st.top_k,
activities: Vec::new(), activities: Vec::new(),
next_activity_id: 0, next_activity_id: 0,
pending_yield: false, pending_yield: false,
@ -333,35 +316,35 @@ impl Agent {
}) })
} }
/// Assemble a ready-to-send prompt as interleaved wire chunks for pub async fn assemble_prompt_tokens(&self) -> Vec<u32> {
/// the gRPC session path. Text runs are batched; each Image leaf self.assemble_prompt().await.0
/// becomes its own chunk. Also trims the conversation to budget }
/// first so we don't build a prompt the server will reject for
/// length. /// Assemble a ready-to-send prompt: token stream in wire form (each
pub async fn assemble_prompt(&self) /// image collapsed to a single `<|image_pad|>`) paired with the
-> (Vec<context::WireChunk>, Vec<context::WireImage>, u32) /// images to attach as multi_modal_data.
{ ///
/// Pre-send size check: if the context has grown past budget since the
/// last compact (accumulation between turns, a fork's context getting
/// bigger than expected, etc.), trim here rather than letting vLLM
/// reject the request. Client-side tokenization means we already know
/// the exact token count so there's no reason to round-trip an
/// oversize request.
pub async fn assemble_prompt(&self) -> (Vec<u32>, Vec<context::WireImage>) {
let mut ctx = self.context.lock().await; let mut ctx = self.context.lock().await;
if ctx.total_tokens() > context::context_budget_tokens() { if ctx.total_tokens() > context::context_budget_tokens() {
ctx.trim_conversation(); ctx.trim_conversation();
} }
let st = self.state.lock().await; let st = self.state.lock().await;
let conv_len = ctx.conversation().len(); let (mut tokens, images, _) =
let (mut chunks, images) = ctx.wire_chunks(0..conv_len, |_| false); ctx.wire_prompt(0..ctx.conversation().len(), |_| false);
// Assistant-turn prologue. Merge into the trailing Tokens tokens.push(tokenizer::IM_START);
// chunk if there is one, else push as a new chunk.
let mut prologue = vec![tokenizer::IM_START];
if st.think_native { if st.think_native {
prologue.extend(tokenizer::encode("assistant\n<think>\n")); tokens.extend(tokenizer::encode("assistant\n<think>\n"));
} else { } else {
prologue.extend(tokenizer::encode("assistant\n")); tokens.extend(tokenizer::encode("assistant\n"));
} }
match chunks.last_mut() { (tokens, images)
Some(context::WireChunk::Tokens(last)) => last.extend(prologue),
_ => chunks.push(context::WireChunk::Tokens(prologue)),
}
let match_upto = ctx.client_match_upto();
(chunks, images, match_upto)
} }
/// Rebuild the tools section of the system prompt from the current tools list. /// Rebuild the tools section of the system prompt from the current tools list.
@ -397,16 +380,10 @@ impl Agent {
pub async fn turn( pub async fn turn(
agent: Arc<Agent>, agent: Arc<Agent>,
) -> Result<TurnResult> { ) -> Result<TurnResult> {
agent_trace(&agent, format!("turn start")).await;
// Collect finished background tools // Collect finished background tools
{ {
let finished = agent.state.lock().await.active_tools.take_finished(); let finished = agent.state.lock().await.active_tools.take_finished();
if !finished.is_empty() { if !finished.is_empty() {
agent_trace(&agent, format!(
"collecting {} finished background tools",
finished.len(),
)).await;
let mut bg_ds = DispatchState::new(); let mut bg_ds = DispatchState::new();
let mut results = Vec::new(); let mut results = Vec::new();
for entry in finished { for entry in finished {
@ -425,50 +402,21 @@ impl Agent {
loop { loop {
let _thinking = start_activity(&agent, "thinking...").await; let _thinking = start_activity(&agent, "thinking...").await;
agent_trace(&agent, format!(
"turn loop overflow_retries={} empty_retries={}",
overflow_retries, empty_retries,
)).await;
let (rx, _stream_guard) = { let (rx, _stream_guard) = {
agent_trace(&agent, format!("assembling prompt")).await; let (prompt_tokens, images) = agent.assemble_prompt().await;
let (chunks, images, match_upto) = agent.assemble_prompt().await;
let chunk_tokens: usize = chunks.iter().map(|c| match c {
context::WireChunk::Tokens(t) => t.len(),
}).sum();
agent_trace(&agent, format!(
"prompt assembled chunks={} tokens={} images={} match_upto={}",
chunks.len(), chunk_tokens, images.len(), match_upto,
)).await;
let st = agent.state.lock().await; let st = agent.state.lock().await;
let readout_shape = agent.readout.lock().ok().and_then(|buf| { agent.client.stream_completion_mm(
buf.manifest.as_ref().map(|m| { &prompt_tokens,
(m.layers.len() as u32, m.concepts.len() as u32) &images,
}) api::SamplingParams {
}); temperature: st.temperature,
let sampling = st.sampling; top_p: st.top_p,
let priority = st.priority; top_k: st.top_k,
drop(st); },
agent_trace(&agent, format!( st.priority,
"starting stream max_tokens={} temperature={} top_p={} top_k={} priority={:?} readout_shape={:?}",
sampling.max_tokens,
sampling.temperature,
sampling.top_p,
sampling.top_k,
priority,
readout_shape,
)).await;
agent.client.stream_session_mm(
agent.grpc_session.clone(),
chunks,
images,
match_upto,
sampling,
priority,
readout_shape,
) )
}; };
agent_trace(&agent, format!("stream task spawned")).await;
let branch_idx = { let branch_idx = {
let mut ctx = agent.context.lock().await; let mut ctx = agent.context.lock().await;
@ -479,41 +427,11 @@ impl Agent {
idx idx
}; };
let think_native = agent.state.lock().await.think_native; let parser = ResponseParser::new(branch_idx);
let parser = ResponseParser::new(branch_idx, think_native);
let (mut tool_rx, parser_handle) = parser.run(rx, agent.clone()); let (mut tool_rx, parser_handle) = parser.run(rx, agent.clone());
agent_trace(&agent, format!(
"parser started branch_idx={} think_native={}",
branch_idx, think_native,
)).await;
let mut pending_calls: Vec<PendingToolCall> = Vec::new(); let mut pending_calls: Vec<PendingToolCall> = Vec::new();
loop { while let Some(call) = tool_rx.recv().await {
let call = match tokio::time::timeout(
std::time::Duration::from_secs(15),
tool_rx.recv(),
).await {
Ok(Some(call)) => call,
Ok(None) => {
agent_trace(&agent, format!(
"tool channel closed pending_calls={}",
pending_calls.len(),
)).await;
break;
}
Err(_) => {
agent_trace(&agent, format!(
"waiting for parser/tool events pending_calls={}",
pending_calls.len(),
)).await;
continue;
}
};
agent_trace(&agent, format!(
"tool call received id={} name={} args_len={}",
call.id, call.name, call.arguments.len(),
)).await;
let call_clone = call.clone(); let call_clone = call.clone();
let agent_handle = agent.clone(); let agent_handle = agent.clone();
let handle = tokio::spawn(async move { let handle = tokio::spawn(async move {
@ -536,10 +454,8 @@ impl Agent {
} }
// Check for stream/parse errors // Check for stream/parse errors
agent_trace(&agent, format!("awaiting parser task")).await;
match parser_handle.await { match parser_handle.await {
Ok(Err(e)) => { Ok(Err(e)) => {
agent_trace(&agent, format!("parser returned error: {:#}", e)).await;
if context::is_context_overflow(&e) && overflow_retries < 2 { if context::is_context_overflow(&e) && overflow_retries < 2 {
overflow_retries += 1; overflow_retries += 1;
let msg = format!("context overflow — compacting ({}/2)", overflow_retries); let msg = format!("context overflow — compacting ({}/2)", overflow_retries);
@ -553,12 +469,8 @@ impl Agent {
} }
return Err(e); return Err(e);
} }
Err(e) => { Err(e) => return Err(anyhow::anyhow!("parser task panicked: {}", e)),
agent_trace(&agent, format!("parser task panicked: {}", e)).await;
return Err(anyhow::anyhow!("parser task panicked: {}", e));
}
Ok(Ok(())) => { Ok(Ok(())) => {
agent_trace(&agent, format!("parser completed")).await;
// Assistant response was pushed to context by the parser; // Assistant response was pushed to context by the parser;
// log it now that parsing is complete. // log it now that parsing is complete.
let ctx = agent.context.lock().await; let ctx = agent.context.lock().await;
@ -579,10 +491,6 @@ impl Agent {
if !has_content && pending_calls.is_empty() { if !has_content && pending_calls.is_empty() {
if empty_retries < 2 { if empty_retries < 2 {
empty_retries += 1; empty_retries += 1;
agent_trace(&agent, format!(
"empty response retry {}/2",
empty_retries,
)).await;
agent.push_node(AstNode::user_msg( agent.push_node(AstNode::user_msg(
"[system] Your previous response was empty. \ "[system] Your previous response was empty. \
Please respond with text or use a tool." Please respond with text or use a tool."
@ -596,10 +504,6 @@ impl Agent {
// Wait for tool calls to complete // Wait for tool calls to complete
if !pending_calls.is_empty() { if !pending_calls.is_empty() {
ds.had_tool_calls = true; ds.had_tool_calls = true;
agent_trace(&agent, format!(
"waiting for {} foreground tools",
pending_calls.len(),
)).await;
let handles = agent.state.lock().await.active_tools.take_foreground(); let handles = agent.state.lock().await.active_tools.take_foreground();
let mut results = Vec::new(); let mut results = Vec::new();
@ -620,16 +524,6 @@ impl Agent {
if st.pending_model_switch.is_some() { ds.model_switch = st.pending_model_switch.take(); } if st.pending_model_switch.is_some() { ds.model_switch = st.pending_model_switch.take(); }
if st.pending_dmn_pause { ds.dmn_pause = true; st.pending_dmn_pause = false; } if st.pending_dmn_pause { ds.dmn_pause = true; st.pending_dmn_pause = false; }
drop(st);
agent_trace(&agent, format!(
"turn complete yield={} tool_calls={} tool_errors={} model_switch={:?} dmn_pause={}",
ds.yield_requested,
ds.had_tool_calls,
ds.tool_errors,
ds.model_switch,
ds.dmn_pause,
)).await;
return Ok(TurnResult { return Ok(TurnResult {
yield_requested: ds.yield_requested, yield_requested: ds.yield_requested,
had_tool_calls: ds.had_tool_calls, had_tool_calls: ds.had_tool_calls,

View file

@ -12,9 +12,7 @@ use crate::subconscious::{defs, prompts};
use std::collections::HashMap; use std::collections::HashMap;
use std::fs; use std::fs;
use std::io::Write as _;
use std::path::PathBuf; use std::path::PathBuf;
use std::time::Instant;
use super::context::AstNode; use super::context::AstNode;
use super::tools::{self as agent_tools}; use super::tools::{self as agent_tools};
@ -108,10 +106,6 @@ pub async fn save_agent_log(name: &str, agent: &std::sync::Arc<Agent>) -> RunSta
stats stats
} }
/// Print one diagnostic line to stderr in the form `[agent:<agent>] <msg>`.
///
/// `msg` is taken as pre-built `format_args!` output, so callers can pass
/// a formatted message without first allocating a `String`.
fn log_agent_event(agent: &str, msg: std::fmt::Arguments) {
eprintln!("[agent:{agent}] {msg}");
}
fn compute_run_stats(conversation: &[super::context::AstNode]) -> RunStats { fn compute_run_stats(conversation: &[super::context::AstNode]) -> RunStats {
use super::context::{AstNode, NodeBody}; use super::context::{AstNode, NodeBody};
@ -275,7 +269,7 @@ impl AutoAgent {
let mut st = agent.state.lock().await; let mut st = agent.state.lock().await;
st.provenance = format!("standalone:{}", self.name); st.provenance = format!("standalone:{}", self.name);
st.tools = self.tools.clone(); st.tools = self.tools.clone();
st.sampling.temperature = self.temperature; st.temperature = self.temperature;
st.priority = Some(self.priority); st.priority = Some(self.priority);
} }
@ -351,44 +345,20 @@ impl AutoAgent {
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>, bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
) -> Result<(), String> { ) -> Result<(), String> {
dbglog!("[auto] {} starting, {} steps", self.name, self.steps.len()); dbglog!("[auto] {} starting, {} steps", self.name, self.steps.len());
log_agent_event(&self.name, format_args!(
"starting run steps={} temperature={} priority={}",
self.steps.len(), self.temperature, self.priority));
let run_start = Instant::now();
for (i, step) in self.steps.iter().enumerate() { for (i, step) in self.steps.iter().enumerate() {
self.turn = i + 1; self.turn = i + 1;
self.current_phase = step.phase.clone(); self.current_phase = step.phase.clone();
let step_start = Instant::now();
log_agent_event(&self.name, format_args!(
"step {}/{} phase={} prompt_bytes={}",
i + 1, self.steps.len(), step.phase, step.prompt.len()));
if let Some(ref check) = bail_fn { if let Some(ref check) = bail_fn {
log_agent_event(&self.name, format_args!(
"step {}/{} phase={} bail check", i + 1, self.steps.len(), step.phase));
check(i)?; check(i)?;
log_agent_event(&self.name, format_args!(
"step {}/{} phase={} bail ok", i + 1, self.steps.len(), step.phase));
} }
backend.push_node(AstNode::system_msg(&step.prompt)).await; backend.push_node(AstNode::system_msg(&step.prompt)).await;
Agent::turn(backend.0.clone()).await Agent::turn(backend.0.clone()).await
.map_err(|e| { .map_err(|e| format!("{}: {}", self.name, e))?;
log_agent_event(&self.name, format_args!(
"step {}/{} phase={} failed after {:.2}s: {}",
i + 1, self.steps.len(), step.phase,
step_start.elapsed().as_secs_f64(), e));
format!("{}: {}", self.name, e)
})?;
log_agent_event(&self.name, format_args!(
"step {}/{} phase={} done in {:.2}s",
i + 1, self.steps.len(), step.phase,
step_start.elapsed().as_secs_f64()));
} }
log_agent_event(&self.name, format_args!(
"run completed in {:.2}s", run_start.elapsed().as_secs_f64()));
Ok(()) Ok(())
} }
@ -412,29 +382,8 @@ pub async fn run_one_agent(
count: usize, count: usize,
keys: Option<&[String]>, keys: Option<&[String]>,
) -> Result<AgentResult, String> { ) -> Result<AgentResult, String> {
let run_start = Instant::now();
log_agent_event(agent_name, format_args!(
"run_one_agent start pid={} count={} explicit_keys={}",
std::process::id(), count, keys.map(|k| k.len()).unwrap_or(0)));
log_agent_event(agent_name, format_args!(
"env POC_SESSION_ID={:?} POC_TRANSCRIPT_PATH={:?} POC_AGENT_OUTPUT_DIR={:?}",
std::env::var("POC_SESSION_ID").ok(),
std::env::var("POC_TRANSCRIPT_PATH").ok(),
std::env::var("POC_AGENT_OUTPUT_DIR").ok()));
if let Some(session) = crate::session::HookSession::from_env() {
let transcript = session.transcript();
log_agent_event(agent_name, format_args!(
"session={} transcript={} size={} exists={}",
session.session_id, transcript.path, transcript.size, transcript.exists()));
} else {
log_agent_event(agent_name, format_args!("no hook session in environment"));
}
let def = defs::get_def(agent_name) let def = defs::get_def(agent_name)
.ok_or_else(|| format!("no .agent file for {}", agent_name))?; .ok_or_else(|| format!("no .agent file for {}", agent_name))?;
log_agent_event(agent_name, format_args!(
"definition loaded steps={} tools={:?} count={:?} priority={} bail={:?}",
def.steps.len(), def.tools, def.count, def.priority, def.bail));
// State dir for agent output files // State dir for agent output files
let state_dir = std::env::var("POC_AGENT_OUTPUT_DIR") let state_dir = std::env::var("POC_AGENT_OUTPUT_DIR")
@ -443,7 +392,6 @@ pub async fn run_one_agent(
fs::create_dir_all(&state_dir) fs::create_dir_all(&state_dir)
.map_err(|e| format!("create state dir: {}", e))?; .map_err(|e| format!("create state dir: {}", e))?;
unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &state_dir); } unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &state_dir); }
log_agent_event(agent_name, format_args!("state_dir={}", state_dir.display()));
// Build prompt batch — either from explicit keys or the agent's query // Build prompt batch — either from explicit keys or the agent's query
let agent_batch = if let Some(keys) = keys { let agent_batch = if let Some(keys) = keys {
@ -463,8 +411,6 @@ pub async fn run_one_agent(
prompts::AgentBatch { steps: resolved_steps, node_keys: all_keys } prompts::AgentBatch { steps: resolved_steps, node_keys: all_keys }
} else { } else {
let effective_count = def.count.unwrap_or(count); let effective_count = def.count.unwrap_or(count);
log_agent_event(agent_name, format_args!(
"resolving default prompt placeholders effective_count={}", effective_count));
defs::run_agent(&def, effective_count, &Default::default()).await? defs::run_agent(&def, effective_count, &Default::default()).await?
}; };
@ -517,14 +463,6 @@ pub async fn run_one_agent(
})), })),
}); });
let n_steps = agent_batch.steps.len(); let n_steps = agent_batch.steps.len();
log_agent_event(agent_name, format_args!(
"prompt batch ready steps={} node_keys={}",
n_steps, agent_batch.node_keys.len()));
for (i, step) in agent_batch.steps.iter().enumerate() {
log_agent_event(agent_name, format_args!(
"prompt step {}/{} phase={} bytes={}",
i + 1, n_steps, step.phase, step.prompt.len()));
}
// Guard: reject oversized first prompt // Guard: reject oversized first prompt
let max_prompt_bytes = 800_000; let max_prompt_bytes = 800_000;
@ -547,9 +485,6 @@ pub async fn run_one_agent(
let phases: Vec<&str> = agent_batch.steps.iter().map(|s| s.phase.as_str()).collect(); let phases: Vec<&str> = agent_batch.steps.iter().map(|s| s.phase.as_str()).collect();
dbglog!("[{}] {} step(s) {:?}, {}KB initial, {} nodes", dbglog!("[{}] {} step(s) {:?}, {}KB initial, {} nodes",
agent_name, n_steps, phases, first_len / 1024, agent_batch.node_keys.len()); agent_name, n_steps, phases, first_len / 1024, agent_batch.node_keys.len());
log_agent_event(agent_name, format_args!(
"tools enabled: {}",
effective_tools.iter().map(|t| t.name).collect::<Vec<_>>().join(", ")));
let prompts: Vec<String> = agent_batch.steps.iter() let prompts: Vec<String> = agent_batch.steps.iter()
.map(|s| s.prompt.clone()).collect(); .map(|s| s.prompt.clone()).collect();
@ -562,25 +497,18 @@ pub async fn run_one_agent(
let bail_script = def.bail.as_ref().map(|name| defs::agents_dir().join(name)); let bail_script = def.bail.as_ref().map(|name| defs::agents_dir().join(name));
let state_dir_for_bail = state_dir.clone(); let state_dir_for_bail = state_dir.clone();
let our_pid = std::process::id(); let our_pid = std::process::id();
let our_pid_file = std::env::var("POC_AGENT_PID_FILE") let our_pid_file = format!("pid-{}", our_pid);
.unwrap_or_else(|_| format!("pid-{}", our_pid));
let step_phases_for_bail = step_phases.clone(); let step_phases_for_bail = step_phases.clone();
let bail_fn = move |step_idx: usize| -> Result<(), String> { let bail_fn = move |step_idx: usize| -> Result<(), String> {
if let Some(ref script) = bail_script { if let Some(ref script) = bail_script {
let phase = step_phases_for_bail.get(step_idx) let phase = step_phases_for_bail.get(step_idx)
.map(String::as_str).unwrap_or(""); .map(String::as_str).unwrap_or("");
eprintln!(
"[agent:bail] script={} state_dir={} pid_file={} phase={}",
script.display(), state_dir_for_bail.display(), our_pid_file, phase);
let status = std::process::Command::new(script) let status = std::process::Command::new(script)
.arg(&our_pid_file) .arg(&our_pid_file)
.arg(phase) .arg(phase)
.current_dir(&state_dir_for_bail) .current_dir(&state_dir_for_bail)
.status() .status()
.map_err(|e| format!("bail script {:?} failed: {}", script, e))?; .map_err(|e| format!("bail script {:?} failed: {}", script, e))?;
eprintln!(
"[agent:bail] script={} phase={} status={}",
script.display(), phase, status);
if !status.success() { if !status.success() {
return Err(format!("bailed at step {}: {:?} exited {}", return Err(format!("bailed at step {}: {:?} exited {}",
step_idx + 1, script.file_name().unwrap_or_default(), step_idx + 1, script.file_name().unwrap_or_default(),
@ -593,8 +521,6 @@ pub async fn run_one_agent(
call_api_with_tools_sync( call_api_with_tools_sync(
agent_name, &prompts, &step_phases, def.temperature, def.priority, agent_name, &prompts, &step_phases, def.temperature, def.priority,
&effective_tools, Some(&bail_fn))?; &effective_tools, Some(&bail_fn))?;
log_agent_event(agent_name, format_args!(
"run_one_agent completed in {:.2}s", run_start.elapsed().as_secs_f64()));
Ok(AgentResult { Ok(AgentResult {
node_keys: agent_batch.node_keys, node_keys: agent_batch.node_keys,
@ -672,15 +598,6 @@ pub fn spawn_agent(
agent_name: &str, agent_name: &str,
state_dir: &std::path::Path, state_dir: &std::path::Path,
session_id: &str, session_id: &str,
) -> Option<SpawnResult> {
spawn_agent_with_transcript(agent_name, state_dir, session_id, None)
}
pub fn spawn_agent_with_transcript(
agent_name: &str,
state_dir: &std::path::Path,
session_id: &str,
transcript_path: Option<&str>,
) -> Option<SpawnResult> { ) -> Option<SpawnResult> {
let def = defs::get_def(agent_name)?; let def = defs::get_def(agent_name)?;
let first_phase = def.steps.first() let first_phase = def.steps.first()
@ -691,41 +608,17 @@ pub fn spawn_agent_with_transcript(
.join(format!(".consciousness/logs/{}", agent_name)); .join(format!(".consciousness/logs/{}", agent_name));
fs::create_dir_all(&log_dir).ok(); fs::create_dir_all(&log_dir).ok();
let log_path = log_dir.join(format!("{}.log", store::compact_timestamp())); let log_path = log_dir.join(format!("{}.log", store::compact_timestamp()));
let mut agent_log = fs::File::create(&log_path) let agent_log = fs::File::create(&log_path)
.unwrap_or_else(|_| fs::File::create("/dev/null").unwrap()); .unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
let mut cmd = std::process::Command::new("bash"); let child = std::process::Command::new("poc-memory")
cmd.args([ .args(["agent", "run", agent_name, "--count", "1", "--local",
"-lc", "--state-dir", &state_dir.to_string_lossy()])
r#" .env("POC_SESSION_ID", session_id)
set +e .stdout(agent_log.try_clone().unwrap_or_else(|_| fs::File::create("/dev/null").unwrap()))
export POC_AGENT_PID_FILE="pid-$$" .stderr(agent_log)
"$@" .spawn()
status=$? .ok()?;
printf '=== agent process exit status: %s at %s ===\n' "$status" "$(date --iso-8601=seconds)"
exit "$status"
"#,
"poc-memory-agent-wrapper",
"poc-memory", "agent", "run", agent_name, "--count", "1", "--local",
"--state-dir", &state_dir.to_string_lossy(),
]).env("POC_SESSION_ID", session_id);
if let Some(path) = transcript_path.filter(|p| !p.is_empty()) {
cmd.env("POC_TRANSCRIPT_PATH", path);
}
let _ = writeln!(agent_log, "=== spawn {} ===", chrono::Local::now().format("%Y-%m-%dT%H:%M:%S"));
let _ = writeln!(agent_log, "agent={agent_name}");
let _ = writeln!(agent_log, "state_dir={}", state_dir.display());
let _ = writeln!(agent_log, "session_id={session_id}");
let _ = writeln!(agent_log, "transcript_path={}", transcript_path.unwrap_or(""));
let _ = writeln!(agent_log, "first_phase={first_phase}");
let _ = writeln!(agent_log, "command=poc-memory agent run {agent_name} --count 1 --local --state-dir {}", state_dir.display());
let _ = agent_log.flush();
let child_stdout = agent_log.try_clone()
.unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
let child_stderr = agent_log;
let child = cmd.stdout(child_stdout).stderr(child_stderr).spawn().ok()?;
let pid = child.id(); let pid = child.id();
let pid_path = state_dir.join(format!("pid-{}", pid)); let pid_path = state_dir.join(format!("pid-{}", pid));

View file

@ -1,309 +0,0 @@
// agent/salience.rs — peak extraction from per-token concept-readout traces.
//
// Consumes a trace of `ReadoutEntry` (per-token per-layer per-concept
// projections streamed from the vLLM server) and produces a compact
// list of `SaliencePeak` events — one per contiguous above-threshold
// region per concept, placed at the local maximum.
//
// Pure function. No I/O, no async, no side effects. Caller supplies the
// trace slice and manifest; caller decides what to do with the events.
//
// See also: `salience-trace-plumbing-architecture` memory node.
use super::api::ReadoutManifest;
use super::readout::ReadoutEntry;
/// One salient moment in a trace — a concept channel crossed threshold,
/// and we picked the local maximum within the contiguous above-threshold
/// run.
///
/// Derives `PartialEq` (not `Eq`) because `intensity` is a float.
#[derive(Debug, Clone, PartialEq)]
pub struct SaliencePeak {
/// Index into the trace (0-based) where the peak occurred.
pub token_offset: usize,
/// Concept name from the manifest.
pub concept: String,
/// z-score of the peak value vs the trace's own distribution for
/// that concept. A peak is only emitted when its z-score is at least
/// the configured `sigma_threshold`, so this is positive whenever that
/// threshold is positive (the default threshold is 2.0).
pub intensity: f32,
}
/// Knobs that control how peaks are picked out of a readout trace.
#[derive(Debug, Clone)]
pub struct PeakConfig {
    /// Lowest z-score that still qualifies as a peak. The default of 2.0
    /// corresponds to roughly the top couple of percent of samples if the
    /// channel were normally distributed (readouts rarely are).
    pub sigma_threshold: f32,
    /// Smallest standard deviation a concept channel must show before any
    /// of its peaks are reported. On a channel that is numerically flat,
    /// rounding noise divided by a near-zero deviation yields enormous,
    /// meaningless z-scores; channels below this floor are ignored.
    pub min_std: f32,
}

impl Default for PeakConfig {
    /// Two-sigma threshold with a small floor on channel variation.
    fn default() -> Self {
        let sigma_threshold = 2.0;
        let min_std = 1e-4;
        PeakConfig {
            sigma_threshold,
            min_std,
        }
    }
}
/// Extract peak events from a trace for one layer.
///
/// `layer_idx` indexes into the layer dimension of each entry's readout.
/// For every concept in `manifest`, the per-token values are turned into
/// z-scores against that concept's own mean and population standard
/// deviation over the whole trace. Each contiguous run of tokens whose
/// z-score is at least `config.sigma_threshold` yields exactly one peak,
/// placed at the first token that holds the run's maximum.
///
/// Returns an empty vector when `trace` or `manifest.concepts` is empty.
/// An entry that has no row at `layer_idx`, or whose row length differs
/// from the manifest's concept count, contributes a zero for every concept
/// at that token. If that applies to every entry, all channels are flat and
/// no peaks are reported. Channels whose standard deviation is below
/// `config.min_std` are skipped entirely.
///
/// Peaks are returned sorted by `token_offset` ascending; peaks sharing an
/// offset are ordered by `concept` lexicographically for determinism.
pub fn pick_peaks(
    trace: &[ReadoutEntry],
    manifest: &ReadoutManifest,
    layer_idx: usize,
    config: &PeakConfig,
) -> Vec<SaliencePeak> {
    if trace.is_empty() || manifest.concepts.is_empty() {
        return Vec::new();
    }
    let n_concepts = manifest.concepts.len();
    let n_tokens = trace.len();

    // One column of per-token values per concept. Each column is built
    // individually: `vec![Vec::with_capacity(n); k]` clones the element, and
    // a cloned empty Vec does not carry the requested capacity over, so
    // only one column would actually be preallocated.
    let mut by_concept: Vec<Vec<f32>> = (0..n_concepts)
        .map(|_| Vec::with_capacity(n_tokens))
        .collect();
    for entry in trace {
        // Rows that are missing or have the wrong width count as zeros.
        // Sparse zeros vanish into the baseline of the z-score; if they
        // dominate, the trace itself is the problem.
        let row = entry
            .readout
            .get(layer_idx)
            .filter(|row| row.len() == n_concepts);
        match row {
            Some(row) => {
                for (col, v) in by_concept.iter_mut().zip(row.iter()) {
                    col.push(*v);
                }
            }
            None => {
                for col in by_concept.iter_mut() {
                    col.push(0.0);
                }
            }
        }
    }

    let mut peaks: Vec<SaliencePeak> = Vec::new();
    for (concept, values) in manifest.concepts.iter().zip(&by_concept) {
        let (mean, std) = mean_std(values);
        if std < config.min_std {
            continue;
        }

        // `(token_offset, z)` of the best token in the run currently being
        // walked; `None` while below threshold.
        let mut run: Option<(usize, f32)> = None;
        for (i, v) in values.iter().enumerate() {
            let z = (*v - mean) / std;
            if z >= config.sigma_threshold {
                // Strict `>` keeps the earliest token on ties.
                let improves = run.map_or(true, |(_, best)| z > best);
                if improves {
                    run = Some((i, z));
                }
            } else if let Some((token_offset, intensity)) = run.take() {
                // The run just ended: emit its local maximum.
                peaks.push(SaliencePeak {
                    token_offset,
                    concept: concept.clone(),
                    intensity,
                });
            }
        }
        // Flush a run that extends to the end of the trace.
        if let Some((token_offset, intensity)) = run {
            peaks.push(SaliencePeak {
                token_offset,
                concept: concept.clone(),
                intensity,
            });
        }
    }

    peaks.sort_by(|a, b| {
        a.token_offset
            .cmp(&b.token_offset)
            .then_with(|| a.concept.cmp(&b.concept))
    });
    peaks
}
/// Mean and population standard deviation of a slice.
///
/// Both sums are accumulated in `f64` and only the final results are
/// narrowed back to `f32`. Accumulating thousands of `f32` samples directly
/// in `f32` lets rounding error drift into the mean, which then shows up as
/// a spurious non-zero deviation on channels that are actually flat.
///
/// Returns `(0.0, 0.0)` for empty input.
fn mean_std(xs: &[f32]) -> (f32, f32) {
    if xs.is_empty() {
        return (0.0, 0.0);
    }
    let n = xs.len() as f64;
    let mean = xs.iter().map(|&x| f64::from(x)).sum::<f64>() / n;
    let var = xs
        .iter()
        .map(|&x| (f64::from(x) - mean).powi(2))
        .sum::<f64>()
        / n;
    (mean as f32, var.sqrt() as f32)
}
// Unit tests for `pick_peaks`: empty inputs, flat and sub-threshold
// channels, run detection (single, contiguous, separated, trailing),
// per-concept independence, malformed rows, and threshold tuning.
#[cfg(test)]
mod tests {
use super::*;
/// Build a manifest with the given concept names and layer ids.
fn manifest(concepts: &[&str], layers: &[u32]) -> ReadoutManifest {
ReadoutManifest {
concepts: concepts.iter().map(|s| s.to_string()).collect(),
layers: layers.to_vec(),
}
}
/// Build a trace where all entries have one hooked layer and the
/// given per-token values for each concept. `values[t][c]` = value
/// at token t, concept c.
fn trace(values: &[Vec<f32>]) -> Vec<ReadoutEntry> {
values.iter().enumerate().map(|(i, row)| ReadoutEntry {
token_id: i as u32,
readout: vec![row.clone()],
}).collect()
}
#[test]
fn empty_trace_returns_empty() {
let m = manifest(&["curious"], &[63]);
let peaks = pick_peaks(&[], &m, 0, &PeakConfig::default());
assert!(peaks.is_empty());
}
#[test]
fn empty_manifest_returns_empty() {
let m = manifest(&[], &[63]);
let t = trace(&[vec![], vec![], vec![]]);
let peaks = pick_peaks(&t, &m, 0, &PeakConfig::default());
assert!(peaks.is_empty());
}
#[test]
fn flat_channel_produces_no_peaks() {
let m = manifest(&["curious"], &[63]);
let t = trace(&[vec![1.0], vec![1.0], vec![1.0], vec![1.0], vec![1.0]]);
let peaks = pick_peaks(&t, &m, 0, &PeakConfig::default());
assert!(peaks.is_empty(), "flat channel should produce no peaks, got {:?}", peaks);
}
#[test]
fn single_spike_detected() {
// Ten baseline zeros with one 5.0 spike — that single token's
// z-score will easily exceed 2σ.
let m = manifest(&["curious"], &[63]);
let mut rows: Vec<Vec<f32>> = (0..10).map(|_| vec![0.0]).collect();
rows[5] = vec![5.0];
let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
assert_eq!(peaks.len(), 1);
assert_eq!(peaks[0].concept, "curious");
assert_eq!(peaks[0].token_offset, 5);
assert!(peaks[0].intensity >= 2.0);
}
#[test]
fn contiguous_region_emits_one_peak_at_max() {
// Values 0, 0, 0, 2, 5, 3, 0, 0 — a single 2-5-3 hump; exactly one
// peak is expected, and it should land at offset 4 (the 5).
let m = manifest(&["aha"], &[63]);
let rows: Vec<Vec<f32>> = [0.0, 0.0, 0.0, 2.0, 5.0, 3.0, 0.0, 0.0]
.iter().map(|v| vec![*v]).collect();
let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
assert_eq!(peaks.len(), 1, "expected one peak for one contiguous run, got {:?}", peaks);
assert_eq!(peaks[0].token_offset, 4);
}
#[test]
fn multiple_concepts_independent() {
let m = manifest(&["curious", "aha"], &[63]);
// curious spikes at 2, aha spikes at 7
let rows: Vec<Vec<f32>> = (0..10).map(|i| {
let c = if i == 2 { 4.0 } else { 0.0 };
let a = if i == 7 { 4.0 } else { 0.0 };
vec![c, a]
}).collect();
let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
assert_eq!(peaks.len(), 2);
// Sorted by offset — curious(2) comes first, aha(7) second.
assert_eq!(peaks[0].concept, "curious");
assert_eq!(peaks[0].token_offset, 2);
assert_eq!(peaks[1].concept, "aha");
assert_eq!(peaks[1].token_offset, 7);
}
#[test]
fn two_separated_runs_emit_two_peaks() {
// Longer baseline so the two spikes don't dominate the global
// mean/std — 30 tokens of zeros with two 5.0 spikes at 10 and 20.
let m = manifest(&["curious"], &[63]);
let mut rows: Vec<Vec<f32>> = (0..30).map(|_| vec![0.0]).collect();
rows[10] = vec![5.0];
rows[20] = vec![5.0];
let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
assert_eq!(peaks.len(), 2, "expected two peaks for two runs, got {:?}", peaks);
assert_eq!(peaks[0].token_offset, 10);
assert_eq!(peaks[1].token_offset, 20);
}
#[test]
fn trailing_run_is_flushed() {
// Peak runs to the end of the trace — must still emit.
// Use a longer baseline so the trailing spike is genuinely
// above threshold on the global stats.
let m = manifest(&["curious"], &[63]);
let mut rows: Vec<Vec<f32>> = (0..30).map(|_| vec![0.0]).collect();
rows[27] = vec![3.0];
rows[28] = vec![5.0];
rows[29] = vec![4.0];
let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
assert_eq!(peaks.len(), 1, "expected one peak for one trailing run, got {:?}", peaks);
assert_eq!(peaks[0].token_offset, 28, "peak should land at the local max of the trailing run");
}
#[test]
fn sub_threshold_produces_nothing() {
// All non-zero values are small; z-scores won't cross 2σ.
let m = manifest(&["curious"], &[63]);
let rows: Vec<Vec<f32>> = [0.0, 0.1, 0.0, 0.1, 0.0, 0.1, 0.0, 0.1]
.iter().map(|v| vec![*v]).collect();
let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
assert!(peaks.is_empty(), "below-threshold wiggle should produce no peaks, got {:?}", peaks);
}
#[test]
fn layer_out_of_range_returns_empty() {
let m = manifest(&["curious"], &[63]);
let rows: Vec<Vec<f32>> = (0..10).map(|i| vec![if i == 5 { 5.0 } else { 0.0 }]).collect();
// Trace has one layer (index 0); asking for layer 3 should see
// all-zero columns, which are flat and produce no peaks.
let peaks = pick_peaks(&trace(&rows), &m, 3, &PeakConfig::default());
assert!(peaks.is_empty());
}
#[test]
fn manifest_concept_count_mismatch_is_safe() {
// Manifest says 2 concepts; each readout row only has 1 value.
// Rows should be treated as all-zero (via the len check) and
// produce no peaks without panicking.
let m = manifest(&["a", "b"], &[63]);
let rows: Vec<Vec<f32>> = (0..10).map(|_| vec![1.0]).collect();
let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
assert!(peaks.is_empty());
}
#[test]
fn threshold_tunable() {
// Same spike, stricter threshold — no peak.
let m = manifest(&["curious"], &[63]);
let mut rows: Vec<Vec<f32>> = (0..10).map(|_| vec![0.0]).collect();
rows[5] = vec![5.0];
let strict = PeakConfig { sigma_threshold: 100.0, ..PeakConfig::default() };
let peaks = pick_peaks(&trace(&rows), &m, 0, &strict);
assert!(peaks.is_empty());
}
}

View file

@ -33,17 +33,16 @@ fn get() -> Option<&'static Tokenizer> {
TOKENIZER.get() TOKENIZER.get()
} }
/// Return the process-wide tokenizer, panicking if it was never initialized.
///
/// # Panics
/// Panics when `get()` yields `None`, i.e. no tokenizer has been installed.
fn expect_tokenizer() -> &'static Tokenizer {
    let Some(tokenizer) = get() else {
        panic!("tokenizer not initialized; expected ~/.consciousness/tokenizer-qwen35.json");
    };
    tokenizer
}
/// Tokenize a raw string, returning token IDs. /// Tokenize a raw string, returning token IDs.
/// Returns empty vec if the tokenizer is not initialized.
pub fn encode(text: &str) -> Vec<u32> { pub fn encode(text: &str) -> Vec<u32> {
expect_tokenizer() match get() {
.encode(text, false) Some(t) => t.encode(text, false)
.unwrap_or_else(|e| panic!("tokenization failed: {}", e)) .unwrap_or_else(|e| panic!("tokenization failed: {}", e))
.get_ids() .get_ids()
.to_vec() .to_vec(),
None => vec![],
}
} }
/// Tokenize a chat entry with template wrapping: /// Tokenize a chat entry with template wrapping:
@ -67,12 +66,15 @@ pub fn count(text: &str) -> usize {
/// Decode token IDs back to text. /// Decode token IDs back to text.
pub fn decode(ids: &[u32]) -> String { pub fn decode(ids: &[u32]) -> String {
expect_tokenizer() match get() {
.decode(ids, true) Some(t) => t.decode(ids, true)
.unwrap_or_else(|e| panic!("detokenization failed: {}", e)) .unwrap_or_else(|e| panic!("detokenization failed: {}", e)),
None => String::new(),
}
} }
/// Check if the tokenizer is initialized. /// Check if the tokenizer is initialized.
pub fn is_initialized() -> bool { pub fn is_initialized() -> bool {
TOKENIZER.get().is_some() TOKENIZER.get().is_some()
} }

View file

@ -209,24 +209,7 @@ memory_tool!(graph_trace, ref, key: [str]);
// ── Definitions ──────────────────────────────────────────────── // ── Definitions ────────────────────────────────────────────────
async fn jsonargs_memory_new(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args: &serde_json::Value) -> Result<String> { pub fn memory_tools() -> [super::Tool; 20] {
jsonargs_memory_write(agent, args).await
}
/// Compatibility entry point that adds or updates a link between two nodes.
///
/// Both `source` and `target` must be present as strings. When `strength`
/// is present and numeric the original arguments are forwarded unchanged to
/// the link-set handler; otherwise only `source` and `target` are forwarded
/// to the link-add handler, so any other keys in `args` are dropped.
async fn jsonargs_memory_link(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args: &serde_json::Value) -> Result<String> {
    // Validate both endpoints up front, whichever handler ends up running.
    let source = get_str(args, "source")?;
    let target = get_str(args, "target")?;

    let has_strength = matches!(args.get("strength"), Some(v) if v.as_f64().is_some());
    if !has_strength {
        let add_args = serde_json::json!({
            "source": source,
            "target": target,
        });
        return jsonargs_memory_link_add(agent, &add_args).await;
    }
    jsonargs_memory_link_set(agent, args).await
}
pub fn memory_tools() -> [super::Tool; 22] {
use super::Tool; use super::Tool;
macro_rules! tool { macro_rules! tool {
($name:ident, $desc:expr, $params:expr) => { ($name:ident, $desc:expr, $params:expr) => {
@ -251,11 +234,6 @@ pub fn memory_tools() -> [super::Tool; 22] {
"properties": { "key": {"type": "string"}, "content": {"type": "string"} }, "properties": { "key": {"type": "string"}, "content": {"type": "string"} },
"required": ["key", "content"] "required": ["key", "content"]
}"#), }"#),
tool!(memory_new, "Create or update a memory node. Alias for memory_write.", r#"{
"type": "object",
"properties": { "key": {"type": "string"}, "content": {"type": "string"} },
"required": ["key", "content"]
}"#),
tool!(memory_search, "Search via spreading activation from seed keys.", r#"{ tool!(memory_search, "Search via spreading activation from seed keys.", r#"{
"type": "object", "type": "object",
"properties": { "properties": {
@ -286,16 +264,6 @@ pub fn memory_tools() -> [super::Tool; 22] {
"properties": { "source": {"type": "string"}, "target": {"type": "string"} }, "properties": { "source": {"type": "string"}, "target": {"type": "string"} },
"required": ["source", "target"] "required": ["source", "target"]
}"#), }"#),
tool!(memory_link, "Add or update a link between two memory nodes. Alias for memory_link_add/memory_link_set.", r#"{
"type": "object",
"properties": {
"source": {"type": "string"},
"target": {"type": "string"},
"strength": {"type": "number", "description": "Optional; 0.01 to 1.0"},
"label": {"type": "string", "description": "Accepted for compatibility; currently ignored"}
},
"required": ["source", "target"]
}"#),
tool!(memory_delete, "Soft-delete a node.", r#"{ tool!(memory_delete, "Soft-delete a node.", r#"{
"type": "object", "type": "object",
"properties": { "key": {"type": "string"} }, "properties": { "key": {"type": "string"} },

View file

@ -57,18 +57,15 @@ async fn view_image(
let (w, h) = (dim.width as u32, dim.height as u32); let (w, h) = (dim.width as u32, dim.height as u32);
let mime = mime_from_extension(path); let mime = mime_from_extension(path);
let agent = agent.context("view_image requires agent context")?;
// token_count is populated when the image reaches the server via
// AppendImage (the server is authoritative for the IMAGE_PAD
// count). Placeholder of 0 here until AppendImage is wired; the
// leaf's count gets rewritten from the RPC response at send time.
let image_leaf = AstNode::image(bytes.clone(), mime, h, w); let image_leaf = AstNode::image(bytes.clone(), mime, h, w);
let token_count = image_leaf.leaf().unwrap().tokens().saturating_sub(2);
let agent = agent.context("view_image requires agent context")?;
let branch = AstNode::branch(Role::User, vec![image_leaf]); let branch = AstNode::branch(Role::User, vec![image_leaf]);
agent.context.lock().await.push_log(Section::Conversation, branch); agent.context.lock().await.push_log(Section::Conversation, branch);
Ok(format!("loaded {} ({}, {}x{})", a.file_path, mime, w, h)) Ok(format!("loaded {} ({}, {}x{}, {} tokens)",
a.file_path, mime, w, h, token_count))
} }
fn mime_from_extension(path: &std::path::Path) -> &'static str { fn mime_from_extension(path: &std::path::Path) -> &'static str {

View file

@ -1,112 +0,0 @@
// `ch` — minimal channel CLI.
//
// ch send <channel-path> <message>
// ch recv <channel-path> [--all-new] [--min-count N]
//
// Connects to ~/.consciousness/channels/<top>.sock and speaks the
// channel.capnp protocol to the appropriate daemon.
use std::path::PathBuf;
use std::process::ExitCode;
use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
use futures::AsyncReadExt;
use tokio_util::compat::TokioAsyncReadCompatExt;
use consciousness::channel_capnp::channel_server;
/// Directory holding the per-daemon channel sockets:
/// `<home>/.consciousness/channels`. If the home directory cannot be
/// determined, the path is relative to an empty base.
fn channels_dir() -> PathBuf {
    let home = dirs::home_dir().unwrap_or_default();
    home.join(".consciousness/channels")
}
/// Socket path of the daemon serving `channel`.
///
/// Only the part of the channel path before the first `.` selects the
/// daemon; a channel without a `.` is used whole.
fn sock_for(channel: &str) -> PathBuf {
    let top = match channel.split_once('.') {
        Some((head, _rest)) => head,
        None => channel,
    };
    let file_name = format!("{top}.sock");
    channels_dir().join(file_name)
}
/// Connect to the channel daemon listening on the Unix socket `sock` and
/// return its bootstrap `channel_server` client.
///
/// The RPC system is handed to `tokio::task::spawn_local`, so this is meant
/// to be awaited from inside a `LocalSet` (as `main` does).
///
/// # Errors
/// Returns `"connect <path>: <io error>"` when the socket cannot be opened.
async fn connect(sock: &std::path::Path) -> Result<channel_server::Client, String> {
    let stream = match tokio::net::UnixStream::connect(sock).await {
        Ok(stream) => stream,
        Err(e) => return Err(format!("connect {}: {e}", sock.display())),
    };

    // Bridge tokio's stream into futures-io and buffer each direction.
    let (read_half, write_half) = stream.compat().split();
    let transport = twoparty::VatNetwork::new(
        futures::io::BufReader::new(read_half),
        futures::io::BufWriter::new(write_half),
        rpc_twoparty_capnp::Side::Client,
        Default::default(),
    );

    let mut rpc_system = RpcSystem::new(Box::new(transport), None);
    let client: channel_server::Client = rpc_system.bootstrap(rpc_twoparty_capnp::Side::Server);
    // Drive the RPC system in the background for as long as the client lives.
    tokio::task::spawn_local(rpc_system);
    Ok(client)
}
/// Entry point for the `ch` CLI.
///
/// Dispatches on the first argument:
/// - `send <channel> <message...>` joins the remaining arguments with
///   spaces and sends them to the channel.
/// - `recv <channel> [--all-new] [--min-count N]` fetches text from the
///   channel and prints it, ensuring the output ends with a newline.
///
/// Exit codes: 0 on success, 1 on any runtime or argument error, 2 when no
/// command was given at all.
#[tokio::main(flavor = "current_thread")]
async fn main() -> ExitCode {
    let args: Vec<String> = std::env::args().collect();
    if args.len() < 2 {
        // argv can legitimately be empty, so don't index `args[0]` blindly.
        let program = args.first().map_or("ch", String::as_str);
        eprintln!("usage: {program} <send|recv> <channel> [args...]");
        return ExitCode::from(2);
    }
    let cmd = args[1].clone();

    // `connect` uses `spawn_local`, so everything runs inside a LocalSet.
    let local = tokio::task::LocalSet::new();
    let result: Result<(), String> = local.run_until(async move {
        match cmd.as_str() {
            "send" => {
                if args.len() < 4 {
                    return Err("usage: ch send <channel> <message...>".into());
                }
                let channel = &args[2];
                let message = args[3..].join(" ");
                let sock = sock_for(channel);
                let client = connect(&sock).await?;
                let mut req = client.send_request();
                req.get().set_channel(channel);
                req.get().set_message(&message);
                req.send().promise.await.map_err(|e| format!("send: {e}"))?;
                println!("sent to {channel}");
                Ok(())
            }
            "recv" => {
                if args.len() < 3 {
                    return Err("usage: ch recv <channel> [--all-new] [--min-count N]".into());
                }
                let channel = &args[2];
                let mut all_new = false;
                let mut min_count: u32 = 20;

                // Everything after the channel is a flag; `--min-count`
                // consumes the argument that follows it.
                let mut flags = args[3..].iter();
                while let Some(flag) = flags.next() {
                    match flag.as_str() {
                        "--all-new" => all_new = true,
                        "--min-count" => {
                            let raw = flags.next()
                                .ok_or("--min-count needs an argument")?;
                            min_count = raw.parse()
                                .map_err(|e| format!("--min-count: {e}"))?;
                        }
                        other => return Err(format!("unknown arg: {other}")),
                    }
                }

                let sock = sock_for(channel);
                let client = connect(&sock).await?;
                let mut req = client.recv_request();
                req.get().set_channel(channel);
                req.get().set_all_new(all_new);
                req.get().set_min_count(min_count);
                let reply = req.send().promise.await.map_err(|e| format!("recv: {e}"))?;
                let text = reply.get().map_err(|e| e.to_string())?
                    .get_text().map_err(|e| e.to_string())?
                    .to_str().map_err(|e| e.to_string())?;
                print!("{text}");
                if !text.ends_with('\n') { println!(); }
                Ok(())
            }
            other => Err(format!("unknown command: {other} (use send|recv)")),
        }
    }).await;

    match result {
        Ok(()) => ExitCode::SUCCESS,
        Err(e) => {
            eprintln!("error: {e}");
            ExitCode::from(1)
        }
    }
}

View file

@ -1,28 +1,7 @@
#![cfg_attr(feature = "nightly-diagnostics", feature(panic_backtrace_config))] #![feature(panic_backtrace_config)]
#![warn(unreachable_pub)] #![warn(unreachable_pub)]
fn main() { fn main() {
// Force the default panic hook to print a backtrace. stderr is
// already redirected to a daemon log; without this the hook obeys
// RUST_BACKTRACE (unset by default), so the log only shows the
// "note: run with `RUST_BACKTRACE=full`" tail and the actual
// frames are lost.
//
// SAFETY: called before any other thread is spawned, so no
// concurrent env reader can race.
if std::env::var_os("RUST_BACKTRACE").is_none() {
unsafe { std::env::set_var("RUST_BACKTRACE", "1"); }
}
#[cfg(feature = "nightly-diagnostics")]
std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short); std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short);
// rustls 0.23 requires an explicit process-wide CryptoProvider
// when both `ring` and `aws-lc-rs` are in the dep graph (otherwise
// it panics on first ClientConfig::builder()). Pick `ring`.
rustls::crypto::ring::default_provider()
.install_default()
.expect("install rustls crypto provider");
consciousness::user::main() consciousness::user::main()
} }

View file

@ -4,93 +4,44 @@ use anyhow::Result;
use crate::hippocampus as memory; use crate::hippocampus as memory;
use crate::hippocampus::store; use crate::hippocampus::store;
struct DefaultMemoryNode { fn install_default_file(data_dir: &std::path::Path, name: &str, content: &str) -> Result<()> {
key: &'static str, let path = data_dir.join(name);
filename: &'static str, if !path.exists() {
default_content: &'static str, std::fs::write(&path, content)?;
} println!("Created {}", path.display());
const DEFAULT_MEMORY_NODES: &[DefaultMemoryNode] = &[
DefaultMemoryNode {
key: "identity",
filename: "identity.md",
default_content: include_str!("../../defaults/identity.md"),
},
DefaultMemoryNode {
key: "on-consciousness",
filename: "on-consciousness.md",
default_content: include_str!("../../defaults/on-consciousness.md"),
},
DefaultMemoryNode {
key: "memory-instructions-core",
filename: "instructions.md",
default_content: include_str!("../../defaults/instructions.md"),
},
];
pub fn cmd_transcript_tail(path: &str, count: usize, newest_first: bool) -> Result<()> {
let Some(iter) = crate::conversation::TailMessages::open(path) else {
anyhow::bail!("could not open transcript {}", path);
};
let mut messages: Vec<_> = iter.take(count).collect();
if !newest_first {
messages.reverse();
} }
for message in messages {
let role = match message.role {
crate::conversation::TranscriptRole::User => "user",
crate::conversation::TranscriptRole::Assistant => "assistant",
};
let timestamp = message.timestamp.as_deref().unwrap_or("-");
println!("--- {role} offset={} timestamp={} ---", message.offset, timestamp);
println!("{}", message.text);
println!();
}
Ok(()) Ok(())
} }
/// Pick the seed content for a default memory node.
///
/// Lookup order: `<identity_dir>/<filename>`, then `<data_dir>/<filename>`,
/// then the compiled-in default. A file that cannot be read, or that holds
/// only whitespace, is skipped. The data-dir file is only read when the
/// identity-dir file did not supply content.
fn default_node_content(cfg: &crate::config::Config, node: &DefaultMemoryNode) -> String {
    let read_nonempty = |path: std::path::PathBuf| {
        std::fs::read_to_string(path)
            .ok()
            .filter(|text| !text.trim().is_empty())
    };

    read_nonempty(cfg.identity_dir.join(node.filename))
        .or_else(|| read_nonempty(cfg.data_dir.join(node.filename)))
        .unwrap_or_else(|| node.default_content.to_string())
}
pub async fn cmd_init() -> Result<()> { pub async fn cmd_init() -> Result<()> {
let cfg = crate::config::get(); let cfg = crate::config::get();
// Ensure data directory exists // Ensure data directory exists
std::fs::create_dir_all(&cfg.data_dir)?; std::fs::create_dir_all(&cfg.data_dir)?;
// Seed default memory nodes if missing. These used to live as markdown // Install filesystem files (not store nodes)
// files before identity/context moved fully into the memory graph. install_default_file(&cfg.data_dir, "instructions.md",
for node in DEFAULT_MEMORY_NODES { include_str!("../../defaults/instructions.md"))?;
if memory::memory_render(None, node.key, Some(true)).await.is_err() { install_default_file(&cfg.data_dir, "on-consciousness.md",
let content = default_node_content(&cfg, node); include_str!("../../defaults/on-consciousness.md"))?;
let _ = memory::memory_write(None, node.key, &content).await?;
println!("Seeded {} in store from {}", node.key, node.filename); // Seed identity node if empty
} let store = memory::access_local()?;
if !store.contains_key("identity").unwrap_or(false) {
let default = include_str!("../../defaults/identity.md");
store.upsert("identity", default)?;
println!("Seeded identity in store");
} }
store.save()?;
println!("Initialized with {} nodes", store.all_keys().unwrap_or_default().len());
// Create config if none exists // Create config if none exists
let config_path = std::env::var("POC_MEMORY_CONFIG") let config_path = std::env::var("POC_MEMORY_CONFIG")
.map(std::path::PathBuf::from) .map(std::path::PathBuf::from)
.unwrap_or_else(|_| crate::config::config_path()); .unwrap_or_else(|_| {
dirs::home_dir().unwrap_or_default()
.join(".consciousness/config.jsonl")
});
if !config_path.exists() { if !config_path.exists() {
let config_dir = config_path.parent().unwrap(); let config_dir = config_path.parent().unwrap();
std::fs::create_dir_all(config_dir)?; std::fs::create_dir_all(config_dir)?;
@ -100,7 +51,7 @@ pub async fn cmd_init() -> Result<()> {
config_path.display()); config_path.display());
} }
println!("Done. Run `poc-memory admin load-context --stats` to verify."); println!("Done. Run `poc-memory load-context --stats` to verify.");
Ok(()) Ok(())
} }

View file

@ -2,13 +2,8 @@
use anyhow::{bail, Context, Result}; use anyhow::{bail, Context, Result};
use crate::hippocampus as memory; use crate::hippocampus as memory;
use std::time::Instant;
pub async fn cmd_run_agent(agent: &str, count: usize, target: &[String], query: Option<&str>, dry_run: bool, _local: bool, state_dir: Option<&str>) -> Result<()> { pub async fn cmd_run_agent(agent: &str, count: usize, target: &[String], query: Option<&str>, dry_run: bool, _local: bool, state_dir: Option<&str>) -> Result<()> {
let start = Instant::now();
eprintln!(
"[agent-cli] start agent={} count={} targets={} query={:?} dry_run={} local={} state_dir={:?} pid={}",
agent, count, target.len(), query, dry_run, _local, state_dir, std::process::id());
// Mark as agent so tool calls (e.g. poc-memory render) don't // Mark as agent so tool calls (e.g. poc-memory render) don't
// pollute the user's seen set as a side effect // pollute the user's seen set as a side effect
// SAFETY: single-threaded at this point (CLI startup, before any agent work) // SAFETY: single-threaded at this point (CLI startup, before any agent work)
@ -50,19 +45,14 @@ pub async fn cmd_run_agent(agent: &str, count: usize, target: &[String], query:
if let Err(e) = crate::agent::oneshot::run_one_agent( if let Err(e) = crate::agent::oneshot::run_one_agent(
agent, count, Some(&[key.clone()]), agent, count, Some(&[key.clone()]),
).await { ).await {
eprintln!("[agent-cli] ERROR agent={} target={} error={}", agent, key, e);
println!("[{}] ERROR on {}: {}", agent, key, e); println!("[{}] ERROR on {}: {}", agent, key, e);
} }
} }
} else { } else {
if let Err(e) = crate::agent::oneshot::run_one_agent( crate::agent::oneshot::run_one_agent(
agent, count, None, agent, count, None,
).await { ).await.map_err(|e| anyhow::anyhow!("{}", e))?;
eprintln!("[agent-cli] ERROR agent={} error={}", agent, e);
return Err(anyhow::anyhow!("{}", e));
}
} }
eprintln!("[agent-cli] done agent={} elapsed={:.2}s",
agent, start.elapsed().as_secs_f64());
Ok(()) Ok(())
} }

View file

@ -201,23 +201,16 @@ pub fn watch_config(cli: crate::user::CliArgs) {
{ {
crate::dbglog!("[config] watch({}) failed: {}", parent.display(), e); crate::dbglog!("[config] watch({}) failed: {}", parent.display(), e);
return; return;
} }
crate::dbglog!("[config] watching {}", path.display()); crate::dbglog!("[config] watching {}", path.display());
let mut last_seen = config_file_state(&path); while let Ok(res) = rx.recv() {
while let Ok(res) = rx.recv() { let Ok(events) = res else { continue; };
let Ok(events) = res else { continue; }; if !events.iter().any(|e| e.path == path) { continue; }
if !events.iter().any(|e| e.path == path) { continue; }
let current_seen = config_file_state(&path); // Reload both halves.
if current_seen == last_seen { let mem_changed = reload();
continue; let app_changed = match build_figment(&cli).extract::<AppConfig>() {
}
last_seen = current_seen;
// Reload both halves.
let mem_changed = reload();
let app_changed = match build_figment(&cli).extract::<AppConfig>() {
Ok(app) => { Ok(app) => {
install_app(app); install_app(app);
true true
@ -230,13 +223,8 @@ pub fn watch_config(cli: crate::user::CliArgs) {
crate::dbglog!("[config] reloaded (memory_changed={}, app_changed={})", crate::dbglog!("[config] reloaded (memory_changed={}, app_changed={})",
mem_changed, app_changed); mem_changed, app_changed);
} }
}) })
.ok(); .ok();
}
fn config_file_state(path: &std::path::Path) -> Option<(std::time::SystemTime, u64)> {
let meta = std::fs::metadata(path).ok()?;
Some((meta.modified().ok()?, meta.len()))
} }
// ============================================================ // ============================================================

View file

@ -1,113 +0,0 @@
use serde_json::Value;
use super::{ConversationSource, TranscriptMessage, TranscriptRole};
/// Stateless `ConversationSource` implementation whose methods forward to the
/// free functions defined in this module.
pub struct ClaudeSource;

impl ConversationSource for ClaudeSource {
    /// Forwards to this module's `parse_message`.
    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
        parse_message(obj, offset)
    }

    /// Forwards to this module's `is_compaction`.
    fn is_compaction(&self, obj: &Value) -> bool {
        is_compaction(obj)
    }

    /// Byte-level check: true if the raw object bytes contain the text
    /// "This session is being continued" anywhere.
    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
        contains_bytes(obj_bytes, b"This session is being continued")
    }
}
/// Flattens a message `content` value into plain text.
///
/// A JSON string is returned as-is. A JSON array contributes the `text` field
/// of every element whose `type` is `"text"`, joined with single spaces. Any
/// other JSON type, or an empty result, yields `None`.
fn text_content(value: &Value) -> Option<String> {
    let joined = if let Some(plain) = value.as_str() {
        plain.to_owned()
    } else if let Some(blocks) = value.as_array() {
        let mut fragments: Vec<&str> = Vec::new();
        for block in blocks {
            let is_text_block = block.get("type").and_then(Value::as_str) == Some("text");
            if !is_text_block {
                continue;
            }
            if let Some(fragment) = block.get("text").and_then(Value::as_str) {
                fragments.push(fragment);
            }
        }
        fragments.join(" ")
    } else {
        return None;
    };

    if joined.is_empty() {
        None
    } else {
        Some(joined)
    }
}
pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
let role = match obj.get("type").and_then(|v| v.as_str()) {
Some("user") => TranscriptRole::User,
Some("assistant") => TranscriptRole::Assistant,
_ => return None,
};
let msg = obj.get("message").unwrap_or(obj);
let text = msg.get("content").and_then(text_content)?;
let timestamp = obj.get("timestamp")
.and_then(|v| v.as_str())
.map(str::to_string);
Some(TranscriptMessage { role, text, timestamp, offset })
}
pub(crate) fn is_compaction(obj: &Value) -> bool {
obj.get("type").and_then(|v| v.as_str()) == Some("user")
&& obj.get("message")
.and_then(|m| m.get("content"))
.and_then(|c| c.as_str())
.is_some_and(|content| content.starts_with("This session is being continued"))
}
/// Returns true if `needle` occurs as a contiguous subsequence of `haystack`.
///
/// An empty `needle` is contained in every haystack. It is handled up front
/// because `slice::windows` panics when asked for zero-sized windows.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    needle.is_empty() || haystack.windows(needle.len()).any(|w| w == needle)
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // String content is returned verbatim; array content keeps only the
    // "text" blocks (the tool_use block is dropped) joined by a space.
    #[test]
    fn parses_string_and_array_content() {
        let user = json!({
            "timestamp": "2026-06-15T15:00:00.000Z",
            "type": "user",
            "message": { "content": "hello" }
        });
        let assistant = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "assistant",
            "message": {
                "content": [
                    { "type": "text", "text": "hi" },
                    { "type": "tool_use", "name": "ignored" },
                    { "type": "text", "text": "there" }
                ]
            }
        });
        assert_eq!(
            parse_message(&user, 7).unwrap(),
            TranscriptMessage {
                role: TranscriptRole::User,
                text: "hello".to_string(),
                timestamp: Some("2026-06-15T15:00:00.000Z".to_string()),
                offset: 7,
            }
        );
        assert_eq!(parse_message(&assistant, 9).unwrap().text, "hi there");
    }

    // A user entry whose content starts with the marker text is a compaction.
    #[test]
    fn detects_compaction_marker() {
        let obj = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "user",
            "message": {
                "content": "This session is being continued from a previous conversation."
            }
        });
        assert!(is_compaction(&obj));
    }
}

View file

@ -1,105 +0,0 @@
use serde_json::Value;
use super::{ConversationSource, TranscriptMessage, TranscriptRole};
/// Stateless `ConversationSource` implementation whose methods forward to the
/// free functions defined in this module.
pub struct CodexSource;

impl ConversationSource for CodexSource {
    /// Forwards to this module's `parse_message`.
    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
        parse_message(obj, offset)
    }

    /// Forwards to this module's `is_compaction`.
    fn is_compaction(&self, obj: &Value) -> bool {
        is_compaction(obj)
    }

    /// Byte-level check: true if the raw object bytes contain the text
    /// "context_compacted" anywhere.
    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
        contains_bytes(obj_bytes, b"context_compacted")
    }
}
pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
if obj.get("type").and_then(|v| v.as_str()) != Some("event_msg") {
return None;
}
let payload = obj.get("payload")?;
let (role, text) = match payload.get("type").and_then(|v| v.as_str()) {
Some("user_message") => (
TranscriptRole::User,
payload.get("message").and_then(|v| v.as_str())?.to_string(),
),
Some("agent_message") => (
TranscriptRole::Assistant,
payload.get("message").and_then(|v| v.as_str())?.to_string(),
),
_ => return None,
};
if text.is_empty() {
return None;
}
let timestamp = obj.get("timestamp")
.and_then(|v| v.as_str())
.map(str::to_string);
Some(TranscriptMessage { role, text, timestamp, offset })
}
pub(crate) fn is_compaction(obj: &Value) -> bool {
obj.get("type").and_then(|v| v.as_str()) == Some("event_msg")
&& obj.get("payload")
.and_then(|p| p.get("type"))
.and_then(|v| v.as_str()) == Some("context_compacted")
}
/// Returns true if `needle` occurs as a contiguous subsequence of `haystack`.
///
/// An empty `needle` is contained in every haystack. It is handled up front
/// because `slice::windows` panics when asked for zero-sized windows.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    needle.is_empty() || haystack.windows(needle.len()).any(|w| w == needle)
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // user_message / agent_message events become messages; other event
    // payloads and non-event_msg entries are ignored.
    #[test]
    fn parses_event_messages_and_skips_noise() {
        let user = json!({
            "timestamp": "2026-06-15T15:00:00.000Z",
            "type": "event_msg",
            "payload": { "type": "user_message", "message": "start here" }
        });
        let assistant = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "event_msg",
            "payload": { "type": "agent_message", "message": "working" }
        });
        let tool = json!({
            "timestamp": "2026-06-15T15:00:02.000Z",
            "type": "event_msg",
            "payload": { "type": "task_started" }
        });
        let raw = json!({
            "timestamp": "2026-06-15T15:00:03.000Z",
            "type": "response_item",
            "payload": { "type": "message", "role": "user" }
        });
        assert_eq!(parse_message(&user, 1).unwrap().role, TranscriptRole::User);
        assert_eq!(parse_message(&assistant, 2).unwrap().text, "working");
        assert!(parse_message(&tool, 3).is_none());
        assert!(parse_message(&raw, 4).is_none());
    }

    // An event_msg with payload type "context_compacted" is a compaction.
    #[test]
    fn detects_compaction_event() {
        let obj = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "event_msg",
            "payload": { "type": "context_compacted" }
        });
        assert!(is_compaction(&obj));
    }
}

View file

@ -1,110 +0,0 @@
use memchr::memrchr3;
/// Scan backwards through mmap'd bytes, yielding byte slices of complete
/// top-level JSON objects (outermost { to matching }).
///
/// Uses memrchr3 (SIMD) to jump between structurally significant bytes
/// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth,
/// skipping braces inside JSON strings. Returns objects in reverse order
/// (newest first).
pub struct JsonlBackwardIter<'a> {
    /// The full buffer being scanned.
    data: &'a [u8],
    /// Scan cursor: only `data[..pos]` is still to be searched.
    pos: usize,
}

impl<'a> JsonlBackwardIter<'a> {
    /// Creates an iterator positioned at the end of `data`.
    pub fn new(data: &'a [u8]) -> Self {
        Self { data, pos: data.len() }
    }
}

impl<'a> Iterator for JsonlBackwardIter<'a> {
    /// `(offset of the object's opening brace, bytes of the object)`.
    type Item = (usize, &'a [u8]);

    /// Delegates to `next_json_object`, which advances `pos` towards 0.
    fn next(&mut self) -> Option<Self::Item> {
        next_json_object(self.data, &mut self.pos)
    }
}
/// Reports whether the quote at `data[p]` is a real string delimiter.
///
/// Counts the run of backslashes immediately before index `p`; an even count
/// (including zero) means the quote itself is not escaped.
fn is_unescaped_quote(data: &[u8], p: usize) -> bool {
    let preceding_backslashes = data[..p]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    preceding_backslashes % 2 == 0
}
/// Finds the next complete top-level JSON object before `*pos`, scanning
/// towards the start of `data`.
///
/// On success returns `(start, bytes)` where `start` is the index of the
/// object's opening `{` and `bytes` spans that brace through its matching
/// `}`; `*pos` is left at `start`. Returns `None` as soon as no further
/// `{`, `}` or `"` can be found before the cursor.
fn next_json_object<'a>(data: &'a [u8], pos: &mut usize) -> Option<(usize, &'a [u8])> {
    // Find the closing } of the next object, skipping } inside strings.
    let close = {
        let mut in_string = false;
        loop {
            let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?;
            *pos = p;
            let ch = data[p];
            if in_string {
                // Scanning backwards: an unescaped quote is the string's opening quote.
                if ch == b'"' && is_unescaped_quote(data, p) {
                    in_string = false;
                }
                continue;
            }
            match ch {
                b'}' => break p,
                b'"' => in_string = true,
                // A `{` seen before any `}` is ignored.
                _ => {}
            }
        }
    };
    // Track brace depth to find matching {.
    let mut depth: usize = 1;
    let mut in_string = false;
    loop {
        let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?;
        *pos = p;
        let ch = data[p];
        if in_string {
            if ch == b'"' && is_unescaped_quote(data, p) {
                in_string = false;
            }
            // Braces inside strings do not affect depth.
            continue;
        }
        match ch {
            b'"' => { in_string = true; }
            b'}' => { depth += 1; }
            b'{' => {
                depth -= 1;
                if depth == 0 {
                    return Some((*pos, &data[*pos..=close]));
                }
            }
            _ => {}
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Two objects followed by non-JSON text: both objects are yielded, newest
    // first, despite braces and an escaped quote inside string values.
    #[test]
    fn handles_nested_json_and_quoted_braces() {
        let data = br#"{"n":1,"s":"literal } brace"}
{"n":2,"nested":{"s":"escaped quote: \" and { brace"}}
trailing garbage
"#;
        let objs: Vec<_> = JsonlBackwardIter::new(data)
            .map(|(_, bytes)| std::str::from_utf8(bytes).unwrap().to_string())
            .collect();
        assert_eq!(objs.len(), 2);
        assert!(objs[0].contains(r#""n":2"#));
        assert!(objs[1].contains(r#""n":1"#));
    }
}

View file

@ -1,271 +0,0 @@
// Conversation transcript abstraction.
//
// Core code consumes normalized user/assistant messages through this module.
// Product-specific log formats live in the small compatibility sources below.
use memmap2::Mmap;
use serde_json::Value;
use std::fs;
use std::path::Path;
pub mod claude;
pub mod codex;
pub mod jsonl;
pub use jsonl::JsonlBackwardIter;
/// Who authored a normalized transcript message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TranscriptRole {
    /// Message written by the user.
    User,
    /// Message written by the assistant.
    Assistant,
}
/// One user/assistant message in source-independent form.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TranscriptMessage {
    /// Author of the message.
    pub role: TranscriptRole,
    /// Plain text of the message.
    pub text: String,
    /// Timestamp string as found in the entry, if any.
    pub timestamp: Option<String>,
    /// Offset value handed to the parser for this entry
    /// (presumably the entry's byte offset in the transcript — confirm with callers).
    pub offset: u64,
}
/// A transcript log format that can be normalized into `TranscriptMessage`s.
pub trait ConversationSource {
    /// Parses one JSON entry into a message, or `None` if the entry is not a
    /// user/assistant message in this format.
    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage>;

    /// Returns true if the JSON entry is a compaction marker in this format.
    fn is_compaction(&self, obj: &Value) -> bool;

    /// Pre-parse check on the raw entry bytes. The default returns `true`,
    /// i.e. it never rules an entry out.
    fn may_contain_compaction(&self, _obj_bytes: &[u8]) -> bool {
        true
    }
}
/// Source that accepts entries from either `claude::ClaudeSource` or
/// `codex::CodexSource`.
pub struct AnyConversationSource;

impl ConversationSource for AnyConversationSource {
    /// Tries the Claude parser first and falls back to the Codex parser.
    fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
        claude::ClaudeSource.parse_message(obj, offset)
            .or_else(|| codex::CodexSource.parse_message(obj, offset))
    }

    /// True if either source recognises the entry as a compaction.
    fn is_compaction(&self, obj: &Value) -> bool {
        claude::ClaudeSource.is_compaction(obj) || codex::CodexSource.is_compaction(obj)
    }

    /// True if either source's byte-level check passes.
    fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
        claude::ClaudeSource.may_contain_compaction(obj_bytes)
            || codex::CodexSource.may_contain_compaction(obj_bytes)
    }
}
/// Find the byte offset of the last compaction marker in mmap'd transcript data.
/// Returns the byte offset of the JSON object's opening brace.
///
/// Uses `AnyConversationSource`, so both supported formats are recognised.
pub(crate) fn find_last_compaction(data: &[u8]) -> Option<usize> {
    find_last_compaction_with(data, &AnyConversationSource)
}
/// Scans `data` from the end for the most recent entry that `source`
/// recognises as a compaction and returns the offset of its opening brace.
///
/// Entries rejected by `source.may_contain_compaction` are skipped without
/// being parsed; entries that fail to parse as JSON are skipped as well.
pub(crate) fn find_last_compaction_with(
    data: &[u8],
    source: &impl ConversationSource,
) -> Option<usize> {
    JsonlBackwardIter::new(data)
        // Quick byte check before parsing large transcript entries.
        .filter(|(_, raw)| source.may_contain_compaction(raw))
        .find_map(|(start, raw)| {
            let parsed: Value = serde_json::from_slice(raw).ok()?;
            source.is_compaction(&parsed).then_some(start)
        })
}
/// Find the byte offset of the last compaction in a transcript file.
/// Returns None if the file can't be opened or has no compaction.
pub(crate) fn find_last_compaction_in_file(path: &str) -> Option<u64> {
if path.is_empty() { return None; }
let file = fs::File::open(path).ok()?;
let meta = file.metadata().ok()?;
if meta.len() == 0 { return None; }
let mmap = unsafe { Mmap::map(&file).ok()? };
find_last_compaction(&mmap).map(|off| off as u64)
}
/// Memory-maps the transcript at `path`, returning the mapping together with
/// the open file so both stay alive. Yields `None` if the file cannot be
/// opened, its metadata cannot be read, it is empty, or mapping fails.
pub(crate) fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
    let handle = fs::File::open(path).ok()?;
    if handle.metadata().ok()?.len() == 0 {
        return None;
    }
    // NOTE(review): Mmap::map is unsafe; this presumably relies on the
    // transcript not being truncated while mapped — confirm.
    let mapping = unsafe { Mmap::map(&handle) }.ok()?;
    Some((mapping, handle))
}
/// Reverse iterator over user/assistant messages in a transcript file.
/// Yields normalized transcript messages newest-first. The caller decides
/// when to stop (byte budget, count, etc).
pub struct TailMessages {
    /// Open file handle, held alongside the mapping for its whole lifetime.
    _file: fs::File,
    /// Memory mapping of the transcript contents.
    mmap: Mmap,
    /// Scan cursor: only `mmap[..pos]` is still to be searched.
    pos: usize,
}

impl TailMessages {
    /// Opens and maps `path`, starting the scan at the end of the file.
    /// Returns `None` when `mmap_transcript` does.
    pub fn open(path: &str) -> Option<Self> {
        let (mmap, file) = mmap_transcript(path)?;
        let pos = mmap.len();
        Some(Self { _file: file, mmap, pos })
    }
}
impl Iterator for TailMessages {
    type Item = TranscriptMessage;

    /// Returns the next message towards the start of the file, skipping
    /// objects that are not valid JSON or are not user/assistant messages.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // A fresh backward iterator over the unscanned prefix; only its
            // first (i.e. last-in-file) object is taken each round.
            let (offset, obj_bytes) = jsonl::JsonlBackwardIter::new(&self.mmap[..self.pos]).next()?;
            // Continue the next search before this object's opening brace.
            self.pos = offset;
            let obj: Value = match serde_json::from_slice(obj_bytes) {
                Ok(v) => v,
                Err(_) => continue,
            };
            if let Some(message) = AnyConversationSource.parse_message(&obj, offset as u64) {
                return Some(message);
            }
        }
    }
}
/// Get the timestamp of the compaction message at a given byte offset.
/// Returns a human-readable datetime string, or None if unavailable.
pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
let (mmap, _file) = mmap_transcript(path)?;
let start = offset as usize;
if start >= mmap.len() { return None; }
// Find the end of this JSONL line
let end = mmap[start..].iter().position(|&b| b == b'\n')
.map(|p| start + p)
.unwrap_or(mmap.len());
let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?;
if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) {
return Some(ts.to_string());
}
for field in &["createdAt", "created_at", "time"] {
if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) {
return Some(ts.to_string());
}
}
None
}
/// Reports whether the transcript's last compaction differs from the one
/// recorded on the previous call.
///
/// The last seen offset is stored as text in
/// `state_dir/compaction-{session_id}`. When the transcript has a compaction,
/// its offset is written back (write errors are ignored) and the result is
/// `true` unless the stored offset already equals it. When the transcript has
/// no compaction the stored value is left untouched and the result is `false`.
pub fn detect_new_compaction(
    state_dir: &Path,
    session_id: &str,
    transcript_path: &str,
) -> bool {
    let current = find_last_compaction_in_file(transcript_path);
    let marker_file = state_dir.join(format!("compaction-{}", session_id));

    // A missing or unparsable state file counts as "nothing recorded yet".
    let previous: Option<u64> = match fs::read_to_string(&marker_file) {
        Ok(raw) => raw.trim().parse().ok(),
        Err(_) => None,
    };

    let Some(latest) = current else {
        return false;
    };

    // Save current offset
    fs::write(&marker_file, latest.to_string()).ok();
    previous != Some(latest)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    // Writes `content` to a fresh temp file and returns the handle (the file
    // lives as long as the returned value).
    fn write_temp_jsonl(content: &str) -> tempfile::NamedTempFile {
        let mut file = tempfile::NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file.flush().unwrap();
        file
    }

    // Mixed-format transcript: four of the five entries are messages
    // (task_started is not) and they come back newest-first.
    #[test]
    fn tail_messages_yields_normalized_messages_newest_first() {
        let file = write_temp_jsonl(
            r#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"user","message":{"content":"claude user"}}
{"timestamp":"2026-06-15T15:00:01.000Z","type":"assistant","message":{"content":[{"type":"text","text":"claude assistant"}]}}
{"timestamp":"2026-06-15T15:00:02.000Z","type":"event_msg","payload":{"type":"user_message","message":"codex user"}}
{"timestamp":"2026-06-15T15:00:03.000Z","type":"event_msg","payload":{"type":"task_started"}}
{"timestamp":"2026-06-15T15:00:04.000Z","type":"event_msg","payload":{"type":"agent_message","message":"codex assistant"}}
"#,
        );
        let messages: Vec<_> = TailMessages::open(&file.path().to_string_lossy())
            .unwrap()
            .collect();
        assert_eq!(messages.len(), 4);
        assert_eq!(messages[0].text, "codex assistant");
        assert_eq!(messages[1].text, "codex user");
        assert_eq!(messages[2].text, "claude assistant");
        assert_eq!(messages[3].text, "claude user");
        assert!(messages[0].offset > messages[1].offset);
    }

    // Both compaction marker styles are found by the combined source.
    #[test]
    fn detects_claude_and_codex_compactions() {
        let claude = br#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"user","message":{"content":"normal"}}
{"timestamp":"2026-06-15T15:00:01.000Z","type":"user","message":{"content":"This session is being continued from a previous conversation."}}
"#;
        let codex = br#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"event_msg","payload":{"type":"user_message","message":"normal"}}
{"timestamp":"2026-06-15T15:00:01.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
"#;
        assert!(find_last_compaction(claude).is_some());
        assert!(find_last_compaction(codex).is_some());
    }

    // First call sees a new compaction; the second call, with the offset
    // already saved, does not.
    #[test]
    fn detect_new_compaction_tracks_offset_changes() {
        let transcript = write_temp_jsonl(
            r#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
"#,
        );
        let state = tempfile::tempdir().unwrap();
        assert!(detect_new_compaction(
            state.path(),
            "session",
            &transcript.path().to_string_lossy(),
        ));
        assert!(!detect_new_compaction(
            state.path(),
            "session",
            &transcript.path().to_string_lossy(),
        ));
    }
}

View file

@ -11,23 +11,6 @@ use crate::store::{Store, RelationType, StoreView};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque}; use std::collections::{HashMap, HashSet, VecDeque};
use std::sync::{OnceLock, RwLock};
const EXACT_CC_MAX_DEG: usize = 512;
const APPROX_CC_PAIRS: u64 = 4096;
const CC_CACHE_TTL_SECS: i64 = 15 * 60;
#[derive(Clone, Copy)]
struct CachedCc {
value: f32,
computed_at: i64,
}
static CC_CACHE: OnceLock<RwLock<HashMap<String, CachedCc>>> = OnceLock::new();
fn cc_cache() -> &'static RwLock<HashMap<String, CachedCc>> {
CC_CACHE.get_or_init(|| RwLock::new(HashMap::new()))
}
/// Community info for reporting /// Community info for reporting
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
@ -49,13 +32,11 @@ pub struct Edge {
/// The in-memory graph built from store nodes + relations /// The in-memory graph built from store nodes + relations
pub struct Graph { pub struct Graph {
/// Adjacency list: node key → list of edges /// Adjacency list: node key → list of edges
adj: HashMap<String, Vec<Edge>>, adj: HashMap<String, Vec<Edge>>,
/// Neighbor sets for membership tests in graph metrics. /// All node keys
neighbor_sets: HashMap<String, HashSet<String>>, keys: HashSet<String>,
/// All node keys /// Community labels (from label propagation)
keys: HashSet<String>,
/// Community labels (from label propagation)
communities: HashMap<String, u32>, communities: HashMap<String, u32>,
} }
@ -86,22 +67,22 @@ impl Graph {
.unwrap_or_default() .unwrap_or_default()
} }
/// Just neighbor keys /// Just neighbor keys
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> { pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
self.neighbor_sets.get(key) self.adj.get(key)
.map(|neighbors| neighbors.iter().map(String::as_str).collect()) .map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
.unwrap_or_default() .unwrap_or_default()
} }
/// Jaccard similarity between two nodes' neighborhoods. /// Jaccard similarity between two nodes' neighborhoods.
/// Measures overlap: |intersection| / |union| of their neighbor sets. /// Measures overlap: |intersection| / |union| of their neighbor sets.
pub fn jaccard(&self, a: &str, b: &str) -> f32 { pub fn jaccard(&self, a: &str, b: &str) -> f32 {
let Some(na) = self.neighbor_sets.get(a) else { return 0.0 }; let na = self.neighbor_keys(a);
let Some(nb) = self.neighbor_sets.get(b) else { return 0.0 }; let nb = self.neighbor_keys(b);
let intersection = na.intersection(nb).count(); let intersection = na.intersection(&nb).count();
let union = na.len() + nb.len() - intersection; let union = na.union(&nb).count();
if union == 0 { 0.0 } else { intersection as f32 / union as f32 } if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
} }
/// Compute Jaccard-based strength for every edge in the graph. /// Compute Jaccard-based strength for every edge in the graph.
/// Returns (source_key, target_key, jaccard_strength) triples. /// Returns (source_key, target_key, jaccard_strength) triples.
@ -221,78 +202,41 @@ impl Graph {
} }
} }
/// Local clustering coefficient: fraction of a node's neighbors /// Local clustering coefficient: fraction of a node's neighbors
/// that are also neighbors of each other. /// that are also neighbors of each other.
/// cc(v) = 2E / (deg * (deg - 1)) /// cc(v) = 2E / (deg * (deg - 1))
pub fn clustering_coefficient(&self, key: &str) -> f32 { pub fn clustering_coefficient(&self, key: &str) -> f32 {
let now = crate::store::now_epoch(); let neighbors = self.neighbor_keys(key);
if let Some(cc) = cc_cache().read().unwrap().get(key).copied() let deg = neighbors.len();
&& now - cc.computed_at < CC_CACHE_TTL_SECS if deg < 2 {
{ return 0.0;
return cc.value; }
}
let cc = self.clustering_coefficient_uncached(key);
cc_cache().write().unwrap().insert(key.to_owned(), CachedCc {
value: cc,
computed_at: now,
});
cc
}
fn clustering_coefficient_uncached(&self, key: &str) -> f32 { let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
let Some(neighbors) = self.neighbor_sets.get(key) else { let mut triangles = 0u32;
return 0.0; for i in 0..neighbor_vec.len() {
}; for j in (i + 1)..neighbor_vec.len() {
let deg = neighbors.len(); let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
if deg < 2 { if ni_neighbors.contains(neighbor_vec[j]) {
return 0.0; triangles += 1;
} }
}
}
let neighbor_vec: Vec<&str> = neighbors.iter().map(String::as_str).collect(); (2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
if deg <= EXACT_CC_MAX_DEG { }
let mut linked = 0u64;
for i in 0..neighbor_vec.len() {
for j in (i + 1)..neighbor_vec.len() {
if self.neighbor_sets
.get(neighbor_vec[i])
.is_some_and(|n| n.contains(neighbor_vec[j])) {
linked += 1;
}
}
}
return (2.0 * linked as f32) / (deg as f32 * (deg as f32 - 1.0));
}
let mut linked = 0u64; /// Average clustering coefficient across all nodes with deg >= 2
let samples = APPROX_CC_PAIRS.min((deg as u64 * (deg as u64 - 1)) / 2); pub fn avg_clustering_coefficient(&self) -> f32 {
for sample in 0..samples { let mut sum = 0.0f32;
let i = ((sample.wrapping_mul(1_103_515_245).wrapping_add(12_345)) % deg as u64) as usize; let mut count = 0u32;
let mut j = ((sample.wrapping_mul(2_654_435_761).wrapping_add(97_531)) % deg as u64) as usize; for key in &self.keys {
if i == j { if self.degree(key) >= 2 {
j = (j + 1) % deg; sum += self.clustering_coefficient(key);
} count += 1;
if self.neighbor_sets }
.get(neighbor_vec[i]) }
.is_some_and(|n| n.contains(neighbor_vec[j])) { if count == 0 { 0.0 } else { sum / count as f32 }
linked += 1;
}
}
linked as f32 / samples as f32
}
/// Average clustering coefficient across all nodes with deg >= 2
pub fn avg_clustering_coefficient(&self) -> f32 {
let mut sum = 0.0f32;
let mut count = 0u32;
for key in &self.keys {
match self.neighbor_sets.get(key.as_str()) {
Some(s) if s.len() >= 2 => s,
_ => continue,
};
sum += self.clustering_coefficient(key);
count += 1;
}
if count == 0 { 0.0 } else { sum / count as f32 }
} }
/// Average shortest path length (sampled BFS from up to 100 nodes) /// Average shortest path length (sampled BFS from up to 100 nodes)
@ -322,17 +266,15 @@ impl Graph {
dist.insert(start.to_string(), 0u32); dist.insert(start.to_string(), 0u32);
queue.push_back(start.to_string()); queue.push_back(start.to_string());
while let Some(node) = queue.pop_front() { while let Some(node) = queue.pop_front() {
let d = dist[&node]; let d = dist[&node];
if let Some(neighbors) = self.neighbor_sets.get(&node) { for neighbor in self.neighbor_keys(&node) {
for neighbor in neighbors { if !dist.contains_key(neighbor) {
if !dist.contains_key(neighbor) { dist.insert(neighbor.to_string(), d + 1);
dist.insert(neighbor.clone(), d + 1); queue.push_back(neighbor.to_string());
queue.push_back(neighbor.clone()); }
} }
} }
}
}
dist dist
} }
@ -563,39 +505,16 @@ impl Graph {
/// Build graph from store data (with community detection) /// Build graph from store data (with community detection)
pub fn build_graph(store: &impl StoreView) -> Graph { pub fn build_graph(store: &impl StoreView) -> Graph {
let (adj, keys) = build_adjacency(store); let (adj, keys) = build_adjacency(store);
let neighbor_sets = build_neighbor_sets(&adj); let communities = label_propagation(&keys, &adj, 20);
let communities = label_propagation(&keys, &adj, 20); Graph { adj, keys, communities }
Graph {
adj,
neighbor_sets,
keys,
communities,
}
} }
/// Build graph without community detection — for spreading activation /// Build graph without community detection — for spreading activation
/// searches where we only need the adjacency list. /// searches where we only need the adjacency list.
pub fn build_graph_fast(store: &impl StoreView) -> Graph { pub fn build_graph_fast(store: &impl StoreView) -> Graph {
let (adj, keys) = build_adjacency(store); let (adj, keys) = build_adjacency(store);
let neighbor_sets = build_neighbor_sets(&adj); Graph { adj, keys, communities: HashMap::new() }
Graph {
adj,
neighbor_sets,
keys,
communities: HashMap::new(),
}
}
fn build_neighbor_sets(adj: &HashMap<String, Vec<Edge>>) -> HashMap<String, HashSet<String>> {
adj.iter()
.map(|(key, edges)| {
let neighbors = edges.iter()
.map(|edge| edge.target.clone())
.collect();
(key.clone(), neighbors)
})
.collect()
} }
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) { fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {

View file

@ -17,6 +17,7 @@ pub mod query;
pub mod spectral; pub mod spectral;
pub mod neuro; pub mod neuro;
pub mod counters; pub mod counters;
pub mod transcript;
use std::cell::RefCell; use std::cell::RefCell;
use std::path::PathBuf; use std::path::PathBuf;

View file

@ -0,0 +1,340 @@
// Transcript JSONL parsing utilities.
//
// Provides mmap-based backward scanning of Claude Code transcript files
// and compaction detection. Used by memory-search (hook mode) and
// parse-claude-conversation (debug tool).
use memchr::memrchr3;
use memmap2::Mmap;
use serde_json::Value;
use std::fs;
use std::path::Path;
/// Scan backwards through mmap'd bytes, yielding byte slices of complete
/// top-level JSON objects (outermost { to matching }).
///
/// Uses memrchr3 (SIMD) to jump between structurally significant bytes
/// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth,
/// skipping braces inside JSON strings. Returns objects in reverse order
/// (newest first).
pub struct JsonlBackwardIter<'a> {
    /// The full buffer being scanned.
    data: &'a [u8],
    /// Scan cursor: only `data[..pos]` is still to be searched.
    pos: usize,
}

impl<'a> JsonlBackwardIter<'a> {
    /// Creates an iterator positioned at the end of `data`.
    pub fn new(data: &'a [u8]) -> Self {
        Self { data, pos: data.len() }
    }
}
impl<'a> Iterator for JsonlBackwardIter<'a> {
    /// Raw bytes of one top-level JSON object, opening `{` through matching `}`.
    type Item = &'a [u8];

    /// Yields the next complete object towards the start of the buffer, or
    /// `None` once no further `{`, `}` or `"` can be found before the cursor.
    fn next(&mut self) -> Option<Self::Item> {
        /// True when the `"` at `data[p]` is preceded by an even number of
        /// backslashes, i.e. it is a real string delimiter. Every byte before
        /// `p` is examined, including index 0 (the previous inline loop
        /// stopped one byte short of the start of the buffer).
        fn quote_is_unescaped(data: &[u8], p: usize) -> bool {
            data[..p]
                .iter()
                .rev()
                .take_while(|&&b| b == b'\\')
                .count()
                % 2
                == 0
        }

        let data = self.data;

        // Find the closing } of the next object, skipping } inside strings
        let close = {
            let mut in_string = false;
            loop {
                let p = memrchr3(b'{', b'}', b'"', &data[..self.pos])?;
                self.pos = p;
                let ch = data[p];
                if in_string {
                    // Scanning backwards: an unescaped quote opens the string.
                    if ch == b'"' && quote_is_unescaped(data, p) {
                        in_string = false;
                    }
                    continue;
                }
                match ch {
                    b'}' => break p,
                    b'"' => in_string = true,
                    _ => {}
                }
            }
        };

        // Track brace depth to find matching {
        let mut depth: usize = 1;
        let mut in_string = false;
        loop {
            let p = memrchr3(b'{', b'}', b'"', &data[..self.pos])?;
            self.pos = p;
            let ch = data[p];
            if in_string {
                if ch == b'"' && quote_is_unescaped(data, p) {
                    in_string = false;
                }
                // { and } inside strings don't affect depth
                continue;
            }
            match ch {
                b'"' => in_string = true,
                b'}' => depth += 1,
                b'{' => {
                    depth -= 1;
                    if depth == 0 {
                        return Some(&data[p..=close]);
                    }
                }
                _ => {}
            }
        }
    }
}
/// Find the byte offset of the last compaction summary in mmap'd transcript data.
///
/// Scans backward for a user-type message whose content starts with
/// "This session is being continued". Returns the byte offset of the
/// JSON object's opening brace.
pub(crate) fn find_last_compaction(data: &[u8]) -> Option<usize> {
let marker = b"This session is being continued";
for obj_bytes in JsonlBackwardIter::new(data) {
// Quick byte check before parsing
if !contains_bytes(obj_bytes, marker) {
continue;
}
let obj: Value = match serde_json::from_slice(obj_bytes) {
Ok(v) => v,
Err(_) => continue,
};
if obj.get("type").and_then(|v| v.as_str()) != Some("user") {
continue;
}
if let Some(content) = obj.get("message")
.and_then(|m| m.get("content"))
.and_then(|c| c.as_str())
&& content.starts_with("This session is being continued") {
let offset = obj_bytes.as_ptr() as usize - data.as_ptr() as usize;
return Some(offset);
}
}
None
}
/// Find the byte offset of the last compaction in a transcript file.
/// Returns None if the file can't be opened or has no compaction.
pub(crate) fn find_last_compaction_in_file(path: &str) -> Option<u64> {
if path.is_empty() { return None; }
let file = fs::File::open(path).ok()?;
let meta = file.metadata().ok()?;
if meta.len() == 0 { return None; }
let mmap = unsafe { Mmap::map(&file).ok()? };
find_last_compaction(&mmap).map(|off| off as u64)
}
/// Memory-maps the transcript at `path`, returning the mapping together with
/// the open file so both stay alive. Yields `None` if the file cannot be
/// opened, its metadata cannot be read, it is empty, or mapping fails.
pub(crate) fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
    let handle = fs::File::open(path).ok()?;
    if handle.metadata().ok()?.len() == 0 {
        return None;
    }
    // NOTE(review): Mmap::map is unsafe; this presumably relies on the
    // transcript not being truncated while mapped — confirm.
    let mapping = unsafe { Mmap::map(&handle) }.ok()?;
    Some((mapping, handle))
}
/// Returns true if `needle` occurs as a contiguous subsequence of `haystack`.
///
/// An empty `needle` is contained in every haystack. It is handled up front
/// because `slice::windows` panics when asked for zero-sized windows.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    needle.is_empty() || haystack.windows(needle.len()).any(|w| w == needle)
}
/// Reverse iterator over user/assistant messages in a transcript file.
/// Yields (role, text, timestamp) tuples newest-first. The caller decides
/// when to stop (byte budget, count, etc).
pub struct TailMessages {
    /// Open file handle, held alongside the mapping for its whole lifetime.
    _file: fs::File,
    /// Memory mapping of the transcript contents.
    mmap: Mmap,
    /// Scan cursor: only `mmap[..pos]` is still to be searched.
    pos: usize,
}

impl TailMessages {
    /// Opens and maps `path`, starting the scan at the end of the file.
    /// Returns `None` when `mmap_transcript` does.
    pub fn open(path: &str) -> Option<Self> {
        let (mmap, file) = mmap_transcript(path)?;
        let pos = mmap.len();
        Some(Self { _file: file, mmap, pos })
    }
}
impl Iterator for TailMessages {
    /// `(role, text, timestamp)`: role is `"user"` or `"assistant"`, and
    /// timestamp is empty when the entry has no string `timestamp` field.
    type Item = (String, String, String);

    /// Returns the next user/assistant message towards the start of the file.
    ///
    /// Objects that are not user/assistant entries, fail to parse, or carry
    /// no text are skipped. Returns `None` once no further complete object
    /// can be found before the cursor.
    fn next(&mut self) -> Option<Self::Item> {
        /// True when the `"` at `data[p]` is preceded by an even number of
        /// backslashes, i.e. it is a real string delimiter. Every byte before
        /// `p` is examined, including index 0 (the previous inline loops
        /// stopped one byte short of the start of the buffer).
        fn quote_is_unescaped(data: &[u8], p: usize) -> bool {
            data[..p]
                .iter()
                .rev()
                .take_while(|&&b| b == b'\\')
                .count()
                % 2
                == 0
        }

        loop {
            // Find closing }, skipping } inside strings
            let close = {
                let mut in_string = false;
                loop {
                    let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?;
                    self.pos = p;
                    let ch = self.mmap[p];
                    if in_string {
                        if ch == b'"' && quote_is_unescaped(&self.mmap, p) {
                            in_string = false;
                        }
                        continue;
                    }
                    match ch {
                        b'}' => break p,
                        b'"' => in_string = true,
                        _ => {}
                    }
                }
            };

            // Track brace depth to find matching {
            let mut depth: usize = 1;
            let mut in_string = false;
            let open = loop {
                let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?;
                self.pos = p;
                let ch = self.mmap[p];
                if in_string {
                    if ch == b'"' && quote_is_unescaped(&self.mmap, p) {
                        in_string = false;
                    }
                    continue;
                }
                match ch {
                    b'"' => { in_string = true; }
                    b'}' => { depth += 1; }
                    b'{' => {
                        depth -= 1;
                        if depth == 0 { break p; }
                    }
                    _ => {}
                }
            };

            let obj_bytes = &self.mmap[open..=close];

            // The "type" field is near the start of top-level objects.
            // Only check the first 200 bytes to avoid scanning megabyte objects.
            let prefix = &obj_bytes[..obj_bytes.len().min(200)];
            let is_user = memchr::memmem::find(prefix, b"\"type\":\"user\"").is_some();
            let is_assistant = !is_user
                && memchr::memmem::find(prefix, b"\"type\":\"assistant\"").is_some();
            if !is_user && !is_assistant { continue; }

            let obj: Value = match serde_json::from_slice(obj_bytes) {
                Ok(v) => v,
                Err(_) => continue,
            };
            let msg_type = if is_user { "user" } else { "assistant" };

            // Entries without a "message" wrapper are treated as the message itself.
            let msg = obj.get("message").unwrap_or(&obj);
            let text = match msg.get("content") {
                Some(Value::String(s)) => s.clone(),
                // Array content: keep only "text" blocks, joined by spaces.
                Some(Value::Array(arr)) => {
                    arr.iter()
                        .filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text"))
                        .filter_map(|b| b.get("text").and_then(|v| v.as_str()))
                        .collect::<Vec<_>>()
                        .join(" ")
                }
                _ => continue,
            };
            if text.is_empty() { continue; }

            let timestamp = obj.get("timestamp")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();
            return Some((msg_type.to_string(), text, timestamp));
        }
    }
}
/// Get the timestamp of the compaction message at a given byte offset.
/// Returns a human-readable datetime string, or None if unavailable.
pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
let (mmap, _file) = mmap_transcript(path)?;
let start = offset as usize;
if start >= mmap.len() { return None; }
// Find the end of this JSONL line
let end = mmap[start..].iter().position(|&b| b == b'\n')
.map(|p| start + p)
.unwrap_or(mmap.len());
let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?;
// Claude Code transcript entries have a "timestamp" field (ISO 8601)
if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) {
return Some(ts.to_string());
}
// Fallback: try "createdAt" or similar fields
for field in &["createdAt", "created_at", "time"] {
if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) {
return Some(ts.to_string());
}
}
None
}
/// Report whether the transcript's latest compaction differs from the one
/// recorded on the previous check.
///
/// The last seen offset is kept as decimal text in
/// `state_dir/compaction-{session_id}`. Whenever the transcript contains a
/// compaction, that file is rewritten with the current offset (write
/// failures are ignored).
///
/// Returns `true` when a compaction exists and either nothing valid was
/// recorded before or the recorded offset is different; returns `false`
/// when the transcript has no compaction or the offset is unchanged.
pub fn detect_new_compaction(
    state_dir: &Path,
    session_id: &str,
    transcript_path: &str,
) -> bool {
    // No compaction in the transcript: nothing new, nothing to record.
    let Some(current) = find_last_compaction_in_file(transcript_path) else {
        return false;
    };

    let marker = state_dir.join(format!("compaction-{}", session_id));
    // A missing or unparsable marker counts as "never seen".
    let previous = fs::read_to_string(&marker)
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok());
    let changed = previous != Some(current);

    // Best effort: remember the offset for the next check.
    fs::write(&marker, current.to_string()).ok();
    changed
}

View file

@ -1,4 +1,4 @@
#![cfg_attr(feature = "nightly-diagnostics", feature(async_fn_track_caller))] #![feature(async_fn_track_caller)]
// consciousness — unified crate for memory, agents, and subconscious processes // consciousness — unified crate for memory, agents, and subconscious processes
// //
@ -25,9 +25,6 @@ macro_rules! dbglog {
}}; }};
} }
// Logging (target-routed file logger)
pub mod logging;
// User interface (TUI, CLI) // User interface (TUI, CLI)
pub mod user; pub mod user;
@ -43,9 +40,6 @@ pub mod hippocampus;
// Autonomous agents // Autonomous agents
pub mod subconscious; pub mod subconscious;
// Conversation transcript abstraction and compatibility sources
pub mod conversation;
// Unified configuration // Unified configuration
pub mod config; pub mod config;
pub mod config_writer; pub mod config_writer;
@ -94,8 +88,7 @@ pub mod channel_capnp {
pub use hippocampus::{ pub use hippocampus::{
store, graph, lookups, query, store, graph, lookups, query,
spectral, neuro, counters, spectral, neuro, counters,
memory, transcript, memory,
}; };
pub use conversation as transcript;
use hippocampus::query::engine as search; use hippocampus::query::engine as search;
use hippocampus::query::parser as query_parser; use hippocampus::query::parser as query_parser;

View file

@ -114,7 +114,7 @@ impl<T> TrackedMutex<T> {
Self { inner: Mutex::new(value) } Self { inner: Mutex::new(value) }
} }
#[cfg_attr(feature = "nightly-diagnostics", track_caller)] #[track_caller]
pub async fn lock(&self) -> TrackedMutexGuard<'_, T> { pub async fn lock(&self) -> TrackedMutexGuard<'_, T> {
let location = Location::caller(); let location = Location::caller();
let guard = self.inner.lock().await; let guard = self.inner.lock().await;
@ -125,7 +125,7 @@ impl<T> TrackedMutex<T> {
} }
} }
#[cfg_attr(feature = "nightly-diagnostics", track_caller)] #[track_caller]
pub fn try_lock(&self) -> Result<TrackedMutexGuard<'_, T>, tokio::sync::TryLockError> { pub fn try_lock(&self) -> Result<TrackedMutexGuard<'_, T>, tokio::sync::TryLockError> {
let location = Location::caller(); let location = Location::caller();
let guard = self.inner.try_lock()?; let guard = self.inner.try_lock()?;
@ -171,7 +171,7 @@ impl<T> TrackedRwLock<T> {
Self { inner: RwLock::new(value) } Self { inner: RwLock::new(value) }
} }
#[cfg_attr(feature = "nightly-diagnostics", track_caller)] #[track_caller]
pub async fn read(&self) -> TrackedRwLockReadGuard<'_, T> { pub async fn read(&self) -> TrackedRwLockReadGuard<'_, T> {
let location = Location::caller(); let location = Location::caller();
let guard = self.inner.read().await; let guard = self.inner.read().await;
@ -182,7 +182,7 @@ impl<T> TrackedRwLock<T> {
} }
} }
#[cfg_attr(feature = "nightly-diagnostics", track_caller)] #[track_caller]
pub async fn write(&self) -> TrackedRwLockWriteGuard<'_, T> { pub async fn write(&self) -> TrackedRwLockWriteGuard<'_, T> {
let location = Location::caller(); let location = Location::caller();
let guard = self.inner.write().await; let guard = self.inner.write().await;

View file

@ -1,146 +0,0 @@
// logging.rs — log-crate logger that routes by target.
//
// Records with target "grpc" (or any target starting with "grpc::") go
// to ~/.consciousness/logs/daemon/grpc.log so we can tell gRPC events
// apart from the rest of consciousness's noise. Everything else goes
// to ~/.consciousness/logs/daemon/debug.log.
//
// Level threshold is taken from RUST_LOG (simple global level parse:
// "trace"/"debug"/"info"/"warn"/"error"); defaults to "info".
use std::io::Write;
use std::path::PathBuf;
use std::sync::Mutex;
use log::{Level, LevelFilter, Log, Metadata, Record, SetLoggerError};
/// Directory holding the daemon log files: `.consciousness/logs/daemon`
/// under the home directory, or under an empty base path when
/// `dirs::home_dir()` reports none.
fn logs_dir() -> PathBuf {
    let home = dirs::home_dir().unwrap_or_default();
    home.join(".consciousness/logs/daemon")
}
/// `log` backend that appends each record to one of two files, chosen by
/// the record's target.
struct RoutingLogger {
/// Destination for records whose target is "grpc" or starts with
/// "grpc::"; `None` when grpc.log could not be opened.
grpc_file: Mutex<Option<std::fs::File>>,
/// Destination for every other record; `None` when debug.log could not
/// be opened.
debug_file: Mutex<Option<std::fs::File>>,
/// Threshold applied to non-grpc records.
level: LevelFilter,
}
impl RoutingLogger {
    /// Build a logger with `level` as the threshold for non-grpc records.
    ///
    /// Creates the log directory and opens `grpc.log` and `debug.log` in
    /// append mode. Every failure is tolerated: a file that cannot be
    /// opened leaves its slot as `None`, and records routed to it are
    /// dropped.
    fn new(level: LevelFilter) -> Self {
        let dir = logs_dir();
        // Best effort: if this fails, the opens below fail as well.
        let _ = std::fs::create_dir_all(&dir);

        let open_append = |file_name: &str| {
            std::fs::OpenOptions::new()
                .create(true)
                .append(true)
                .open(dir.join(file_name))
                .ok()
        };
        let grpc_file = Mutex::new(open_append("grpc.log"));
        let debug_file = Mutex::new(open_append("debug.log"));

        Self { grpc_file, debug_file, level }
    }

    /// True for the target `grpc` itself and for any target under the
    /// `grpc::` path; a target such as `grpcfoo` does not qualify.
    fn is_grpc_target(target: &str) -> bool {
        match target.strip_prefix("grpc") {
            Some(rest) => rest.is_empty() || rest.starts_with("::"),
            None => false,
        }
    }
}
impl Log for RoutingLogger {
fn enabled(&self, m: &Metadata) -> bool {
// Always enable DEBUG for grpc target so the dedicated log is
// actually useful without RUST_LOG wrangling; defer to the
// configured level for everything else.
if Self::is_grpc_target(m.target()) {
return m.level() <= Level::Debug;
}
m.level() <= self.level
}
fn log(&self, record: &Record) {
if !self.enabled(record.metadata()) {
return;
}
let line = format!(
"[{}] [{}] [{}] {}\n",
chrono::Utc::now().format("%Y-%m-%d %H:%M:%S%.3f"),
record.level(),
record.target(),
record.args(),
);
let slot = if Self::is_grpc_target(record.target()) {
&self.grpc_file
} else {
&self.debug_file
};
if let Ok(mut guard) = slot.lock() {
if let Some(ref mut f) = *guard {
let _ = f.write_all(line.as_bytes());
}
}
}
fn flush(&self) {
for slot in [&self.grpc_file, &self.debug_file] {
if let Ok(mut g) = slot.lock() {
if let Some(ref mut f) = *g {
let _ = f.flush();
}
}
}
}
}
/// Derive the global level from `RUST_LOG`.
///
/// Only the first comma-separated directive is considered. For a
/// `module=level` directive the part after the last `=` is used. The word
/// is trimmed and matched case-insensitively; an unset variable or an
/// unrecognized word yields `Info`.
fn parse_level_from_env() -> LevelFilter {
    let raw = std::env::var("RUST_LOG").unwrap_or_else(|_| "info".to_string());

    let first_directive = raw.split(',').next().unwrap_or("info");
    let word = match first_directive.rsplit_once('=') {
        Some((_module, level)) => level,
        None => first_directive,
    };
    let word = word.trim().to_lowercase();

    match word.as_str() {
        "off" => LevelFilter::Off,
        "error" => LevelFilter::Error,
        "warn" => LevelFilter::Warn,
        "debug" => LevelFilter::Debug,
        "trace" => LevelFilter::Trace,
        // "info" and anything unrecognized.
        _ => LevelFilter::Info,
    }
}
/// Install the routing logger as the process-wide `log` backend.
///
/// The threshold for non-grpc targets comes from `RUST_LOG` (see
/// `parse_level_from_env`). After installation, one session-boundary line
/// is logged to each destination so restarts are easy to spot.
///
/// # Errors
///
/// Returns `SetLoggerError` if a logger has already been installed; in that
/// case the global max level is left untouched and no marker is logged.
pub fn init() -> Result<(), SetLoggerError> {
    let level = parse_level_from_env();
    log::set_boxed_logger(Box::new(RoutingLogger::new(level)))?;

    // The global cap must admit DEBUG so grpc records reach the logger at
    // all; the logger itself filters non-grpc targets by `level`. The cost
    // is that log::debug! call-sites in other modules still format their
    // arguments before being dropped, which is acceptable for a debug tool.
    let global_cap = std::cmp::max(LevelFilter::Debug, level);
    log::set_max_level(global_cap);

    // Session-boundary markers, one per destination.
    let pid = std::process::id();
    log::info!(
        "===== consciousness logger init (level={}, pid={}) =====",
        level, pid,
    );
    log::info!(target: "grpc",
        "===== grpc log init (level={}, pid={}) =====",
        level, pid,
    );
    Ok(())
}
/// Current global maximum log level expressed as a `Level`.
///
/// `LevelFilter::Off` has no `Level` counterpart and is reported as
/// `Level::Info`. Kept so the `Level` import stays in use even when no
/// caller needs this function, hence the `dead_code` allowance.
#[allow(dead_code)]
pub fn current_level() -> Level {
    log::max_level().to_level().unwrap_or(Level::Info)
}

View file

@ -1,4 +1,4 @@
#![cfg_attr(feature = "nightly-diagnostics", feature(panic_backtrace_config))] #![feature(panic_backtrace_config)]
// poc-memory: graph-structured memory for AI assistants // poc-memory: graph-structured memory for AI assistants
// //
@ -333,18 +333,6 @@ enum AdminCmd {
#[arg(long)] #[arg(long)]
stats: bool, stats: bool,
}, },
/// Print normalized user/assistant messages from a transcript JSONL file
#[command(name = "transcript-tail")]
TranscriptTail {
/// Transcript JSONL path
path: String,
/// Maximum number of messages to print
#[arg(long, short = 'n', default_value_t = 40)]
count: usize,
/// Print newest messages first instead of chronological order
#[arg(long)]
newest_first: bool,
},
} }
/// Print help with subcommands expanded to show nested commands. /// Print help with subcommands expanded to show nested commands.
@ -470,15 +458,12 @@ impl Run for AdminCmd {
Self::Dedup { apply } => cli::admin::cmd_dedup(apply).await, Self::Dedup { apply } => cli::admin::cmd_dedup(apply).await,
Self::DailyCheck => cli::admin::cmd_daily_check().await, Self::DailyCheck => cli::admin::cmd_daily_check().await,
Self::LoadContext { stats } => cli::node::cmd_load_context(stats).await, Self::LoadContext { stats } => cli::node::cmd_load_context(stats).await,
Self::TranscriptTail { path, count, newest_first }
=> cli::admin::cmd_transcript_tail(&path, count, newest_first),
} }
} }
} }
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
#[cfg(feature = "nightly-diagnostics")]
std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short); std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short);
// Handle --help ourselves for expanded subcommand display // Handle --help ourselves for expanded subcommand display
@ -510,3 +495,4 @@ async fn main() {
process::exit(1); process::exit(1);
} }
} }

View file

@ -3,7 +3,7 @@ use std::fs::{File, OpenOptions};
use std::io::Write; use std::io::Write;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use crate::agent::context::AstNode; use crate::agent::context::AstNode;
use crate::conversation::JsonlBackwardIter; use crate::hippocampus::transcript::JsonlBackwardIter;
use memmap2::Mmap; use memmap2::Mmap;
pub struct ConversationLog { pub struct ConversationLog {
@ -78,6 +78,6 @@ pub struct TailNodes {
impl TailNodes { impl TailNodes {
pub fn iter(&self) -> impl Iterator<Item = AstNode> + '_ { pub fn iter(&self) -> impl Iterator<Item = AstNode> + '_ {
JsonlBackwardIter::new(&self.mmap) JsonlBackwardIter::new(&self.mmap)
.filter_map(|(_, bytes)| serde_json::from_slice::<AstNode>(bytes).ok()) .filter_map(|bytes| serde_json::from_slice::<AstNode>(bytes).ok())
} }
} }

View file

@ -419,9 +419,7 @@ impl Mind {
let subconscious = Arc::new(crate::Mutex::new(Subconscious::new())); let subconscious = Arc::new(crate::Mutex::new(Subconscious::new()));
subconscious.lock().await.init_output_tool(subconscious.clone()); subconscious.lock().await.init_output_tool(subconscious.clone());
let unconscious = Arc::new(crate::Mutex::new( let unconscious = Arc::new(crate::Mutex::new(Unconscious::new()));
Unconscious::new(agent.client.clone()),
));
// Spawn the unconscious loop on its own task // Spawn the unconscious loop on its own task
if !config.no_agents { if !config.no_agents {
@ -469,11 +467,8 @@ impl Mind {
}; };
// Spawn agents outside lock // Spawn agents outside lock
let client = unc.lock().await.client.clone();
for (idx, name, auto) in to_spawn { for (idx, name, auto) in to_spawn {
match crate::mind::unconscious::prepare_spawn( match crate::mind::unconscious::prepare_spawn(&name, auto, wake.clone()).await {
&name, auto, wake.clone(), client.clone(),
).await {
Ok(result) => unc.lock().await.complete_spawn(idx, result), Ok(result) => unc.lock().await.complete_spawn(idx, result),
Err(auto) => unc.lock().await.abort_spawn(idx, auto), Err(auto) => unc.lock().await.abort_spawn(idx, auto),
} }
@ -693,7 +688,7 @@ impl Mind {
} }
}); });
let _sub_handle: Option<tokio::task::JoinHandle<()>> = None; let mut sub_handle: Option<tokio::task::JoinHandle<()>> = None;
// Start finetune scoring at startup (scores existing conversation) // Start finetune scoring at startup (scores existing conversation)
if !self.config.no_agents { if !self.config.no_agents {
@ -743,7 +738,6 @@ impl Mind {
_ = tokio::time::sleep(timeout), if !has_input => _dmn_expired = true, _ = tokio::time::sleep(timeout), if !has_input => _dmn_expired = true,
} }
/*
if !self.config.no_agents { if !self.config.no_agents {
if sub_handle.as_ref().map_or(true, |h| h.is_finished()) { if sub_handle.as_ref().map_or(true, |h| h.is_finished()) {
let sub = self.subconscious.clone(); let sub = self.subconscious.clone();
@ -755,7 +749,6 @@ impl Mind {
})); }));
} }
} }
*/
// Check for pending user input → push to agent context and start turn // Check for pending user input → push to agent context and start turn
let pending = self.shared.lock().unwrap().take_pending_input(); let pending = self.shared.lock().unwrap().take_pending_input();

View file

@ -631,7 +631,7 @@ impl Subconscious {
{ {
let mut st = forked.state.lock().await; let mut st = forked.state.lock().await;
st.provenance = auto.name.clone(); st.provenance = auto.name.clone();
st.sampling.temperature = auto.temperature; st.temperature = auto.temperature;
// Surface agent gets near-interactive priority; // Surface agent gets near-interactive priority;
// other subconscious agents get lower priority. // other subconscious agents get lower priority.
st.priority = Some(if auto.name == "surface" { 1 } else { auto.priority }); st.priority = Some(if auto.name == "surface" { 1 } else { auto.priority });

View file

@ -73,15 +73,10 @@ pub struct Unconscious {
last_health_check: Option<Instant>, last_health_check: Option<Instant>,
/// Notified when agent state changes (finished, toggled) /// Notified when agent state changes (finished, toggled)
pub wake: std::sync::Arc<tokio::sync::Notify>, pub wake: std::sync::Arc<tokio::sync::Notify>,
/// Shared API client — cloned (cheap) into each spawned agent's
/// Agent::new call so they all share the manifest cache and
/// gRPC endpoint state. Override `.model` on the clone when a
/// per-agent backend differs from the default.
pub client: crate::agent::api::ApiClient,
} }
impl Unconscious { impl Unconscious {
pub fn new(client: crate::agent::api::ApiClient) -> Self { pub fn new() -> Self {
let enabled_map = load_enabled_config(); let enabled_map = load_enabled_config();
// Scan all .agent files, exclude subconscious-* and surface-observe // Scan all .agent files, exclude subconscious-* and surface-observe
@ -125,7 +120,6 @@ impl Unconscious {
graph_health: None, graph_health: None,
last_health_check: None, last_health_check: None,
wake: std::sync::Arc::new(tokio::sync::Notify::new()), wake: std::sync::Arc::new(tokio::sync::Notify::new()),
client,
} }
} }
@ -140,8 +134,7 @@ impl Unconscious {
let agent_name = self.agents[idx].name.clone(); let agent_name = self.agents[idx].name.clone();
let auto = self.agents[idx].auto.take().unwrap(); let auto = self.agents[idx].auto.take().unwrap();
let wake = self.wake.clone(); let wake = self.wake.clone();
let client = self.client.clone(); match prepare_spawn(&agent_name, auto, wake).await {
match prepare_spawn(&agent_name, auto, wake, client).await {
Ok(result) => self.complete_spawn(idx, result), Ok(result) => self.complete_spawn(idx, result),
Err(auto) => self.abort_spawn(idx, auto), Err(auto) => self.abort_spawn(idx, auto),
} }
@ -257,12 +250,7 @@ pub struct SpawnResult {
/// Called outside the Unconscious lock. /// Called outside the Unconscious lock.
/// On success, auto is consumed (moved into spawned task). /// On success, auto is consumed (moved into spawned task).
/// On failure, auto is returned so it can be restored. /// On failure, auto is returned so it can be restored.
pub async fn prepare_spawn( pub async fn prepare_spawn(name: &str, mut auto: AutoAgent, wake: std::sync::Arc<tokio::sync::Notify>) -> Result<SpawnResult, AutoAgent> {
name: &str,
mut auto: AutoAgent,
wake: std::sync::Arc<tokio::sync::Notify>,
base_client: crate::agent::api::ApiClient,
) -> Result<SpawnResult, AutoAgent> {
dbglog!("[unconscious] spawning {}", name); dbglog!("[unconscious] spawning {}", name);
let def = match defs::get_def(name) { let def = match defs::get_def(name) {
@ -307,10 +295,8 @@ pub async fn prepare_spawn(
}; };
// Unconscious agents have self-contained prompts — no standard context. // Unconscious agents have self-contained prompts — no standard context.
// Clone the shared client so we inherit the manifest cache and let client = crate::agent::api::ApiClient::new(
// only override the model id per-agent. &resolved.api_base, &resolved.api_key, &resolved.model_id);
let mut client = base_client;
client.model = resolved.model_id.clone();
let agent = crate::agent::Agent::new( let agent = crate::agent::Agent::new(
client, Vec::new(), client, Vec::new(),
app, None, app, None,
@ -321,7 +307,7 @@ pub async fn prepare_spawn(
let mut st = agent.state.lock().await; let mut st = agent.state.lock().await;
st.provenance = auto.name.clone(); st.provenance = auto.name.clone();
st.priority = Some(auto.priority); st.priority = Some(auto.priority);
st.sampling.temperature = auto.temperature; st.temperature = auto.temperature;
} }
let agent_clone = agent.clone(); let agent_clone = agent.clone();
@ -343,9 +329,8 @@ impl Unconscious {
self.reap_finished(); self.reap_finished();
let to_spawn = self.select_to_spawn(); let to_spawn = self.select_to_spawn();
let wake = self.wake.clone(); let wake = self.wake.clone();
let client = self.client.clone();
for (idx, name, auto) in to_spawn { for (idx, name, auto) in to_spawn {
match prepare_spawn(&name, auto, wake.clone(), client.clone()).await { match prepare_spawn(&name, auto, wake.clone()).await {
Ok(result) => self.complete_spawn(idx, result), Ok(result) => self.complete_spawn(idx, result),
Err(auto) => self.abort_spawn(idx, auto), Err(auto) => self.abort_spawn(idx, auto),
} }

View file

@ -64,12 +64,7 @@ impl HookSession {
/// Load from POC_SESSION_ID environment variable /// Load from POC_SESSION_ID environment variable
pub fn from_env() -> Option<Self> { pub fn from_env() -> Option<Self> {
let session_id = std::env::var("POC_SESSION_ID").ok()?; Self::from_id(std::env::var("POC_SESSION_ID").ok()?)
let mut session = Self::from_id(session_id)?;
if let Ok(path) = std::env::var("POC_TRANSCRIPT_PATH") {
session.transcript_path = path;
}
Some(session)
} }
/// Get the seen set for this session /// Get the seen set for this session

View file

@ -1,4 +1,4 @@
#!/usr/bin/env bash #!/bin/bash
# Bail if another agent is in the same phase-group as us. # Bail if another agent is in the same phase-group as us.
# #
# $1 = our pid file name (e.g. "pid-12345") # $1 = our pid file name (e.g. "pid-12345")

View file

@ -390,7 +390,7 @@ fn resolve_conversation(budget: Option<usize>) -> String {
if !transcript.exists() { return String::new(); } if !transcript.exists() { return String::new(); }
let Some(iter) = crate::conversation::TailMessages::open(&transcript.path) else { let Some(iter) = crate::transcript::TailMessages::open(&transcript.path) else {
return String::new(); return String::new();
}; };
@ -401,14 +401,10 @@ fn resolve_conversation(budget: Option<usize>) -> String {
let mut total_bytes = 0; let mut total_bytes = 0;
let mut oldest_ts = String::new(); let mut oldest_ts = String::new();
for message in iter { for (role, content, ts) in iter {
if total_bytes >= max_bytes { break; } if total_bytes >= max_bytes { break; }
let content = message.text; let name = if role == "user" { &app.user_name } else { &app.assistant_name };
let name = match message.role { let formatted = if !ts.is_empty() {
crate::conversation::TranscriptRole::User => &app.user_name,
crate::conversation::TranscriptRole::Assistant => &app.assistant_name,
};
let formatted = if let Some(ts) = message.timestamp {
oldest_ts = ts[..ts.floor_char_boundary(ts.len().min(19))].to_string(); oldest_ts = ts[..ts.floor_char_boundary(ts.len().min(19))].to_string();
format!("**{}** {}: {}", name, &oldest_ts, content) format!("**{}** {}: {}", name, &oldest_ts, content)
} else { } else {

View file

@ -4,10 +4,8 @@
// given a context prefix and a skip predicate, generate what the model // given a context prefix and a skip predicate, generate what the model
// would say as the next assistant turn. // would say as the next assistant turn.
use std::sync::Arc;
use crate::agent::api::{ApiClient, SamplingParams, StreamToken}; use crate::agent::api::{ApiClient, SamplingParams, StreamToken};
use crate::agent::context::{AstNode, ContextState, WireChunk}; use crate::agent::context::{AstNode, ContextState};
use crate::agent::tokenizer; use crate::agent::tokenizer;
/// Generate an assistant continuation from the context up to `entry_idx`, /// Generate an assistant continuation from the context up to `entry_idx`,
@ -15,9 +13,6 @@ use crate::agent::tokenizer;
/// assembly. The model is whichever `client` points at — the default /// assembly. The model is whichever `client` points at — the default
/// runtime client for memory-ablation alternates, a test-model client /// runtime client for memory-ablation alternates, a test-model client
/// for F7 comparison. /// for F7 comparison.
///
/// Uses a fresh ephemeral gRPC session (no cross-call KV reuse): one
/// Open / Append / Generate round-trip, then the session is dropped.
pub async fn gen_continuation<F>( pub async fn gen_continuation<F>(
context: &ContextState, context: &ContextState,
entry_idx: usize, entry_idx: usize,
@ -26,32 +21,17 @@ pub async fn gen_continuation<F>(
) -> anyhow::Result<String> ) -> anyhow::Result<String>
where F: FnMut(&AstNode) -> bool, where F: FnMut(&AstNode) -> bool,
{ {
let (mut chunks, images) = context.wire_chunks(0..entry_idx, skip); let (mut prompt, images, _) = context.wire_prompt(0..entry_idx, skip);
// Assistant-turn prologue. prompt.push(tokenizer::IM_START);
let prologue = { prompt.extend(tokenizer::encode("assistant\n"));
let mut t = vec![tokenizer::IM_START];
t.extend(tokenizer::encode("assistant\n"));
t
};
match chunks.last_mut() {
Some(WireChunk::Tokens(last)) => last.extend(prologue),
_ => chunks.push(WireChunk::Tokens(prologue)),
}
let sampling = SamplingParams { let sampling = SamplingParams {
temperature: 0.6, temperature: 0.6,
top_p: 0.95, top_p: 0.95,
top_k: 20, top_k: 20,
max_tokens: 4096,
}; };
let (mut rx, _guard) = client.stream_completion_mm(&prompt, &images, sampling, Some(-5));
// Ephemeral per-call session — opens on first touch, drops when
// `_guard` drops at function end.
let session_lock = Arc::new(crate::Mutex::new(None));
let (mut rx, _guard) = client.stream_session_mm(
session_lock, chunks, images, 0, sampling, Some(-5), None,
);
let mut tokens = Vec::new(); let mut tokens = Vec::new();
while let Some(tok) = rx.recv().await { while let Some(tok) = rx.recv().await {

View file

@ -1,148 +1,100 @@
// learn.rs — Memory importance scoring over the salience gRPC protocol. // training.rs — Memory importance scoring via /v1/score
// //
// Three scoring modes, all built on call_score(): // Three scoring modes, all built on the same call_score() primitive:
// //
// score_memories() — Full N×M matrix (memories × responses) for the // score_memories() — Full N×M matrix (memories × responses) for the
// debug screen. Expensive: N+1 sessions/calls. // debug screen. Expensive: N+1 API calls.
// //
// score_memory() — Single memory importance. Scores the 50 messages // memory_score() — Single memory importance. Scores the 50 messages
// after it was surfaced, with/without that memory. // after it was surfaced, with/without that memory.
// 2 calls. // 2 API calls.
// //
// finetune_score() — Identifies training candidates. Scores recent // finetune_score() — Identifies training candidates. Scores recent
// messages with all memories stripped. Responses // messages with all memories stripped. Responses
// with high divergence depend on memories the model // with high divergence depend on memories the model
// hasn't internalized. 2 calls. // hasn't internalized. 2 API calls.
//
// Each call opens an ephemeral gRPC session (reusing the shared
// tonic Channel on `ApiClient`), pushes the prompt through as
// interleaved tokens + AppendImage calls, runs Generate with
// max_tokens=0 + logprobs_ranges over the scored positions, collects
// each Token event's sampled_logprob, then drops the SessionHandle —
// which triggers a best-effort CloseSession over the shared channel.
use std::sync::Arc; use std::sync::Arc;
use crate::agent::api::ApiClient; use crate::agent::api::ApiClient;
use crate::agent::api::salience::{SessionHandle, pb};
use crate::agent::context::{ use crate::agent::context::{
Ast, AstNode, ContextState, Role, WireChunk, WireImage, Ast, AstNode, ContextState, Role, WireImage,
is_assistant, is_memory_node, memory_key, render_branch_text, render_prior_context, is_assistant, is_memory_node, memory_key, render_branch_text, render_prior_context,
}; };
use crate::agent::tokenizer;
use crate::mind::{MindState, MindTriggered, TaskHandle}; use crate::mind::{MindState, MindTriggered, TaskHandle};
use crate::subconscious::generate::gen_continuation; use crate::subconscious::generate::gen_continuation;
const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);
// ── Score API ─────────────────────────────────────────────────── // ── Score API ───────────────────────────────────────────────────
#[derive(Debug, Clone)] #[derive(serde::Deserialize)]
struct ScoreResult { struct ScoreResult {
total_logprob: f64, total_logprob: f64,
} }
/// Find each <|vision_start|>...<|vision_end|> run in the flat prompt #[derive(serde::Deserialize)]
/// and pair it with the matching entry in `images`. Returns a list struct ScoreResponse {
/// of `ImageAttachment` with absolute pad-range positions, ready scores: Vec<ScoreResult>,
/// to drop into `GenerateRequest.images`. }
fn pair_images_to_ranges(
prompt: &[u32], fn http_client() -> crate::agent::api::http::HttpClient {
images: &[WireImage], crate::agent::api::http::HttpClient::builder()
) -> Vec<pb::ImageAttachment> { .timeout(SCORE_TIMEOUT)
let mut out: Vec<pb::ImageAttachment> = Vec::new(); .build()
let mut cur = 0;
let mut img_idx = 0;
while cur < prompt.len() {
if prompt[cur] == tokenizer::VISION_START {
let end_rel = prompt[cur..].iter()
.position(|&t| t == tokenizer::VISION_END)
.unwrap_or_else(|| panic!(
"unmatched VISION_START at position {} in prompt", cur));
let end = cur + end_rel + 1;
let img = images.get(img_idx)
.unwrap_or_else(|| panic!(
"image index {} out of range for {} images", img_idx, images.len()));
out.push(pb::ImageAttachment {
bytes: img.bytes.clone(),
mime: img.mime.clone(),
pad_range_start: cur as u32,
pad_range_end: end as u32,
});
img_idx += 1;
cur = end;
} else {
cur += 1;
}
}
out
} }
async fn call_score( async fn call_score(
http: &crate::agent::api::http::HttpClient,
client: &ApiClient, client: &ApiClient,
prompt: &[u32], prompt: &[u32],
images: &[WireImage], images: &[WireImage],
ranges: &[(usize, usize)], ranges: &[(usize, usize)],
priority: Option<i32>, priority: Option<i32>,
) -> anyhow::Result<Vec<ScoreResult>> { ) -> anyhow::Result<Vec<ScoreResult>> {
use futures::StreamExt;
// Nothing to score — skip the round-trip. // Nothing to score — skip the round-trip.
if ranges.is_empty() { if ranges.is_empty() {
return Ok(Vec::new()); return Ok(Vec::new());
} }
let url = format!("{}/score", client.base_url());
let auth = format!("Bearer {}", client.api_key());
let mut body = serde_json::json!({
"model": client.model,
"prompt": prompt,
"score_ranges": ranges,
"logprobs": 1,
});
if !images.is_empty() {
use base64::Engine;
let b64 = base64::engine::general_purpose::STANDARD;
let uris: Vec<String> = images.iter()
.map(|img| format!("data:{};base64,{}", img.mime, b64.encode(&img.bytes)))
.collect();
body["multi_modal_data"] = serde_json::json!({ "image": uris });
}
if let Some(p) = priority {
body["priority"] = serde_json::json!(p);
}
let response = http
.send_json("POST", &url, &[
("authorization", &auth),
], &body)
.await?;
let images_pb = pair_images_to_ranges(prompt, images); let status = response.status();
let mut handle = SessionHandle::open(client).await?; let body: serde_json::Value = response.json().await?;
// Final Generate: max_tokens=0 so the server runs prefill of the if !status.is_success() {
// full prompt and emits Token events for each position covered let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("unknown error");
// by logprobs_ranges, then Done. logprob_top_k=0 means "just anyhow::bail!("score API HTTP {}: {}", status, msg);
// the sampled (prompt) token's logprob" — no top-k alternatives, }
// which is all call_score historically needed. Images attach if let Some(err) = body.get("error").and_then(|e| e.as_str()) {
// inline via `images`; the prompt already contains their pre- anyhow::bail!("score API error: {}", err);
// expanded vision blocks at the declared ranges.
let logprobs_ranges: Vec<pb::PositionRange> = ranges.iter()
.map(|(s, e)| pb::PositionRange { start: *s as u32, end: *e as u32 })
.collect();
let req = pb::GenerateRequest {
session_id: handle.session_id.clone(),
append_tokens: prompt.to_vec(),
offset: handle.committed_len,
truncating: false,
max_tokens: 0,
logprobs_ranges,
logprob_top_k: 0,
readout_ranges: Vec::new(),
temperature: 0.0,
top_p: 0.0,
top_k: 0,
stop_token_ids: Vec::new(),
priority: priority.unwrap_or(0),
images: images_pb,
};
let mut stream = handle.generate(req).await?;
let mut totals = vec![0.0f64; ranges.len()];
while let Some(event) = stream.next().await {
let event = event
.map_err(|s| anyhow::anyhow!("score Generate stream: {}", s))?;
let Some(inner) = event.event else { continue };
match inner {
pb::generate_event::Event::Token(t) => {
if !t.has_sampled_logprob { continue; }
let pos = t.position as usize;
for (i, (start, end)) in ranges.iter().enumerate() {
if pos >= *start && pos < *end {
totals[i] += t.sampled_logprob as f64;
}
}
}
pb::generate_event::Event::Done(_) => break,
}
} }
Ok(totals.into_iter() let result: ScoreResponse = serde_json::from_value(body)
.map(|total_logprob| ScoreResult { total_logprob }) .map_err(|e| anyhow::anyhow!("failed to parse score response: {}", e))?;
.collect()) Ok(result.scores)
} }
/// Compute per-position logprob divergence: how much worse the model /// Compute per-position logprob divergence: how much worse the model
@ -158,6 +110,7 @@ fn divergence(baseline: &[ScoreResult], without: &[ScoreResult]) -> Vec<f64> {
/// Score two message sets and return total divergence. /// Score two message sets and return total divergence.
async fn score_divergence<F>( async fn score_divergence<F>(
http: &crate::agent::api::http::HttpClient,
client: &ApiClient, client: &ApiClient,
context: &ContextState, context: &ContextState,
range: std::ops::Range<usize>, range: std::ops::Range<usize>,
@ -170,9 +123,9 @@ where F: FnMut(&AstNode) -> bool,
context.wire_prompt(range.clone(), |_| false); context.wire_prompt(range.clone(), |_| false);
let (without_tokens, without_images, without_ranges) = let (without_tokens, without_images, without_ranges) =
context.wire_prompt(range, skip); context.wire_prompt(range, skip);
let baseline = call_score(client, &baseline_tokens, &baseline_images, let baseline = call_score(http, client, &baseline_tokens, &baseline_images,
&baseline_ranges, priority).await?; &baseline_ranges, priority).await?;
let without = call_score(client, &without_tokens, &without_images, let without = call_score(http, client, &without_tokens, &without_images,
&without_ranges, priority).await?; &without_ranges, priority).await?;
let divs = divergence(&baseline, &without); let divs = divergence(&baseline, &without);
Ok((divs, baseline)) Ok((divs, baseline))
@ -209,13 +162,14 @@ pub async fn score_memories(
dbglog!("[scoring-full] starting: {} memories × {} responses", dbglog!("[scoring-full] starting: {} memories × {} responses",
total, response_indices.len()); total, response_indices.len());
let http = http_client();
let activity = crate::agent::start_activity(agent, "scoring: baseline").await; let activity = crate::agent::start_activity(agent, "scoring: baseline").await;
let (baseline_tokens, baseline_images, baseline_ranges) = { let (baseline_tokens, baseline_images, baseline_ranges) = {
let ctx = agent.context.lock().await; let ctx = agent.context.lock().await;
ctx.wire_prompt(0..ctx.conversation().len(), |_| false) ctx.wire_prompt(0..ctx.conversation().len(), |_| false)
}; };
let baseline = call_score(client, &baseline_tokens, &baseline_images, let baseline = call_score(&http, client, &baseline_tokens, &baseline_images,
&baseline_ranges, Some(5)).await?; &baseline_ranges, Some(5)).await?;
dbglog!("[scoring-full] baseline done ({} response scores)", baseline.len()); dbglog!("[scoring-full] baseline done ({} response scores)", baseline.len());
@ -226,7 +180,7 @@ pub async fn score_memories(
let ctx = agent.context.lock().await; let ctx = agent.context.lock().await;
ctx.wire_prompt(0..ctx.conversation().len(), |n| memory_key(n) == Some(key.as_str())) ctx.wire_prompt(0..ctx.conversation().len(), |n| memory_key(n) == Some(key.as_str()))
}; };
let row = match call_score(client, &tokens, &images, &ranges, Some(5)).await { let row = match call_score(&http, client, &tokens, &images, &ranges, Some(5)).await {
Ok(without) => { Ok(without) => {
let divs = divergence(&baseline, &without); let divs = divergence(&baseline, &without);
let max_div = divs.iter().cloned().fold(0.0f64, f64::max); let max_div = divs.iter().cloned().fold(0.0f64, f64::max);
@ -240,23 +194,25 @@ pub async fn score_memories(
vec![0.0; baseline.len()] vec![0.0; baseline.len()]
} }
}; };
// Write this memory's scores to the live AST nodes via the // Write this memory's scores to the live AST nodes
// focused setter — keeps the AST mutation surface narrow.
{ {
let mut ctx = agent.context.lock().await; let mut ctx = agent.context.lock().await;
let mut set_count = 0; let mut set_count = 0;
for (resp_idx, &idx) in response_indices.iter().enumerate() { for (resp_idx, &idx) in response_indices.iter().enumerate() {
let Some(&score) = row.get(resp_idx) else { continue }; if idx >= ctx.conversation().len() { continue; }
let normalized = if score > 0.01 { Some(score) } else { None }; let node = &mut ctx.conversation_mut()[idx];
ctx.set_branch_memory_score( if let AstNode::Branch {
crate::agent::context::Section::Conversation, role: Role::Assistant, memory_scores, ..
idx, } = node {
&key, if let Some(&score) = row.get(resp_idx) {
normalized, if score > 0.01 {
); memory_scores.insert(key.clone(), score);
if normalized.is_some() { set_count += 1;
set_count += 1; } else {
memory_scores.remove(key.as_str());
}
}
} }
} }
@ -307,7 +263,8 @@ pub async fn score_memory(
return Ok(0.0); return Ok(0.0);
} }
let (divs, _) = score_divergence(client, context, range, let http = http_client();
let (divs, _) = score_divergence(&http, client, context, range,
|n| memory_key(n) == Some(key), Some(5)).await?; |n| memory_key(n) == Some(key), Some(5)).await?;
Ok(divs.iter().sum()) Ok(divs.iter().sum())
@ -365,6 +322,7 @@ where
// Score oldest-first // Score oldest-first
candidates.sort_by_key(|&(_, _, last)| last); candidates.sort_by_key(|&(_, _, last)| last);
let http = http_client();
let mut scored = 0; let mut scored = 0;
let entries = context.conversation(); let entries = context.conversation();
@ -399,7 +357,7 @@ where
} }
activity.update(format!("scoring: {}/{} {}", scored + 1, total, key)).await; activity.update(format!("scoring: {}/{} {}", scored + 1, total, key)).await;
match score_divergence(client, context, range, match score_divergence(&http, client, context, range,
|n| memory_key(n) == Some(key), Some(5)).await { |n| memory_key(n) == Some(key), Some(5)).await {
Ok((divs, _)) => { Ok((divs, _)) => {
let n_responses = divs.len(); let n_responses = divs.len();
@ -547,7 +505,8 @@ pub async fn score_finetune(
return Ok(Vec::new()); return Ok(Vec::new());
} }
let (divs, _) = score_divergence(client, context, range, is_memory_node, Some(5)).await?; let http = http_client();
let (divs, _) = score_divergence(&http, client, context, range, is_memory_node, Some(5)).await?;
let mut results: Vec<(usize, f64)> = response_positions.iter() let mut results: Vec<(usize, f64)> = response_positions.iter()
.enumerate() .enumerate()
@ -845,10 +804,8 @@ pub async fn send_to_train(
} }
}); });
let http = http_client();
let url = format!("{}/train", client.base_url()); let url = format!("{}/train", client.base_url());
let http = crate::agent::api::http::HttpClient::builder()
.timeout(std::time::Duration::from_secs(300))
.build();
let response = http.send_json("POST", &url, &[], &body).await?; let response = http.send_json("POST", &url, &[], &body).await?;
let status = response.status(); let status = response.status();

View file

@ -104,21 +104,22 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph)
item.classification, item.outlier_score)); item.classification, item.outlier_score));
} }
if let Some(community) = node.community_id { if let Some(community) = node.community_id {
out.push_str(&format!("Community: {} ", community)); out.push_str(&format!("Community: {} ", community));
} }
let deg = graph.degree(&item.key); let deg = graph.degree(&item.key);
let cc = graph.clustering_coefficient(&item.key);
// Hub-link ratio: what fraction of this node's edges go to hubs? // Hub-link ratio: what fraction of this node's edges go to hubs?
let neighbors = graph.neighbors(&item.key); let neighbors = graph.neighbors(&item.key);
let hub_links = neighbors.iter() let hub_links = neighbors.iter()
.filter(|(n, _)| graph.degree(n) >= hub_thresh) .filter(|(n, _)| graph.degree(n) >= hub_thresh)
.count(); .count();
let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 }; let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
let is_hub = deg >= hub_thresh; let is_hub = deg >= hub_thresh;
out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})", out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
deg, item.cc, hub_ratio * 100.0, hub_links, deg)); deg, cc, hub_ratio * 100.0, hub_links, deg));
if is_hub { if is_hub {
out.push_str(" ← THIS IS A HUB"); out.push_str(" ← THIS IS A HUB");
} else if hub_ratio > 0.6 { } else if hub_ratio > 0.6 {

View file

@ -43,7 +43,6 @@ impl ConsciousScreen {
name: format!("mem: {}", key), name: format!("mem: {}", key),
tokens: node.tokens(), tokens: node.tokens(),
content: text.clone(), content: text.clone(),
token_ids: leaf.token_ids().to_vec(),
children: Vec::new(), children: Vec::new(),
status: score.map(|s| format!("{:.2}", s)).unwrap_or_default(), status: score.map(|s| format!("{:.2}", s)).unwrap_or_default(),
}); });
@ -56,7 +55,6 @@ impl ConsciousScreen {
name: format!("Memory nodes ({})", mem_children.len()), name: format!("Memory nodes ({})", mem_children.len()),
tokens: mem_tokens, tokens: mem_tokens,
content: String::new(), content: String::new(),
token_ids: Vec::new(),
children: mem_children, children: mem_children,
status: format!("{} scored, {} unscored", scored, unscored), status: format!("{} scored, {} unscored", scored, unscored),
}); });
@ -72,13 +70,11 @@ impl ConsciousScreen {
AstNode::Leaf(leaf) => leaf.body().text().to_string(), AstNode::Leaf(leaf) => leaf.body().text().to_string(),
_ => String::new(), _ => String::new(),
}, },
token_ids: node.token_ids(),
children: match node { children: match node {
AstNode::Branch { children, .. } => children.iter() AstNode::Branch { children, .. } => children.iter()
.map(|c| SectionView { .map(|c| SectionView {
name: c.label(), tokens: c.tokens(), name: c.label(), tokens: c.tokens(),
content: match c { AstNode::Leaf(l) => l.body().text().to_string(), _ => String::new() }, content: match c { AstNode::Leaf(l) => l.body().text().to_string(), _ => String::new() },
token_ids: match c { AstNode::Leaf(l) => l.token_ids().to_vec(), _ => c.token_ids() },
children: Vec::new(), status: String::new(), children: Vec::new(), status: String::new(),
}).collect(), }).collect(),
_ => Vec::new(), _ => Vec::new(),
@ -105,7 +101,6 @@ impl ConsciousScreen {
name: format!("Conversation ({} entries)", conv_children.len()), name: format!("Conversation ({} entries)", conv_children.len()),
tokens: conv_tokens, tokens: conv_tokens,
content: String::new(), content: String::new(),
token_ids: Vec::new(),
children: conv_children, children: conv_children,
status: String::new(), status: String::new(),
}); });

View file

@ -74,7 +74,7 @@ fn truncate(s: &str, max: usize) -> String {
} }
/// A screen that can draw itself and handle input. /// A screen that can draw itself and handle input.
trait ScreenView { trait ScreenView: Send {
fn tick(&mut self, frame: &mut ratatui::Frame, area: ratatui::layout::Rect, fn tick(&mut self, frame: &mut ratatui::Frame, area: ratatui::layout::Rect,
events: &[ratatui::crossterm::event::Event], app: &mut App); events: &[ratatui::crossterm::event::Event], app: &mut App);
fn label(&self) -> &'static str; fn label(&self) -> &'static str;
@ -291,21 +291,22 @@ async fn start(cli: crate::user::CliArgs) -> Result<()> {
ui_handle.join().unwrap_or_else(|_| Err(anyhow::anyhow!("UI thread panicked"))) ui_handle.join().unwrap_or_else(|_| Err(anyhow::anyhow!("UI thread panicked")))
} }
async fn hotkey_cycle_reasoning(mind: &crate::mind::Mind) { fn hotkey_cycle_reasoning(mind: &crate::mind::Mind) {
let mut ag = mind.agent.state.lock().await; if let Ok(mut ag) = mind.agent.state.try_lock() {
let next = match ag.reasoning_effort.as_str() { let next = match ag.reasoning_effort.as_str() {
"none" => "low", "none" => "low",
"low" => "high", "low" => "high",
_ => "none", _ => "none",
}; };
ag.reasoning_effort = next.to_string(); ag.reasoning_effort = next.to_string();
let label = match next { let label = match next {
"none" => "off (monologue hidden)", "none" => "off (monologue hidden)",
"low" => "low (brief monologue)", "low" => "low (brief monologue)",
"high" => "high (full monologue)", "high" => "high (full monologue)",
_ => next, _ => next,
}; };
ag.notify(format!("reasoning: {}", label)); ag.notify(format!("reasoning: {}", label));
}
} }
async fn hotkey_kill_processes(mind: &crate::mind::Mind) { async fn hotkey_kill_processes(mind: &crate::mind::Mind) {
@ -591,7 +592,7 @@ async fn run(
} else if key.modifiers.contains(KeyModifiers::CONTROL) { } else if key.modifiers.contains(KeyModifiers::CONTROL) {
match key.code { match key.code {
KeyCode::Char('c') => { app.should_quit = true; } KeyCode::Char('c') => { app.should_quit = true; }
KeyCode::Char('r') => hotkey_cycle_reasoning(mind).await, KeyCode::Char('r') => hotkey_cycle_reasoning(mind),
KeyCode::Char('k') => hotkey_kill_processes(mind).await, KeyCode::Char('k') => hotkey_kill_processes(mind).await,
KeyCode::Char('p') => hotkey_cycle_autonomy(mind), KeyCode::Char('p') => hotkey_cycle_autonomy(mind),
_ => {} _ => {}
@ -755,11 +756,6 @@ fn restore_stderr(original_fd: std::os::fd::RawFd) {
#[tokio::main] #[tokio::main]
pub async fn main() { pub async fn main() {
// Install target-routed file logger: `target: "grpc"` records go to
// ~/.consciousness/logs/daemon/grpc.log, everything else to debug.log.
// Level from RUST_LOG, defaulting to info.
let _ = crate::logging::init();
// Reap channel-daemon zombies via a SIGCHLD handler that only touches // Reap channel-daemon zombies via a SIGCHLD handler that only touches
// PIDs listed in channels_dir(). Avoids SIGCHLD=SIG_IGN, which would // PIDs listed in channels_dir(). Avoids SIGCHLD=SIG_IGN, which would
// break tokio::process::Command::wait() (kernel auto-reap → ECHILD). // break tokio::process::Command::wait() (kernel auto-reap → ECHILD).

View file

@ -207,7 +207,6 @@ impl SubconsciousScreen {
name: key.clone(), name: key.clone(),
tokens: 0, tokens: 0,
content: val.clone(), content: val.clone(),
token_ids: Vec::new(),
children: Vec::new(), children: Vec::new(),
status: String::new(), status: String::new(),
} }
@ -239,7 +238,6 @@ impl SubconsciousScreen {
name: format!("Conversation ({} entries)", conv_children.len()), name: format!("Conversation ({} entries)", conv_children.len()),
tokens: conv_children.iter().map(|c| c.tokens).sum(), tokens: conv_children.iter().map(|c| c.tokens).sum(),
content: String::new(), content: String::new(),
token_ids: Vec::new(),
children: conv_children, children: conv_children,
status: String::new(), status: String::new(),
}); });

View file

@ -8,18 +8,11 @@ use ratatui::{
}; };
use crate::agent::context::{AstNode, Ast, NodeBody}; use crate::agent::context::{AstNode, Ast, NodeBody};
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone)]
pub struct SectionView { pub struct SectionView {
pub name: String, pub name: String,
pub tokens: usize, pub tokens: usize,
pub content: String, pub content: String,
/// Token-id stream for this subtree, displayed in place of
/// `content` when the tree's show-tokens mode is on. Populated
/// from `leaf.token_ids()` / `node.token_ids()` for views built
/// from the AST; empty for views that don't have a corresponding
/// AST node (subconscious entries, etc.), in which case the
/// token view falls back to the text content.
pub token_ids: Vec<u32>,
pub children: Vec<SectionView>, pub children: Vec<SectionView>,
/// Extra status text shown after the token count. /// Extra status text shown after the token count.
pub status: String, pub status: String,
@ -39,7 +32,6 @@ fn node_to_view(node: &AstNode) -> SectionView {
name, name,
tokens: node.tokens(), tokens: node.tokens(),
content: leaf.body().text().to_string(), content: leaf.body().text().to_string(),
token_ids: leaf.token_ids().to_vec(),
children: Vec::new(), children: Vec::new(),
status, status,
} }
@ -52,7 +44,6 @@ fn node_to_view(node: &AstNode) -> SectionView {
name: node.label(), name: node.label(),
tokens: node.tokens(), tokens: node.tokens(),
content: String::new(), content: String::new(),
token_ids: node.token_ids(),
children: child_views, children: child_views,
status: String::new(), status: String::new(),
} }
@ -63,12 +54,10 @@ fn node_to_view(node: &AstNode) -> SectionView {
pub fn section_to_view(name: &str, nodes: &[AstNode]) -> SectionView { pub fn section_to_view(name: &str, nodes: &[AstNode]) -> SectionView {
let children: Vec<SectionView> = nodes.iter().map(|n| node_to_view(n)).collect(); let children: Vec<SectionView> = nodes.iter().map(|n| node_to_view(n)).collect();
let total_tokens: usize = nodes.iter().map(|n| n.tokens()).sum(); let total_tokens: usize = nodes.iter().map(|n| n.tokens()).sum();
let token_ids: Vec<u32> = nodes.iter().flat_map(|n| n.token_ids()).collect();
SectionView { SectionView {
name: name.to_string(), name: name.to_string(),
tokens: total_tokens, tokens: total_tokens,
content: String::new(), content: String::new(),
token_ids,
children, children,
status: String::new(), status: String::new(),
} }
@ -115,7 +104,7 @@ pub fn format_ts_age(ts: i64) -> String {
/// Key legend for SectionTree panes. /// Key legend for SectionTree panes.
pub fn tree_legend() -> Line<'static> { pub fn tree_legend() -> Line<'static> {
Line::styled( Line::styled(
" ↑↓:nav →/Enter:expand ←:collapse e:expand c:collapse v:toggle tokens/text PgUp/Dn ", " ↑↓:nav →/Enter:expand ←:collapse e:expand all c:collapse all PgUp/Dn Home/End ",
Style::default().fg(Color::DarkGray), Style::default().fg(Color::DarkGray),
) )
} }
@ -196,19 +185,11 @@ pub struct SectionTree {
pub selected: Option<usize>, pub selected: Option<usize>,
pub expanded: std::collections::HashSet<usize>, pub expanded: std::collections::HashSet<usize>,
pub scroll: super::scroll_pane::ScrollPaneState, pub scroll: super::scroll_pane::ScrollPaneState,
/// When true, render `token_ids` as space-separated IDs in place
/// of `content` in expanded panels. Toggled with 'v'.
pub show_tokens: bool,
} }
impl SectionTree { impl SectionTree {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self { selected: None, expanded: std::collections::HashSet::new(), scroll: super::scroll_pane::ScrollPaneState::new() }
selected: None,
expanded: std::collections::HashSet::new(),
scroll: super::scroll_pane::ScrollPaneState::new(),
show_tokens: false,
}
} }
fn total_nodes(&self, sections: &[SectionView]) -> usize { fn total_nodes(&self, sections: &[SectionView]) -> usize {
@ -283,9 +264,6 @@ impl SectionTree {
KeyCode::Char('c') => { KeyCode::Char('c') => {
self.expanded.clear(); self.expanded.clear();
} }
KeyCode::Char('v') => {
self.show_tokens = !self.show_tokens;
}
_ => {} _ => {}
} }
self.scroll_to_selected(height); self.scroll_to_selected(height);
@ -348,12 +326,7 @@ impl SectionTree {
} }
} else if has_content { } else if has_content {
let content_indent = format!("{}", " ".repeat(depth + 1)); let content_indent = format!("{}", " ".repeat(depth + 1));
let body = if self.show_tokens && !section.token_ids.is_empty() { let content_lines: Vec<&str> = section.content.lines().collect();
format_token_ids_wrapped(&section.token_ids)
} else {
section.content.clone()
};
let content_lines: Vec<&str> = body.lines().collect();
let show = content_lines.len().min(50); let show = content_lines.len().min(50);
for line in &content_lines[..show] { for line in &content_lines[..show] {
lines.push(Line::styled( lines.push(Line::styled(
@ -371,16 +344,3 @@ impl SectionTree {
} }
} }
} }
/// Render a token-id stream as text for the content panel.
///
/// Each ID is written in decimal. IDs on the same row are separated by a
/// single space, and a line break is inserted after every 12th ID so rows
/// stay short enough to fit comfortably in a pane. An empty slice yields
/// an empty string, and the output never ends with a trailing separator.
fn format_token_ids_wrapped(ids: &[u32]) -> String {
    ids.chunks(12)
        .map(|row| {
            row.iter()
                .map(|id| id.to_string())
                .collect::<Vec<_>>()
                .join(" ")
        })
        .collect::<Vec<_>>()
        .join("\n")
}