Compare commits
No commits in common. "master" and "master" have entirely different histories.
49 changed files with 1180 additions and 4680 deletions
514
Cargo.lock
generated
514
Cargo.lock
generated
|
|
@ -165,39 +165,6 @@ dependencies = [
|
|||
"tree-sitter-yaml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-stream"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476"
|
||||
dependencies = [
|
||||
"async-stream-impl",
|
||||
"futures-core",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-stream-impl"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
version = "0.1.89"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atomic"
|
||||
version = "0.6.1"
|
||||
|
|
@ -241,53 +208,6 @@ dependencies = [
|
|||
"fs_extra",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum"
|
||||
version = "0.7.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"axum-core",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"http-body-util",
|
||||
"itoa",
|
||||
"matchit",
|
||||
"memchr",
|
||||
"mime",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"serde",
|
||||
"sync_wrapper",
|
||||
"tower 0.5.3",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum-core"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"http-body-util",
|
||||
"mime",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"sync_wrapper",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.13.1"
|
||||
|
|
@ -571,7 +491,6 @@ dependencies = [
|
|||
"anyhow",
|
||||
"ast-grep-core",
|
||||
"ast-grep-language",
|
||||
"async-stream",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"capnp",
|
||||
|
|
@ -599,14 +518,11 @@ dependencies = [
|
|||
"notify-debouncer-mini",
|
||||
"paste",
|
||||
"peg",
|
||||
"prost",
|
||||
"protoc-bin-vendored",
|
||||
"ratatui",
|
||||
"redb",
|
||||
"regex",
|
||||
"rustls",
|
||||
"rustls-native-certs",
|
||||
"rustls-pemfile",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
|
|
@ -615,10 +531,7 @@ dependencies = [
|
|||
"tokenizers",
|
||||
"tokio",
|
||||
"tokio-rustls",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
"tui-markdown",
|
||||
"tui-textarea-2",
|
||||
"uuid",
|
||||
|
|
@ -1151,12 +1064,6 @@ version = "0.4.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
|
||||
|
||||
[[package]]
|
||||
name = "fixedbitset"
|
||||
version = "0.5.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.1.9"
|
||||
|
|
@ -1381,31 +1288,6 @@ dependencies = [
|
|||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.4.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
|
||||
dependencies = [
|
||||
"atomic-waker",
|
||||
"bytes",
|
||||
"fnv",
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
"http",
|
||||
"indexmap 2.14.0",
|
||||
"slab",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.15.5"
|
||||
|
|
@ -1511,12 +1393,6 @@ version = "1.10.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
|
||||
|
||||
[[package]]
|
||||
name = "httpdate"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
|
||||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "1.9.0"
|
||||
|
|
@ -1527,11 +1403,9 @@ dependencies = [
|
|||
"bytes",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"h2",
|
||||
"http",
|
||||
"http-body",
|
||||
"httparse",
|
||||
"httpdate",
|
||||
"itoa",
|
||||
"pin-project-lite",
|
||||
"smallvec",
|
||||
|
|
@ -1539,19 +1413,6 @@ dependencies = [
|
|||
"want",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-timeout"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0"
|
||||
dependencies = [
|
||||
"hyper",
|
||||
"hyper-util",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-util"
|
||||
version = "0.1.20"
|
||||
|
|
@ -1559,17 +1420,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"futures-channel",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"hyper",
|
||||
"libc",
|
||||
"pin-project-lite",
|
||||
"socket2 0.6.3",
|
||||
"tokio",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1630,16 +1485,6 @@ version = "0.14.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09e54e57b4c48b40f7aec75635392b12b3421fa26fe8b4332e63138ed278459c"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"hashbrown 0.12.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.14.0"
|
||||
|
|
@ -2013,12 +1858,6 @@ dependencies = [
|
|||
"xml5ever",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matchit"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.8.0"
|
||||
|
|
@ -2049,12 +1888,6 @@ dependencies = [
|
|||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mime"
|
||||
version = "0.3.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
|
|
@ -2105,12 +1938,6 @@ dependencies = [
|
|||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "multimap"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
|
||||
|
||||
[[package]]
|
||||
name = "new_debug_unreachable"
|
||||
version = "1.0.6"
|
||||
|
|
@ -2406,16 +2233,6 @@ dependencies = [
|
|||
"sha2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "petgraph"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
|
||||
dependencies = [
|
||||
"fixedbitset 0.5.7",
|
||||
"indexmap 2.14.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.11.3"
|
||||
|
|
@ -2468,26 +2285,6 @@ dependencies = [
|
|||
"siphasher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project"
|
||||
version = "1.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517"
|
||||
dependencies = [
|
||||
"pin-project-internal",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-internal"
|
||||
version = "1.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.17"
|
||||
|
|
@ -2507,7 +2304,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"indexmap 2.14.0",
|
||||
"indexmap",
|
||||
"quick-xml",
|
||||
"serde",
|
||||
"time",
|
||||
|
|
@ -2581,122 +2378,6 @@ dependencies = [
|
|||
"yansi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost"
|
||||
version = "0.13.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"prost-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost-build"
|
||||
version = "0.13.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"itertools",
|
||||
"log",
|
||||
"multimap",
|
||||
"once_cell",
|
||||
"petgraph",
|
||||
"prettyplease",
|
||||
"prost",
|
||||
"prost-types",
|
||||
"regex",
|
||||
"syn 2.0.117",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost-derive"
|
||||
version = "0.13.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"itertools",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost-types"
|
||||
version = "0.13.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16"
|
||||
dependencies = [
|
||||
"prost",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "protoc-bin-vendored"
|
||||
version = "3.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d1c381df33c98266b5f08186583660090a4ffa0889e76c7e9a5e175f645a67fa"
|
||||
dependencies = [
|
||||
"protoc-bin-vendored-linux-aarch_64",
|
||||
"protoc-bin-vendored-linux-ppcle_64",
|
||||
"protoc-bin-vendored-linux-s390_64",
|
||||
"protoc-bin-vendored-linux-x86_32",
|
||||
"protoc-bin-vendored-linux-x86_64",
|
||||
"protoc-bin-vendored-macos-aarch_64",
|
||||
"protoc-bin-vendored-macos-x86_64",
|
||||
"protoc-bin-vendored-win32",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "protoc-bin-vendored-linux-aarch_64"
|
||||
version = "3.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c350df4d49b5b9e3ca79f7e646fde2377b199e13cfa87320308397e1f37e1a4c"
|
||||
|
||||
[[package]]
|
||||
name = "protoc-bin-vendored-linux-ppcle_64"
|
||||
version = "3.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a55a63e6c7244f19b5c6393f025017eb5d793fd5467823a099740a7a4222440c"
|
||||
|
||||
[[package]]
|
||||
name = "protoc-bin-vendored-linux-s390_64"
|
||||
version = "3.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1dba5565db4288e935d5330a07c264a4ee8e4a5b4a4e6f4e83fad824cc32f3b0"
|
||||
|
||||
[[package]]
|
||||
name = "protoc-bin-vendored-linux-x86_32"
|
||||
version = "3.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8854774b24ee28b7868cd71dccaae8e02a2365e67a4a87a6cd11ee6cdbdf9cf5"
|
||||
|
||||
[[package]]
|
||||
name = "protoc-bin-vendored-linux-x86_64"
|
||||
version = "3.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b38b07546580df720fa464ce124c4b03630a6fb83e05c336fea2a241df7e5d78"
|
||||
|
||||
[[package]]
|
||||
name = "protoc-bin-vendored-macos-aarch_64"
|
||||
version = "3.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89278a9926ce312e51f1d999fee8825d324d603213344a9a706daa009f1d8092"
|
||||
|
||||
[[package]]
|
||||
name = "protoc-bin-vendored-macos-x86_64"
|
||||
version = "3.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81745feda7ccfb9471d7a4de888f0652e806d5795b61480605d4943176299756"
|
||||
|
||||
[[package]]
|
||||
name = "protoc-bin-vendored-win32"
|
||||
version = "3.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95067976aca6421a523e491fce939a3e65249bac4b977adee0ee9771568e8aa3"
|
||||
|
||||
[[package]]
|
||||
name = "pulldown-cmark"
|
||||
version = "0.13.3"
|
||||
|
|
@ -2752,8 +2433,6 @@ version = "0.8.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha 0.3.1",
|
||||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
|
|
@ -2763,20 +2442,10 @@ version = "0.9.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
|
||||
dependencies = [
|
||||
"rand_chacha 0.9.0",
|
||||
"rand_chacha",
|
||||
"rand_core 0.9.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.9.0"
|
||||
|
|
@ -2792,9 +2461,6 @@ name = "rand_core"
|
|||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||
dependencies = [
|
||||
"getrandom 0.2.17",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
|
|
@ -3043,15 +2709,6 @@ dependencies = [
|
|||
"security-framework",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls-pemfile"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50"
|
||||
dependencies = [
|
||||
"rustls-pki-types",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls-pki-types"
|
||||
version = "1.14.0"
|
||||
|
|
@ -3174,7 +2831,7 @@ version = "1.0.149"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
|
||||
dependencies = [
|
||||
"indexmap 2.14.0",
|
||||
"indexmap",
|
||||
"itoa",
|
||||
"memchr",
|
||||
"serde",
|
||||
|
|
@ -3278,16 +2935,6 @@ version = "0.3.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.5.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.6.3"
|
||||
|
|
@ -3402,12 +3049,6 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sync_wrapper"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
|
||||
|
||||
[[package]]
|
||||
name = "syntect"
|
||||
version = "5.3.0"
|
||||
|
|
@ -3486,7 +3127,7 @@ dependencies = [
|
|||
"fancy-regex",
|
||||
"filedescriptor",
|
||||
"finl_unicode",
|
||||
"fixedbitset 0.4.2",
|
||||
"fixedbitset",
|
||||
"hex",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
|
|
@ -3646,7 +3287,7 @@ dependencies = [
|
|||
"parking_lot",
|
||||
"pin-project-lite",
|
||||
"signal-hook-registry",
|
||||
"socket2 0.6.3",
|
||||
"socket2",
|
||||
"tokio-macros",
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
|
@ -3672,17 +3313,6 @@ dependencies = [
|
|||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-stream"
|
||||
version = "0.1.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-util"
|
||||
version = "0.7.18"
|
||||
|
|
@ -3697,130 +3327,6 @@ dependencies = [
|
|||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tonic"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
"axum",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"h2",
|
||||
"http",
|
||||
"http-body",
|
||||
"http-body-util",
|
||||
"hyper",
|
||||
"hyper-timeout",
|
||||
"hyper-util",
|
||||
"percent-encoding",
|
||||
"pin-project",
|
||||
"prost",
|
||||
"rustls-native-certs",
|
||||
"rustls-pemfile",
|
||||
"socket2 0.5.10",
|
||||
"tokio",
|
||||
"tokio-rustls",
|
||||
"tokio-stream",
|
||||
"tower 0.4.13",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tonic-build"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11"
|
||||
dependencies = [
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"prost-build",
|
||||
"prost-types",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower"
|
||||
version = "0.4.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"indexmap 1.9.3",
|
||||
"pin-project",
|
||||
"pin-project-lite",
|
||||
"rand 0.8.5",
|
||||
"slab",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"pin-project-lite",
|
||||
"sync_wrapper",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower-layer"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
|
||||
|
||||
[[package]]
|
||||
name = "tower-service"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
|
||||
|
||||
[[package]]
|
||||
name = "tracing"
|
||||
version = "0.1.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
|
||||
dependencies = [
|
||||
"pin-project-lite",
|
||||
"tracing-attributes",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-attributes"
|
||||
version = "0.1.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-core"
|
||||
version = "0.1.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter"
|
||||
version = "0.26.8"
|
||||
|
|
@ -4379,7 +3885,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"indexmap 2.14.0",
|
||||
"indexmap",
|
||||
"wasm-encoder",
|
||||
"wasmparser",
|
||||
]
|
||||
|
|
@ -4392,7 +3898,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
|
|||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"hashbrown 0.15.5",
|
||||
"indexmap 2.14.0",
|
||||
"indexmap",
|
||||
"semver",
|
||||
]
|
||||
|
||||
|
|
@ -4761,7 +4267,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
|
|||
dependencies = [
|
||||
"anyhow",
|
||||
"heck",
|
||||
"indexmap 2.14.0",
|
||||
"indexmap",
|
||||
"prettyplease",
|
||||
"syn 2.0.117",
|
||||
"wasm-metadata",
|
||||
|
|
@ -4792,7 +4298,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
|
|||
dependencies = [
|
||||
"anyhow",
|
||||
"bitflags 2.11.0",
|
||||
"indexmap 2.14.0",
|
||||
"indexmap",
|
||||
"log",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
|
|
@ -4811,7 +4317,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
|
|||
dependencies = [
|
||||
"anyhow",
|
||||
"id-arena",
|
||||
"indexmap 2.14.0",
|
||||
"indexmap",
|
||||
"log",
|
||||
"semver",
|
||||
"serde",
|
||||
|
|
|
|||
11
Cargo.toml
11
Cargo.toml
|
|
@ -18,9 +18,6 @@ name = "consciousness"
|
|||
version.workspace = true
|
||||
edition.workspace = true
|
||||
|
||||
[features]
|
||||
nightly-diagnostics = []
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
html2md = "0.2"
|
||||
|
|
@ -64,11 +61,6 @@ futures = "0.3"
|
|||
capnp = "0.25"
|
||||
capnp-rpc = "0.25"
|
||||
|
||||
tonic = { version = "0.12", features = ["tls", "tls-roots"] }
|
||||
prost = "0.13"
|
||||
async-stream = "0.3"
|
||||
tokio-stream = "0.1"
|
||||
|
||||
tokenizers = "0.22"
|
||||
|
||||
http = "1"
|
||||
|
|
@ -82,13 +74,10 @@ imagesize = "0.14"
|
|||
rustls = "0.23"
|
||||
tokio-rustls = "0.26"
|
||||
rustls-native-certs = "0.8"
|
||||
rustls-pemfile = "2"
|
||||
serde_urlencoded = "0.7"
|
||||
|
||||
[build-dependencies]
|
||||
capnpc = "0.25"
|
||||
tonic-build = { version = "0.12", default-features = false, features = ["prost", "transport"] }
|
||||
protoc-bin-vendored = "3"
|
||||
|
||||
[lib]
|
||||
name = "consciousness"
|
||||
|
|
|
|||
17
build.rs
17
build.rs
|
|
@ -13,21 +13,4 @@ fn main() {
|
|||
.file("schema/channel.capnp")
|
||||
.run()
|
||||
.expect("capnp compile failed (channel.capnp)");
|
||||
|
||||
// Generate salience.v1 gRPC client + message types from proto.
|
||||
// Server side (python) is generated separately via grpcio-tools.
|
||||
// Use vendored protoc so we don't require a system install.
|
||||
let protoc = protoc_bin_vendored::protoc_bin_path()
|
||||
.expect("vendored protoc not available for this platform");
|
||||
// SAFETY: build script is single-threaded at this point; setting env
|
||||
// before invoking tonic_build is the documented way to point it at a
|
||||
// non-PATH protoc.
|
||||
unsafe { std::env::set_var("PROTOC", protoc); }
|
||||
tonic_build::configure()
|
||||
.build_server(false)
|
||||
.build_client(true)
|
||||
.compile_protos(&["proto/salience.proto"], &["proto"])
|
||||
.expect("tonic_build compile failed (salience.proto)");
|
||||
|
||||
println!("cargo:rerun-if-changed=proto/salience.proto");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -181,8 +181,6 @@ struct TelegramMessage {
|
|||
chat_id: i64,
|
||||
sender: String,
|
||||
text: String,
|
||||
/// Absolute path to a downloaded media file (photo, etc.), if any.
|
||||
media_path: Option<String>,
|
||||
}
|
||||
|
||||
/// Fetch and parse pending updates from Telegram via long polling.
|
||||
|
|
@ -208,115 +206,19 @@ async fn get_updates(
|
|||
let sender = msg["from"]["first_name"].as_str().unwrap_or("unknown").to_string();
|
||||
let chat_id = msg["chat"]["id"].as_i64().unwrap_or(0);
|
||||
|
||||
// Photo: array of PhotoSize, largest is last. Download largest,
|
||||
// surface message with [image: <path>] marker so the multimodal
|
||||
// model can Read the image.
|
||||
let (text, media_path) = if let Some(sizes) = msg["photo"].as_array() {
|
||||
let caption = msg["caption"].as_str().unwrap_or("").to_string();
|
||||
let largest = sizes.last();
|
||||
let file_id = largest
|
||||
.and_then(|s| s["file_id"].as_str())
|
||||
.unwrap_or("");
|
||||
if file_id.is_empty() {
|
||||
error!("telegram photo: missing file_id in update {update_id}");
|
||||
(caption, None)
|
||||
} else {
|
||||
// Bound the download — HttpClient::request_timeout only covers
|
||||
// send_request, not body collect, so an indefinitely-slow body
|
||||
// would otherwise stall every subsequent poll.
|
||||
let dl = tokio::time::timeout(
|
||||
std::time::Duration::from_secs(60),
|
||||
download_telegram_file(client, token, file_id),
|
||||
).await
|
||||
.unwrap_or_else(|_| Err("download timed out after 60s".into()));
|
||||
match dl {
|
||||
Ok(path) => (caption, Some(path)),
|
||||
Err(e) => {
|
||||
error!("telegram photo download failed (file_id={file_id}): {e}");
|
||||
// Surface what we have: caption plus a marker that
|
||||
// a photo was sent but couldn't be fetched.
|
||||
let marker = format!("[image: download failed: {e}]");
|
||||
let combined = if caption.is_empty() {
|
||||
marker
|
||||
} else {
|
||||
format!("{marker}\n{caption}")
|
||||
};
|
||||
(combined, None)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if let Some(text) = msg["text"].as_str() {
|
||||
(text.to_string(), None)
|
||||
} else {
|
||||
// Other media types (voice, video, sticker, etc.) — skip for now,
|
||||
// but log so we can extend later.
|
||||
let kind = ["voice", "video", "sticker", "document", "audio", "animation"]
|
||||
.iter()
|
||||
.find(|k| !msg[**k].is_null())
|
||||
.copied()
|
||||
.unwrap_or("unknown");
|
||||
info!("telegram: skipping non-text/photo message (kind={kind}, update_id={update_id})");
|
||||
continue;
|
||||
};
|
||||
|
||||
if let Some(text) = msg["text"].as_str() {
|
||||
messages.push(TelegramMessage {
|
||||
update_id,
|
||||
chat_id,
|
||||
sender,
|
||||
text,
|
||||
media_path,
|
||||
text: text.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
/// Resolve a Telegram file_id to a downloadable URL path via getFile.
|
||||
async fn get_file_path(
|
||||
client: &HttpClient,
|
||||
token: &str,
|
||||
file_id: &str,
|
||||
) -> Result<String, Box<dyn std::error::Error>> {
|
||||
let url = format!(
|
||||
"https://api.telegram.org/bot{}/getFile?file_id={}",
|
||||
token, file_id,
|
||||
);
|
||||
let response = client.get(&url).await?;
|
||||
let body = response.text().await?;
|
||||
let resp: serde_json::Value = serde_json::from_str(&body)
|
||||
.map_err(|e| format!("getFile JSON parse error: {e}"))?;
|
||||
if !resp["ok"].as_bool().unwrap_or(false) {
|
||||
return Err(format!("getFile failed: {}", resp["description"].as_str().unwrap_or("?")).into());
|
||||
}
|
||||
let file_path = resp["result"]["file_path"].as_str()
|
||||
.ok_or("getFile: missing result.file_path")?;
|
||||
Ok(file_path.to_string())
|
||||
}
|
||||
|
||||
/// Download a Telegram file by file_id into the channel media dir.
|
||||
/// Returns the absolute local path on success.
|
||||
async fn download_telegram_file(
|
||||
client: &HttpClient,
|
||||
token: &str,
|
||||
file_id: &str,
|
||||
) -> Result<String, Box<dyn std::error::Error>> {
|
||||
let file_path = get_file_path(client, token, file_id).await?;
|
||||
let url = format!("https://api.telegram.org/file/bot{}/{}", token, file_path);
|
||||
let response = client.get(&url).await?;
|
||||
let status = response.status();
|
||||
if !status.is_success() {
|
||||
return Err(format!("file download failed: {status}").into());
|
||||
}
|
||||
let bytes = response.bytes().await?;
|
||||
|
||||
let ext = file_path.rsplit('.').next().filter(|e| !e.contains('/')).unwrap_or("dat");
|
||||
let media_dir = log_dir().join("media");
|
||||
std::fs::create_dir_all(&media_dir)?;
|
||||
let dest = media_dir.join(format!("{file_id}.{ext}"));
|
||||
std::fs::write(&dest, &bytes)?;
|
||||
Ok(dest.to_string_lossy().to_string())
|
||||
}
|
||||
|
||||
/// Send a text message to a Telegram chat.
|
||||
async fn send_message(
|
||||
client: &HttpClient,
|
||||
|
|
@ -467,19 +369,11 @@ async fn poll_once(
|
|||
let sender_lower = msg.sender.to_lowercase();
|
||||
let channel = format!("telegram.{}", sender_lower);
|
||||
|
||||
// If the message has media, prepend an [image: <abs_path>] marker
|
||||
// so the multimodal model can Read the file directly.
|
||||
let body = match &msg.media_path {
|
||||
Some(path) if msg.text.is_empty() => format!("[image: {path}]"),
|
||||
Some(path) => format!("[image: {path}]\n{}", msg.text),
|
||||
None => msg.text.clone(),
|
||||
};
|
||||
|
||||
channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &body);
|
||||
channel_log::append_disk_log(&log_dir(), &sender_lower, &msg.sender, &msg.text);
|
||||
|
||||
let mut s = state.borrow_mut();
|
||||
s.config.chat_ids.insert(sender_lower, msg.chat_id);
|
||||
let line = format!("[{}] {}", msg.sender, body);
|
||||
let line = format!("[{}] {}", msg.sender, msg.text);
|
||||
s.push_message(line, 2, &channel);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,12 +26,10 @@ use consciousness::thalamus::channel_log::ChannelLog;
|
|||
|
||||
#[derive(Clone, serde::Serialize, serde::Deserialize)]
|
||||
struct PaneConfig {
|
||||
/// Human-readable label: becomes the channel name "tmux.<label>",
|
||||
/// and the tmux pane title / window name the live pane id is
|
||||
/// resolved from. The pane id is deliberately not stored — it is
|
||||
/// ephemeral (recycled across pane and tmux-server restarts), so it
|
||||
/// is looked up fresh on every connect attempt.
|
||||
/// Human-readable label, becomes the channel name "tmux.<label>"
|
||||
label: String,
|
||||
/// Tmux pane ID, e.g. "%5"
|
||||
pane_id: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, serde::Serialize, serde::Deserialize)]
|
||||
|
|
@ -88,9 +86,11 @@ impl State {
|
|||
}
|
||||
}
|
||||
|
||||
/// Whether a pane with this label is registered.
|
||||
fn has_pane(&self, label: &str) -> bool {
|
||||
self.config.panes.iter().any(|p| p.label == label)
|
||||
/// Get pane_id for a label
|
||||
fn get_pane(&self, label: &str) -> Option<&str> {
|
||||
self.config.panes.iter()
|
||||
.find(|p| p.label == label)
|
||||
.map(|p| p.pane_id.as_str())
|
||||
}
|
||||
|
||||
/// Check if a pane is connected
|
||||
|
|
@ -103,124 +103,98 @@ impl State {
|
|||
self.connected.insert(label.to_string(), connected);
|
||||
}
|
||||
|
||||
/// Register a pane and persist.
|
||||
fn add_pane(&mut self, label: String) {
|
||||
/// Add a pane and persist
|
||||
fn add_pane(&mut self, label: String, pane_id: String) {
|
||||
if !self.config.panes.iter().any(|p| p.label == label) {
|
||||
self.config.panes.push(PaneConfig { label });
|
||||
self.config.panes.push(PaneConfig { label, pane_id });
|
||||
save_config(&self.config);
|
||||
}
|
||||
}
|
||||
|
||||
/// Unregister a pane and persist. Returns whether it was registered.
|
||||
fn remove_pane(&mut self, label: &str) -> bool {
|
||||
/// Remove a pane and persist
|
||||
fn remove_pane(&mut self, label: &str) -> Option<String> {
|
||||
if let Some(idx) = self.config.panes.iter().position(|p| p.label == label) {
|
||||
self.config.panes.remove(idx);
|
||||
let pane = self.config.panes.remove(idx);
|
||||
self.connected.remove(label);
|
||||
save_config(&self.config);
|
||||
true
|
||||
Some(pane.pane_id)
|
||||
} else {
|
||||
false
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Pipe-Pane Reader ──────────────────────────────────────────
|
||||
|
||||
/// Wait between connect attempts for a pane that is not yet reachable.
|
||||
const RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_secs(2);
|
||||
|
||||
/// Keep a pane streamed into its channel log for as long as it stays
|
||||
/// registered. The pane id is resolved fresh by label on every connect
|
||||
/// attempt — tmux pane ids are ephemeral, so the label (pane title /
|
||||
/// window name) is the durable identity. Retries until the pane exists
|
||||
/// and pipe-pane succeeds, and reconnects the same way if the pipe
|
||||
/// later drops. Returns once close() unregisters the pane.
|
||||
async fn pipe_pane_reader(state: SharedState, label: String) {
|
||||
/// Set up pipe-pane for a single pane, reading output into the channel log.
|
||||
async fn pipe_pane_reader(state: SharedState, pane: PaneConfig) {
|
||||
let pipe_dir = dirs::home_dir()
|
||||
.unwrap_or_default()
|
||||
.join(".consciousness/channels/tmux-pipes");
|
||||
std::fs::create_dir_all(&pipe_dir).ok();
|
||||
let pipe_path = pipe_dir.join(format!("{}.pipe", label));
|
||||
let channel_key = format!("tmux.{}", label);
|
||||
|
||||
loop {
|
||||
if !state.borrow().has_pane(&label) {
|
||||
return;
|
||||
}
|
||||
let pipe_path = pipe_dir.join(format!("{}.pipe", pane.label));
|
||||
let _ = std::fs::remove_file(&pipe_path);
|
||||
|
||||
connect_and_stream(&state, &label, &pipe_path, &channel_key).await;
|
||||
state.borrow_mut().set_connected(&label, false);
|
||||
|
||||
if !state.borrow().has_pane(&label) {
|
||||
return;
|
||||
}
|
||||
tokio::time::sleep(RETRY_INTERVAL).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// One connect attempt: resolve the pane's live id by label, point its
|
||||
/// output at the FIFO with pipe-pane, and stream lines into the channel
|
||||
/// log. Returns on the first failure, or when the stream ends.
|
||||
async fn connect_and_stream(
|
||||
state: &SharedState,
|
||||
label: &str,
|
||||
pipe_path: &std::path::Path,
|
||||
channel_key: &str,
|
||||
) {
|
||||
let pane_id = match find_pane_by_name(label) {
|
||||
Some(id) => id,
|
||||
None => return,
|
||||
};
|
||||
|
||||
// Fresh FIFO for this attempt.
|
||||
let _ = std::fs::remove_file(pipe_path);
|
||||
// Create a named pipe (FIFO)
|
||||
unsafe {
|
||||
let c_path = std::ffi::CString::new(pipe_path.to_str().unwrap()).unwrap();
|
||||
libc::mkfifo(c_path.as_ptr(), 0o644);
|
||||
}
|
||||
|
||||
// Point the pane's output at our FIFO.
|
||||
let pipe_cmd = format!("cat >> {}", pipe_path.to_string_lossy());
|
||||
match std::process::Command::new("tmux")
|
||||
.args(["pipe-pane", "-t", &pane_id, &pipe_cmd])
|
||||
.output()
|
||||
{
|
||||
Ok(o) if o.status.success() => {}
|
||||
Ok(o) => {
|
||||
warn!("pipe-pane failed for {} ({}): {}", label, pane_id,
|
||||
String::from_utf8_lossy(&o.stderr));
|
||||
// Tell tmux to pipe this pane's output to our FIFO
|
||||
let pipe_path_str = pipe_path.to_string_lossy().to_string();
|
||||
let result = std::process::Command::new("tmux")
|
||||
.args(["pipe-pane", "-t", &pane.pane_id, &format!("cat >> {}", pipe_path_str)])
|
||||
.output();
|
||||
|
||||
match result {
|
||||
Ok(output) if output.status.success() => {
|
||||
info!("pipe-pane set up for {} ({})", pane.label, pane.pane_id);
|
||||
}
|
||||
Ok(output) => {
|
||||
error!("pipe-pane failed for {}: {}", pane.label,
|
||||
String::from_utf8_lossy(&output.stderr));
|
||||
state.borrow_mut().set_connected(&pane.label, false);
|
||||
return;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("running tmux pipe-pane for {}: {}", label, e);
|
||||
error!("failed to run tmux pipe-pane for {}: {}", pane.label, e);
|
||||
state.borrow_mut().set_connected(&pane.label, false);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let file = match tokio::fs::File::open(pipe_path).await {
|
||||
// Open the FIFO and read lines
|
||||
let file = match tokio::fs::File::open(&pipe_path).await {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
warn!("opening pipe for {}: {}", label, e);
|
||||
error!("failed to open pipe for {}: {}", pane.label, e);
|
||||
state.borrow_mut().set_connected(&pane.label, false);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
info!("connected channel tmux.{} (pane {})", label, pane_id);
|
||||
state.borrow_mut().set_connected(label, true);
|
||||
// Mark as connected once pipe is open
|
||||
state.borrow_mut().set_connected(&pane.label, true);
|
||||
|
||||
let reader = tokio::io::BufReader::new(file);
|
||||
let mut lines = reader.lines();
|
||||
let channel_key = format!("tmux.{}", pane.label);
|
||||
|
||||
let mut lines = tokio::io::BufReader::new(file).lines();
|
||||
while let Ok(Some(line)) = lines.next_line().await {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let mut s = state.borrow_mut();
|
||||
s.channel_logs
|
||||
.entry(channel_key.to_string())
|
||||
.or_insert_with(ChannelLog::new)
|
||||
.push(line);
|
||||
let log = s.channel_logs
|
||||
.entry(channel_key.clone())
|
||||
.or_insert_with(ChannelLog::new);
|
||||
log.push(line);
|
||||
}
|
||||
|
||||
warn!("pipe-pane stream ended for {}", label);
|
||||
warn!("pipe-pane reader ended for {}", pane.label);
|
||||
state.borrow_mut().set_connected(&pane.label, false);
|
||||
}
|
||||
|
||||
// ── ChannelServer Implementation ───────────────────────────────
|
||||
|
|
@ -270,10 +244,10 @@ impl channel_server::Server for ChannelServerImpl {
|
|||
let channel = pry!(pry!(params.get_channel()).to_str()).to_string();
|
||||
let message = pry!(pry!(params.get_message()).to_str()).to_string();
|
||||
|
||||
// Send to tmux pane via send-keys — resolve the live pane id by
|
||||
// label (it is not stored).
|
||||
// Send to tmux pane via send-keys
|
||||
let label = channel.strip_prefix("tmux.").unwrap_or(&channel);
|
||||
if let Some(pane_id) = find_pane_by_name(label) {
|
||||
let pane_id = self.state.borrow().get_pane(label).map(String::from);
|
||||
if let Some(pane_id) = pane_id {
|
||||
let _ = std::process::Command::new("tmux")
|
||||
.args(["send-keys", "-t", &pane_id, &message, "Enter"])
|
||||
.output();
|
||||
|
|
@ -328,22 +302,28 @@ impl channel_server::Server for ChannelServerImpl {
|
|||
let params = pry!(params.get());
|
||||
let label = pry!(pry!(params.get_label()).to_str()).to_string();
|
||||
|
||||
// Already registered — nothing to do.
|
||||
if self.state.borrow().has_pane(&label) {
|
||||
// Check if already open
|
||||
if self.state.borrow().get_pane(&label).is_some() {
|
||||
return std::future::ready(Ok(()));
|
||||
}
|
||||
|
||||
info!("opening channel tmux.{}", label);
|
||||
// Find the tmux pane by name (window or pane title)
|
||||
let pane_id = match find_pane_by_name(&label) {
|
||||
Some(id) => id,
|
||||
None => return std::future::ready(Err(capnp::Error::failed(
|
||||
format!("no tmux pane named '{}'", label)))),
|
||||
};
|
||||
|
||||
// Register the label and persist. The pane id is not stored —
|
||||
// the reader resolves it by label on every connect attempt, so
|
||||
// this succeeds even if the pane does not exist yet; the reader
|
||||
// connects once it appears.
|
||||
self.state.borrow_mut().add_pane(label.clone());
|
||||
info!("opening channel tmux.{} (pane {})", label, pane_id);
|
||||
|
||||
// Register in state and persist
|
||||
self.state.borrow_mut().add_pane(label.clone(), pane_id.clone());
|
||||
|
||||
// Start pipe-pane reader
|
||||
let pane = PaneConfig { label, pane_id };
|
||||
let reader_state = self.state.clone();
|
||||
tokio::task::spawn_local(async move {
|
||||
pipe_pane_reader(reader_state, label).await;
|
||||
pipe_pane_reader(reader_state, pane).await;
|
||||
});
|
||||
|
||||
std::future::ready(Ok(()))
|
||||
|
|
@ -359,19 +339,15 @@ impl channel_server::Server for ChannelServerImpl {
|
|||
let label = channel.strip_prefix("tmux.").unwrap_or(&channel).to_string();
|
||||
|
||||
let mut s = self.state.borrow_mut();
|
||||
if s.remove_pane(&label) {
|
||||
if let Some(pane_id) = s.remove_pane(&label) {
|
||||
info!("closing channel tmux.{}", label);
|
||||
s.channel_logs.remove(&format!("tmux.{}", label));
|
||||
|
||||
// Stop piping if the pane is still around (if it is gone the
|
||||
// pipe is already dead). The reader then sees the pane
|
||||
// unregistered and exits.
|
||||
if let Some(pane_id) = find_pane_by_name(&label) {
|
||||
// Disconnect pipe-pane
|
||||
let _ = std::process::Command::new("tmux")
|
||||
.args(["pipe-pane", "-t", &pane_id])
|
||||
.output();
|
||||
}
|
||||
}
|
||||
|
||||
std::future::ready(Ok(()))
|
||||
}
|
||||
|
|
@ -421,13 +397,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
|
||||
tokio::task::LocalSet::new()
|
||||
.run_until(async move {
|
||||
// Start a pipe-pane reader for each configured pane; each
|
||||
// resolves its live pane id by label and retries until
|
||||
// connected.
|
||||
// Start a pipe-pane reader for each configured pane
|
||||
for pane in state.borrow().config.panes.clone() {
|
||||
let reader_state = state.clone();
|
||||
tokio::task::spawn_local(async move {
|
||||
pipe_pane_reader(reader_state, pane.label).await;
|
||||
pipe_pane_reader(reader_state, pane).await;
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
27
flake.lock
generated
27
flake.lock
generated
|
|
@ -1,27 +0,0 @@
|
|||
{
|
||||
"nodes": {
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1781074563,
|
||||
"narHash": "sha256-md8WlXOlfnIeHeOScMTTHFyf2d6iaTwPl2apR5EQ3P4=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "9ae611a455b90cf061d8f332b977e387bda8e1ca",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
||||
42
flake.nix
42
flake.nix
|
|
@ -1,42 +0,0 @@
|
|||
{
|
||||
description = "Development shell for consciousness";
|
||||
|
||||
inputs = {
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||
};
|
||||
|
||||
outputs = { nixpkgs, ... }:
|
||||
let
|
||||
systems = [
|
||||
"x86_64-linux"
|
||||
"aarch64-linux"
|
||||
];
|
||||
forAllSystems = nixpkgs.lib.genAttrs systems;
|
||||
in
|
||||
{
|
||||
devShells = forAllSystems (system:
|
||||
let
|
||||
pkgs = import nixpkgs { inherit system; };
|
||||
in
|
||||
{
|
||||
default = pkgs.mkShell {
|
||||
packages = with pkgs; [
|
||||
cargo
|
||||
rustc
|
||||
rustfmt
|
||||
clippy
|
||||
rust-analyzer
|
||||
|
||||
capnproto
|
||||
pkg-config
|
||||
|
||||
jq
|
||||
sqlite
|
||||
python3
|
||||
];
|
||||
|
||||
RUST_BACKTRACE = "1";
|
||||
};
|
||||
});
|
||||
};
|
||||
}
|
||||
|
|
@ -1,276 +0,0 @@
|
|||
// salience.proto — stateful generation + per-token concept readout over gRPC.
|
||||
//
|
||||
// Shape:
|
||||
// - One server-streaming RPC (Generate) for inference. Every other
|
||||
// operation is unary. This is the minimum streaming we need —
|
||||
// tokens arrive one at a time with optional readouts / logprobs —
|
||||
// and keeping everything else unary makes the client dramatically
|
||||
// simpler than a single bidi state machine did.
|
||||
//
|
||||
// - Server-side sessions hold the token list and image binaries.
|
||||
// Sessions exist for bandwidth: at 200K tokens we'd otherwise
|
||||
// re-ship ~800KB every turn, which hurts badly over a WAN link.
|
||||
// vLLM's prefix cache holds the KV; the session just gives the
|
||||
// client a handle so it can send deltas.
|
||||
//
|
||||
// - The client is the source of truth for prompt content. The server
|
||||
// is the source of truth for image token expansion (how many
|
||||
// IMAGE_PAD tokens an image becomes under this model). The client
|
||||
//     writes the pre-expanded <|vision_start|> + IMAGE_PAD×N +
|
||||
//     <|vision_end|> block into `append_tokens`; the server validates N.
|
||||
//
|
||||
// - Every mutation carries (offset, truncating): the client's view of
|
||||
// the server's current length, plus whether the client is deliberately
|
||||
// rewriting history. Server validates on each call and rejects drift.
|
||||
// No silent divergence, no migration bugs.
|
||||
//
|
||||
// - Errors use gRPC status codes. NOT_FOUND for missing sessions,
|
||||
// FAILED_PRECONDITION for offset drift or image-block splits,
|
||||
// RESOURCE_EXHAUSTED for context overflow, ABORTED for "session busy".
|
||||
//
|
||||
// Not in v1:
|
||||
// - Authentication beyond a shared bearer token in gRPC metadata.
|
||||
// - Multi-tenant session namespacing.
|
||||
// - Sampling traces beyond top-k logprobs.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package salience.v1;
|
||||
|
||||
// ============================================================
|
||||
// Service
|
||||
// ============================================================
|
||||
|
||||
service Salience {
|
||||
// Create a fresh session. Client uses session_id on every subsequent
|
||||
// RPC until CloseSession or TTL eviction (default 30 min idle). To
|
||||
// refresh TTL across a long pause, issue a no-op Generate (empty
|
||||
// append_tokens, max_tokens=0, no ranges).
|
||||
rpc OpenSession(OpenSessionRequest) returns (OpenSessionResponse);
|
||||
|
||||
// Release the session's tokens + images. Idempotent.
|
||||
rpc CloseSession(CloseSessionRequest) returns (CloseSessionResponse);
|
||||
|
||||
// Branch a session at a given token position. The new session
|
||||
// inherits tokens [0, at_position) and any images whose vision
|
||||
// block lies fully in that range. Rejected with FAILED_PRECONDITION
|
||||
// if at_position falls inside an image block (client picks a clean
|
||||
// boundary).
|
||||
rpc ForkSession(ForkSessionRequest) returns (ForkSessionResponse);
|
||||
|
||||
// Prefill + optionally decode. Images are attached inline via
|
||||
// `GenerateRequest.images`; the client writes its own pre-expanded
|
||||
// <|vision_start|> + N*<|image_pad|> + <|vision_end|> runs into
|
||||
// `append_tokens` and declares each run's range in `images[i]`.
|
||||
// Server validates run length against the actual vision-encoder
|
||||
// feature count and returns INVALID_ARGUMENT on mismatch. Stream
|
||||
// yields Token events (with optional readouts / logprobs per
|
||||
// position) followed by a terminating Done.
|
||||
rpc Generate(GenerateRequest) returns (stream GenerateEvent);
|
||||
|
||||
// Readout manifest for the currently-loaded model — concept names,
|
||||
// layer indices, tensor dtype. Stateless; fetch once at client
|
||||
// startup and cache.
|
||||
rpc GetReadoutManifest(GetReadoutManifestRequest) returns (ReadoutManifest);
|
||||
|
||||
// Dump the full token stream of a session. Debug-only: used by the
|
||||
// client to verify its local accounting against the server's
|
||||
// session.tokens byte-for-byte when divergence is suspected. Not
|
||||
// cheap — copies the whole sequence across the wire.
|
||||
rpc DumpSession(DumpSessionRequest) returns (DumpSessionResponse);
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Lifecycle
|
||||
// ============================================================
|
||||
|
||||
message OpenSessionRequest {
|
||||
// Model identifier, must match vLLM's served model. The server
|
||||
// only has one model loaded; this is a safety check on what the
|
||||
// client thinks it's talking to.
|
||||
string model = 1;
|
||||
}
|
||||
|
||||
message OpenSessionResponse {
|
||||
string session_id = 1;
|
||||
uint32 max_model_len = 2;
|
||||
}
|
||||
|
||||
message CloseSessionRequest {
|
||||
string session_id = 1;
|
||||
}
|
||||
|
||||
message CloseSessionResponse {}
|
||||
|
||||
message ForkSessionRequest {
|
||||
string session_id = 1; // source session
|
||||
uint32 at_position = 2; // new session inherits tokens [0, at_position)
|
||||
}
|
||||
|
||||
message ForkSessionResponse {
|
||||
string session_id = 1; // new session
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Inference
|
||||
// ============================================================
|
||||
|
||||
// One image attached to a Generate call. The client is responsible
|
||||
// for writing the expanded placeholder run (VISION_START +
|
||||
// N*IMAGE_PAD + VISION_END) into `GenerateRequest.append_tokens` at
|
||||
// positions [pad_range_start, pad_range_end) and pairing it with
|
||||
// the corresponding `ImageAttachment` entry. Server validates that
|
||||
// the declared range's pad count matches what the vision encoder
|
||||
// produces, and returns INVALID_ARGUMENT if they disagree.
|
||||
message ImageAttachment {
|
||||
// Image bytes (PNG / JPEG / WebP / …).
|
||||
bytes bytes = 1;
|
||||
|
||||
// MIME type, e.g. "image/png".
|
||||
string mime = 2;
|
||||
|
||||
// Absolute token positions (in `session.tokens` AFTER `append_tokens`
|
||||
// is applied) spanning the full vision block — `[vision_start,
|
||||
// pad*N, vision_end]`. end is exclusive, so end - start == N + 2.
|
||||
uint32 pad_range_start = 3;
|
||||
uint32 pad_range_end = 4;
|
||||
}
|
||||
|
||||
message GenerateRequest {
|
||||
string session_id = 1;
|
||||
|
||||
// Tokens to append before prefill. May be empty. Client writes the
|
||||
// full vision block (VISION_START + N*IMAGE_PAD + VISION_END) for
|
||||
// any newly-attached image directly into this stream; each such
|
||||
// block must be paired with a matching entry in `images`. The
|
||||
// server validates that the declared ranges all point at IMAGE_PAD
|
||||
// runs and that each run's length matches what the vision encoder
|
||||
// produces for the corresponding image.
|
||||
repeated uint32 append_tokens = 2;
|
||||
|
||||
// Client's view of session.tokens length at the time of the call.
|
||||
// Must equal server's actual length, OR be strictly less when
|
||||
// truncating=true (server rewinds before appending). Any other
|
||||
// mismatch is FAILED_PRECONDITION.
|
||||
uint32 offset = 3;
|
||||
bool truncating = 4;
|
||||
|
||||
// Decode budget. 0 = prefill only (no decode, emit Token events
|
||||
// for positions covered by logprobs_ranges / readout_ranges, then
|
||||
// Done; replaces the old /score endpoint). >0 = decode up to this
|
||||
// many tokens, stopping early on EOS / stop_token_ids.
|
||||
uint32 max_tokens = 5;
|
||||
|
||||
// Position ranges (absolute, within the session's post-append
|
||||
// token list) at which to emit logprobs on Token events. Empty =
|
||||
// no logprobs. `logprob_top_k > 0` returns the top-k alternative
|
||||
// tokens at each covered position; `logprob_top_k == 0` returns
|
||||
// only the sampled-token's logprob.
|
||||
repeated PositionRange logprobs_ranges = 6;
|
||||
uint32 logprob_top_k = 7;
|
||||
|
||||
// Position ranges at which to emit concept-readout vectors. Empty
|
||||
// = no readouts. Logical shape per position is
|
||||
// [n_layers][n_concepts] — see GetReadoutManifest.
|
||||
repeated PositionRange readout_ranges = 8;
|
||||
|
||||
// Sampling parameters. Meaningful only when max_tokens > 0.
|
||||
float temperature = 9; // default 1.0 when zero
|
||||
float top_p = 10; // default 1.0 when zero
|
||||
uint32 top_k = 11; // default 0 (disabled)
|
||||
repeated uint32 stop_token_ids = 12;
|
||||
|
||||
// vLLM scheduler priority (0 = interactive, 10 = batch).
|
||||
int32 priority = 13;
|
||||
|
||||
// Images newly attached on this call. Each entry describes one
|
||||
// image's binary bytes, its mime type, and the exact token-position
|
||||
// range of its pre-expanded placeholder run inside `session.tokens`
|
||||
// after `append_tokens` is applied. See `ImageAttachment`.
|
||||
repeated ImageAttachment images = 14;
|
||||
}
|
||||
|
||||
message PositionRange {
|
||||
uint32 start = 1; // inclusive
|
||||
uint32 end = 2; // exclusive
|
||||
}
|
||||
|
||||
message GenerateEvent {
|
||||
oneof event {
|
||||
Token token = 1;
|
||||
GenerateDone done = 2;
|
||||
}
|
||||
}
|
||||
|
||||
message Token {
|
||||
// Token id at this position. For prefill this is the prompt token;
|
||||
// for decode it's the sampled token.
|
||||
uint32 id = 1;
|
||||
|
||||
// Absolute position in the session's token list.
|
||||
uint32 position = 2;
|
||||
|
||||
// True for prefill positions, false for decode.
|
||||
bool is_prefill = 3;
|
||||
|
||||
// Concept readout at this position. Empty if the position wasn't
|
||||
// covered by readout_ranges.
|
||||
repeated float readout = 4 [packed = true];
|
||||
|
||||
// Top-k alternative tokens' logprobs at this position — populated
|
||||
// when the position is covered by logprobs_ranges and
|
||||
// logprob_top_k > 0.
|
||||
repeated TokenLogprob logprobs = 5;
|
||||
|
||||
// Logprob of the token at `position` (the prompt token for
|
||||
// prefill, the sampled token for decode). Populated when the
|
||||
// position is covered by logprobs_ranges.
|
||||
float sampled_logprob = 6;
|
||||
bool has_sampled_logprob = 7;
|
||||
}
|
||||
|
||||
message TokenLogprob {
|
||||
uint32 id = 1;
|
||||
float logprob = 2;
|
||||
}
|
||||
|
||||
message GenerateDone {
|
||||
uint32 prompt_tokens = 1;
|
||||
uint32 completion_tokens = 2;
|
||||
uint32 total_tokens = 3;
|
||||
|
||||
enum FinishReason {
|
||||
FINISH_REASON_UNSPECIFIED = 0;
|
||||
FINISH_REASON_EOS = 1; // emitted EOS / stop token
|
||||
FINISH_REASON_LENGTH = 2; // hit max_tokens
|
||||
FINISH_REASON_CANCELLED = 3; // client cancelled
|
||||
FINISH_REASON_STOP_STRING = 4; // matched a stop string
|
||||
}
|
||||
FinishReason finish_reason = 4;
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Readout manifest
|
||||
// ============================================================
|
||||
|
||||
message GetReadoutManifestRequest {}
|
||||
|
||||
message ReadoutManifest {
|
||||
repeated string concepts = 1;
|
||||
repeated uint32 layers = 2;
|
||||
uint32 hidden_size = 3;
|
||||
string dtype = 4;
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Debug
|
||||
// ============================================================
|
||||
|
||||
message DumpSessionRequest {
|
||||
string session_id = 1;
|
||||
}
|
||||
|
||||
message DumpSessionResponse {
|
||||
// The full session.tokens sequence, verbatim.
|
||||
repeated uint32 tokens = 1 [packed = true];
|
||||
}
|
||||
|
|
@ -1,327 +0,0 @@
|
|||
"""Quantize Qwen3.6-27B (multimodal) to FP8 for vLLM serving.
|
||||
|
||||
Why this exists
|
||||
---------------
|
||||
The earlier `quantize_qwen3_6.py` (in shell history, never committed)
|
||||
loaded the model with `AutoModelForCausalLM`, which silently strips
|
||||
the multimodal arch. Result: an FP8 checkpoint with no vision tower
|
||||
weights at all. vLLM happily instantiated the vision tower from the
|
||||
config and ran it with default/uninitialized weights, producing
|
||||
gibberish image features and `!!!!!!`-style output. We chased that
|
||||
through the protocol layer for a long time before tracing it back
|
||||
to the quant. This script avoids that trap by loading via the
|
||||
config-declared class explicitly.
|
||||
|
||||
Recipe
|
||||
------
|
||||
FP8_DYNAMIC (per-channel weight scales, per-token dynamic activation
|
||||
scales, both E4M3) for Linear weights, with an `ignore` list derived
|
||||
from Unsloth's UD-Q8_K_XL (`unsloth/Qwen3.6-27B-GGUF`). Their
|
||||
sensitivity sweep flagged specific layers as quantization-fragile;
|
||||
we honor those layer indices even though their algorithm is
|
||||
GGUF-native Q8_K and ours is FP8 — sensitivity is a layer property,
|
||||
not an algorithm property.
|
||||
|
||||
vLLM fusion constraint
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
vLLM's Qwen3.5/3.6 model code fuses sub-modules at load time:
|
||||
qkv_proj ← q_proj, k_proj, v_proj
|
||||
gate_up_proj ← gate_proj, up_proj
|
||||
in_proj_qkvz ← in_proj_qkv, in_proj_z
|
||||
in_proj_ba ← in_proj_b, in_proj_a
|
||||
compressed_tensors rejects checkpoints where sub-modules of a fused
|
||||
layer have different quantization schemes. Our ignore list is shaped
|
||||
around this — within any fused layer, all components share a scheme.
|
||||
That's the reason `in_proj_qkv` is ignored even though Unsloth's
|
||||
sweep doesn't single it out, and the reason late-stack attn override
|
||||
covers q/k/v rather than just q/k.
|
||||
|
||||
MTP merge
|
||||
---------
|
||||
`Qwen3_5ForConditionalGeneration` doesn't expose the MTP submodule,
|
||||
so `oneshot()` produces a checkpoint with the 15 `mtp.*` tensors
|
||||
silently dropped. After quantization we read the MTP weights back
|
||||
out of the upstream cached snapshot and splice them into the saved
|
||||
safetensors at BF16. They're small (~850 MB) so quantizing them
|
||||
isn't worth the calibration risk; speculative-decoding code paths
|
||||
in vLLM expect the MTP head present.
|
||||
|
||||
Output
|
||||
------
|
||||
`OUTPUT_DIR` gets the FP8 model.safetensors + config + processor +
|
||||
recipe.yaml. Vision tower stays BF16 (in `ignore`); LM Linears go
|
||||
to FP8; norms, SSM internals (not Linear), and MTP tensors stay
|
||||
BF16 untouched.
|
||||
|
||||
Verification at end: re-opens the saved safetensors and asserts
|
||||
- vision .weight tensors present (>= 150; full count is 167)
|
||||
- lm_head + embed_tokens at fp16/bf16 (NOT FP8)
|
||||
- a sampled FP8'd Linear actually has float8 dtype
|
||||
- 15 mtp.* tensors present
|
||||
|
||||
Run
|
||||
---
|
||||
~/vllm-venv/bin/python quantize_qwen3_6_mm.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import glob
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
from huggingface_hub import snapshot_download
|
||||
from llmcompressor import oneshot
|
||||
from llmcompressor.modifiers.quantization import QuantizationModifier
|
||||
from safetensors import safe_open
|
||||
from safetensors.torch import save_file
|
||||
from transformers import AutoProcessor
|
||||
from transformers.models.qwen3_5.modeling_qwen3_5 import (
|
||||
Qwen3_5ForConditionalGeneration,
|
||||
)
|
||||
|
||||
|
||||
MODEL = "Qwen/Qwen3.6-27B"
|
||||
OUTPUT_DIR = "/home/ubuntu/amygdala-training/Qwen3.6-27B-FP8-mm"
|
||||
|
||||
|
||||
# Layers Unsloth's UD-Q8_K_XL keeps at F16 (perplexity-sensitive
|
||||
# in their sweep). Late-stack clustering is consistent with the
|
||||
# general finding that errors near the output propagate directly
|
||||
# to logits.
|
||||
LATE_FFN_LAYERS = (50, 51, 59, 62, 63)
|
||||
LATE_ATTN_LAYERS = (51, 59, 63)
|
||||
|
||||
|
||||
# Build the ignore regex list. Note: llmcompressor matches these
|
||||
# patterns against MODULE names (no `.weight` suffix) when walking
|
||||
# `named_modules()` for `targets=["Linear"]`. The first pass of
|
||||
# this script used `\.weight$` patterns and silently quantized
|
||||
# lm_head + every linear_attn projection — verified post-hoc by
|
||||
# inspecting the saved safetensors. Patterns now anchor on `$`
|
||||
# at the module name.
|
||||
IGNORE_PATTERNS: list[str] = [
|
||||
# Original recipe: lm_head and embeddings always full-precision.
|
||||
# (embed_tokens is an Embedding, not a Linear, so it's already
|
||||
# ignored by `targets=["Linear"]`. Pattern kept as belt-and-
|
||||
# suspenders in case future llmcompressor versions widen the
|
||||
# target set.)
|
||||
"re:lm_head$",
|
||||
"re:.*embed_tokens$",
|
||||
|
||||
# Vision tower — entire `model.visual.*` subtree (vision
|
||||
# transformer blocks + merger + patch_embed + pos_embed).
|
||||
# Unsloth ships the vision tower as a separate `mmproj-BF16.gguf`
|
||||
# for GGUF consumers; in our single-file FP8 setup we just leave
|
||||
# them at BF16.
|
||||
"re:model\\.visual\\..*",
|
||||
|
||||
# MTP (multi-token prediction) module — Unsloth's GGUF doesn't
|
||||
# carry MTP weights so we have no precision signal from them;
|
||||
# safest to keep BF16.
|
||||
"re:mtp\\..*",
|
||||
|
||||
# Linear-attention block — keep ENTIRELY at BF16. vLLM fuses
|
||||
# `in_proj_qkv` and `in_proj_z` into a single `in_proj_qkvz`
|
||||
# layer, and compressed_tensors rejects mixed schemes within a
|
||||
# fused layer. Unsloth's recipe keeps z, a, b, out at F16/F32
|
||||
# (gate/SSM internals are quantization-fragile in the GatedDeltaNet
|
||||
# update), so the principled choice is to also keep `in_proj_qkv`
|
||||
# at BF16 rather than FP8'ing the gate to match. We give up ~1 GB
|
||||
# of FP8 coverage; in exchange we follow Unsloth's quality intent
|
||||
# and load cleanly under vLLM. (`in_proj_a` + `in_proj_b` are
|
||||
# likewise fused as `in_proj_ba` — both ignored, consistent.)
|
||||
"re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_qkv$",
|
||||
"re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_z$",
|
||||
"re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_a$",
|
||||
"re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.in_proj_b$",
|
||||
"re:model\\.language_model\\.layers\\.\\d+\\.linear_attn\\.out_proj$",
|
||||
|
||||
# Per-layer high-precision MLP (Unsloth flagged exactly these
|
||||
# late-stack indices in their UD-Q8_K_XL sensitivity sweep, all
|
||||
# three of {gate, up, down} per layer). vLLM fuses gate+up into
|
||||
# `gate_up_proj`; ignoring both keeps the fused layer consistent.
|
||||
# `down_proj` is its own (non-fused) layer.
|
||||
"re:model\\.language_model\\.layers\\.("
|
||||
+ "|".join(str(n) for n in LATE_FFN_LAYERS)
|
||||
+ ")\\.mlp\\.(down|gate|up)_proj$",
|
||||
|
||||
# Per-layer high-precision attention q/k/v (Unsloth's sweep upgrades
|
||||
# only q and k; we extend to v because vLLM fuses q/k/v into
|
||||
# `qkv_proj` and rejects mixed schemes). `o_proj` is its own
|
||||
# non-fused layer and stays at FP8.
|
||||
"re:model\\.language_model\\.layers\\.("
|
||||
+ "|".join(str(n) for n in LATE_ATTN_LAYERS)
|
||||
+ ")\\.self_attn\\.(q|k|v)_proj$",
|
||||
]
|
||||
|
||||
|
||||
def main() -> None:
    """Quantize ``MODEL`` to FP8_DYNAMIC, then merge MTP tensors and verify.

    Steps, in order:
      1. load the multimodal model with ``dtype=torch.bfloat16``;
      2. load the matching processor;
      3. run a oneshot ``QuantizationModifier`` over every ``Linear`` target,
         skipping modules matched by ``IGNORE_PATTERNS``, writing the result
         to ``OUTPUT_DIR``;
      4. save the processor next to the model;
      5. call ``merge_mtp`` and ``verify_output`` on ``OUTPUT_DIR``.
    """
    print(f"Loading {MODEL} as multimodal "
          "(Qwen3_5ForConditionalGeneration)...", flush=True)
    load_kwargs = {
        "dtype": torch.bfloat16,
        "device_map": "auto",
        "trust_remote_code": True,
    }
    model = Qwen3_5ForConditionalGeneration.from_pretrained(MODEL, **load_kwargs)
    print(f" loaded: {model.__class__.__name__}", flush=True)

    print("Loading processor (text + image preprocessing)...", flush=True)
    processor = AutoProcessor.from_pretrained(MODEL, trust_remote_code=True)

    print("Running FP8_DYNAMIC oneshot quantization...", flush=True)
    n_ignored = len(IGNORE_PATTERNS)
    print(f" ignore list: {n_ignored} patterns", flush=True)
    quant_recipe = QuantizationModifier(
        targets=["Linear"],
        scheme="FP8_DYNAMIC",
        ignore=IGNORE_PATTERNS,
    )
    oneshot(model=model, recipe=quant_recipe, output_dir=OUTPUT_DIR)
    processor.save_pretrained(OUTPUT_DIR)
    print(f" wrote model + processor to {OUTPUT_DIR}", flush=True)

    # Post-processing: splice the MTP tensors in, then sanity-check the result.
    merge_mtp(OUTPUT_DIR)
    verify_output(OUTPUT_DIR)
|
||||
|
||||
|
||||
def merge_mtp(out_dir: str) -> None:
    """Splice upstream MTP tensors into the saved FP8 safetensors.

    `Qwen3_5ForConditionalGeneration` skips the MTP submodule on load,
    so oneshot's output is missing the 15 `mtp.*` tensors. We resolve
    the upstream snapshot via the HF cache (already populated by
    from_pretrained), pull just the MTP tensors out at BF16, and
    rewrite the safetensors with them merged in. The compressed_tensors
    metadata header (which carries the FP8 format identifier vLLM
    needs to dequantize) is preserved verbatim.

    Atomic-rename is used so a crash mid-write doesn't corrupt the
    33+ GB checkpoint we just spent minutes producing. If the write or
    the rename fails, the temporary `.new` file is removed so a failed
    run doesn't leave a second multi-GB file behind.

    Args:
        out_dir: directory holding the single-shard FP8 checkpoint
            written by oneshot.

    Exits (via ``sys.exit``) when the upstream checkpoint has no
    ``mtp.*`` tensors, when ``out_dir`` does not contain exactly one
    ``*.safetensors`` file, or when an MTP key already exists in the
    FP8 output.
    """
    print("\nMerging upstream MTP tensors...", flush=True)
    upstream_dir = Path(snapshot_download(
        MODEL,
        allow_patterns=["model.safetensors.index.json",
                        "model-*-of-*.safetensors"],
    ))

    with open(upstream_dir / "model.safetensors.index.json") as f:
        idx = json.load(f)
    # Only open the shards that actually hold MTP weights.
    mtp_shards = sorted({v for k, v in idx["weight_map"].items()
                         if k.startswith("mtp.")})
    print(f" MTP tensors live in shards: {mtp_shards}", flush=True)

    mtp_tensors: dict[str, torch.Tensor] = {}
    for shard in mtp_shards:
        with safe_open(upstream_dir / shard, framework="pt") as f:
            for k in f.keys():
                if k.startswith("mtp."):
                    mtp_tensors[k] = f.get_tensor(k).contiguous()

    # Fail fast: without this, an empty merge would still rewrite the
    # whole checkpoint and only be caught afterwards by verify_output().
    if not mtp_tensors:
        sys.exit("FAIL: no mtp.* tensors found in the upstream "
                 "checkpoint; nothing to merge.")

    mtp_bytes = sum(t.numel() * t.element_size()
                    for t in mtp_tensors.values())
    print(f" loaded {len(mtp_tensors)} mtp tensors "
          f"({mtp_bytes/1e6:.1f} MB)", flush=True)

    fp8_files = sorted(Path(out_dir).glob("*.safetensors"))
    if len(fp8_files) != 1:
        sys.exit(f"FAIL: expected single safetensors shard, "
                 f"got {fp8_files}")
    existing_path = fp8_files[0]

    with safe_open(existing_path, framework="pt") as f:
        metadata = f.metadata() or {}
        all_tensors = {k: f.get_tensor(k) for k in f.keys()}

    overlap = set(all_tensors) & set(mtp_tensors)
    if overlap:
        sys.exit(f"FAIL: MTP key collision with FP8 output: "
                 f"{sorted(overlap)[:5]}")
    all_tensors.update(mtp_tensors)

    # The ".new" suffix keeps the temp file out of the "*.safetensors"
    # glob above, so a rerun is not confused by a leftover.
    tmp_path = existing_path.with_name(existing_path.name + ".new")
    print(f" rewriting {existing_path.name} "
          f"({len(all_tensors)} tensors)...", flush=True)
    try:
        save_file(all_tensors, str(tmp_path), metadata=metadata)
        tmp_path.replace(existing_path)
    finally:
        # After a successful replace the temp path no longer exists and
        # this is a no-op; on failure it removes the partial file.
        tmp_path.unlink(missing_ok=True)
    print(" done", flush=True)
|
||||
|
||||
|
||||
def verify_output(out_dir: str) -> None:
    """Sanity-check the checkpoint written to ``out_dir``.

    Scans every ``*.safetensors`` file and exits with a ``FAIL: ...``
    message unless all of the following hold:

    * at least 150 ``model.visual.`` weight tensors exist, none of them FP8;
    * ``lm_head.weight`` exists and is not FP8;
    * at least one ``language_model.layers`` weight is FP8;
    * exactly 15 ``mtp.*`` tensors are present.
    """
    print(f"\nVerifying {out_dir}...", flush=True)

    shard_paths = sorted(glob.glob(f"{out_dir}/*.safetensors"))
    if not shard_paths:
        sys.exit(f"FAIL: no safetensors in {out_dir}")

    mtp_keys: list[str] = []
    # (tensor name, dtype name) for every `.weight` tensor, in scan order.
    weight_dtypes: list[tuple[str, str]] = []

    for shard_path in shard_paths:
        with safe_open(shard_path, framework="pt") as shard:
            for name in shard.keys():
                if name.startswith("mtp."):
                    mtp_keys.append(name)
                # Some FP8 quants write a sibling `_scale` / `_zero_point`;
                # only the .weight tensors matter for the dtype checks.
                if name.endswith(".weight"):
                    dtype_name = str(shard.get_tensor(name).dtype).replace("torch.", "")
                    weight_dtypes.append((name, dtype_name))

    vision_keys: list[tuple[str, str]] = [
        (name, dtype_name) for name, dtype_name in weight_dtypes
        if "model.visual." in name
    ]

    lm_head_dtype: str | None = None
    for name, dtype_name in weight_dtypes:
        if name == "lm_head.weight":
            lm_head_dtype = dtype_name

    # First FP8 weight inside the language-model stack, if any.
    fp8_sample: tuple[str, str] | None = next(
        ((name, dtype_name) for name, dtype_name in weight_dtypes
         if "float8" in dtype_name and "language_model.layers" in name),
        None,
    )

    # Qwen3.6-27B has 167 vision `.weight` tensors (333 vision tensors
    # total, the rest are `.bias` and per-block norms). 150 is a
    # sanity floor that catches "vision tower didn't make it through"
    # without being brittle to minor arch revisions.
    if len(vision_keys) < 150:
        sys.exit(f"FAIL: only {len(vision_keys)} vision tensors found "
                 f"(expected >= 150). Vision tower didn't make it "
                 f"through the quant.")

    bad_vision = [pair for pair in vision_keys if "float8" in pair[1]]
    if bad_vision:
        sys.exit(f"FAIL: vision weights got quantized to FP8: "
                 f"{bad_vision[:3]}...")

    if lm_head_dtype is None:
        sys.exit("FAIL: lm_head.weight not found in output.")
    if "float8" in lm_head_dtype:
        sys.exit(f"FAIL: lm_head.weight is FP8 ({lm_head_dtype}); "
                 f"should be BF16/FP16.")

    if fp8_sample is None:
        sys.exit("FAIL: no FP8 weights found in language_model.layers — "
                 "the recipe didn't quantize anything.")

    # Upstream Qwen3.6-27B has exactly 15 mtp.* tensors (1 fused
    # transformer block + projection + norms). merge_mtp() should
    # have spliced all of them in.
    if len(mtp_keys) != 15:
        sys.exit(f"FAIL: expected 15 mtp.* tensors, found "
                 f"{len(mtp_keys)}. merge_mtp() missed some.")

    sample_name, sample_dtype = fp8_sample
    print(f" ✓ {len(vision_keys)} vision tensors at "
          f"{vision_keys[0][1]} (not FP8)")
    print(f" ✓ lm_head.weight at {lm_head_dtype} (not FP8)")
    print(f" ✓ FP8 sample: {sample_name} = {sample_dtype}")
    print(f" ✓ {len(mtp_keys)} mtp.* tensors present")
    print("DONE")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -100,7 +100,7 @@ impl HttpClient {
|
|||
.map_err(|e| anyhow::anyhow!("invalid server name: {e}"))?;
|
||||
let connector = tokio_rustls::TlsConnector::from(self.tls.clone());
|
||||
let tls = connector.connect(server_name.to_owned(), tcp).await
|
||||
.map_err(|e| anyhow::anyhow!("TLS handshake to {host}: {e}"))?;
|
||||
.context("TLS handshake")?;
|
||||
TokioIo::new(Box::new(tls) as Box<dyn IoStream>)
|
||||
} else {
|
||||
TokioIo::new(Box::new(tcp) as Box<dyn IoStream>)
|
||||
|
|
@ -154,14 +154,6 @@ impl HttpResponse {
|
|||
Ok(String::from_utf8_lossy(&bytes).into_owned())
|
||||
}
|
||||
|
||||
/// Read the entire body as raw bytes (for binary downloads).
|
||||
pub async fn bytes(self) -> Result<Bytes> {
|
||||
let bytes = self.body.collect().await
|
||||
.context("reading response body")?
|
||||
.to_bytes();
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
/// Read the entire body and deserialize as JSON.
|
||||
pub async fn json<T: serde::de::DeserializeOwned>(self) -> Result<T> {
|
||||
let bytes = self.body.collect().await
|
||||
|
|
@ -198,7 +190,6 @@ impl HttpClientBuilder {
|
|||
}
|
||||
|
||||
pub fn build(self) -> HttpClient {
|
||||
install_rustls_crypto_provider();
|
||||
let certs = rustls_native_certs::load_native_certs()
|
||||
.certs.into_iter()
|
||||
.collect::<Vec<_>>();
|
||||
|
|
@ -206,13 +197,6 @@ impl HttpClientBuilder {
|
|||
for cert in certs {
|
||||
root_store.add(cert).ok();
|
||||
}
|
||||
// Also trust any `.pem` files under `~/.consciousness/certs/` —
|
||||
// self-signed server certs for our own vllm hosts live there.
|
||||
// Drop a new `<host>.pem` in the dir to trust a new server; no
|
||||
// code change needed.
|
||||
for cert in load_user_certs() {
|
||||
root_store.add(cert).ok();
|
||||
}
|
||||
let tls = Arc::new(
|
||||
ClientConfig::builder()
|
||||
.with_root_certificates(root_store)
|
||||
|
|
@ -226,65 +210,6 @@ impl HttpClientBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
/// Install rustls' default crypto provider exactly once per process.
|
||||
/// rustls 0.23 doesn't pick one automatically when multiple features
|
||||
/// could provide it (e.g. when tonic pulls in both ring and aws-lc-rs
|
||||
/// via transitive deps). Idempotent via OnceLock; safe to call from
|
||||
/// multiple callers.
|
||||
fn install_rustls_crypto_provider() {
|
||||
static ONCE: std::sync::OnceLock<()> = std::sync::OnceLock::new();
|
||||
ONCE.get_or_init(|| {
|
||||
let _ = rustls::crypto::ring::default_provider().install_default();
|
||||
});
|
||||
}
|
||||
|
||||
/// Load every `.pem` file under `~/.consciousness/certs/` as a DER
|
||||
/// certificate and return them. Silent on missing dir, missing files,
|
||||
/// or parse errors — those are "no extra certs trusted" rather than
|
||||
/// hard failures, to keep startup robust.
|
||||
/// Load the concatenated PEM bytes of every `.pem` file under
|
||||
/// `~/.consciousness/certs/` — suitable for passing to a tonic
|
||||
/// `ClientTlsConfig::ca_certificate(Certificate::from_pem(...))` call
|
||||
/// so gRPC connections trust the same self-signed servers the HTTP
|
||||
/// path does.
|
||||
pub(crate) fn load_user_certs_pem_bytes() -> Vec<u8> {
|
||||
let mut out = Vec::new();
|
||||
let Some(home) = dirs::home_dir() else { return out };
|
||||
let dir = home.join(".consciousness").join("certs");
|
||||
let Ok(entries) = std::fs::read_dir(&dir) else { return out };
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if path.extension().and_then(|e| e.to_str()) != Some("pem") {
|
||||
continue;
|
||||
}
|
||||
if let Ok(bytes) = std::fs::read(&path) {
|
||||
out.extend_from_slice(&bytes);
|
||||
if !bytes.ends_with(b"\n") {
|
||||
out.push(b'\n');
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn load_user_certs() -> Vec<rustls::pki_types::CertificateDer<'static>> {
|
||||
let mut out = Vec::new();
|
||||
let Some(home) = dirs::home_dir() else { return out };
|
||||
let dir = home.join(".consciousness").join("certs");
|
||||
let Ok(entries) = std::fs::read_dir(&dir) else { return out };
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if path.extension().and_then(|e| e.to_str()) != Some("pem") {
|
||||
continue;
|
||||
}
|
||||
let Ok(bytes) = std::fs::read(&path) else { continue };
|
||||
for cert in rustls_pemfile::certs(&mut bytes.as_slice()).flatten() {
|
||||
out.push(cert);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Trait alias for streams that work with hyper's IO adapter.
|
||||
trait IoStream: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static {}
|
||||
impl<T: tokio::io::AsyncRead + tokio::io::AsyncWrite + Send + Unpin + 'static> IoStream for T {}
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@
|
|||
// Set POC_DEBUG=1 for verbose per-turn logging.
|
||||
|
||||
pub mod http;
|
||||
pub mod salience;
|
||||
|
||||
use std::time::Duration;
|
||||
use std::time::{Duration, Instant};
|
||||
use anyhow::Result;
|
||||
use tokio::sync::mpsc;
|
||||
use serde::Deserialize;
|
||||
|
||||
use http::HttpClient;
|
||||
use http::{HttpClient, HttpResponse};
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Usage {
|
||||
|
|
@ -38,21 +37,6 @@ pub struct ReadoutManifest {
|
|||
/// from pairing with the manifest fetched at startup.
|
||||
pub type TokenReadout = Vec<Vec<f32>>;
|
||||
|
||||
/// Client-side sampling state. Mirrors the wire-level fields in
/// `GenerateRequest` (proto flattened its `SamplingParams` submessage
/// in so the server handler reads them directly), but stays as a
/// grouped struct on the client because UI / config / tests pass
/// these around together.
///
/// Derives `Debug` and `PartialEq` so values can be logged and
/// compared in tests.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct SamplingParams {
    /// Sampling temperature.
    pub temperature: f32,
    /// Top-p sampling parameter.
    pub top_p: f32,
    /// Top-k sampling parameter.
    pub top_k: u32,
    /// Decode budget. 0 = prefill only; >0 = decode up to this many
    /// tokens, stopping early on EOS / stop_token_ids.
    pub max_tokens: u32,
}
|
||||
|
||||
/// A JoinHandle that aborts its task when dropped.
|
||||
pub(crate) struct AbortOnDrop(tokio::task::JoinHandle<()>);
|
||||
|
||||
|
|
@ -62,6 +46,13 @@ impl Drop for AbortOnDrop {
|
|||
}
|
||||
}
|
||||
|
||||
/// Sampling parameters for model generation.
///
/// Derives `Debug` and `PartialEq` so values can be logged and
/// compared in tests.
#[derive(Debug, Clone, Copy, PartialEq)]
pub(crate) struct SamplingParams {
    /// Sampling temperature.
    pub temperature: f32,
    /// Top-p sampling parameter.
    pub top_p: f32,
    /// Top-k sampling parameter.
    pub top_k: u32,
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
// Stream events — yielded by backends, consumed by the runner
|
||||
|
|
@ -83,17 +74,6 @@ pub struct ApiClient {
|
|||
api_key: String,
|
||||
pub model: String,
|
||||
base_url: String,
|
||||
/// Cached readout manifest — fetched once per process and shared
|
||||
/// across ApiClient clones (every Agent/fork gets the same cell).
|
||||
/// `None` after fetch means the server has readout disabled (404).
|
||||
manifest: std::sync::Arc<tokio::sync::OnceCell<Option<ReadoutManifest>>>,
|
||||
/// Shared tonic Channel to the salience gRPC endpoint. Opened on
|
||||
/// first use and reused across every SessionHandle / RPC call
|
||||
/// derived from this ApiClient. tonic multiplexes concurrent
|
||||
/// requests over the HTTP/2 connection automatically.
|
||||
salience_channel: std::sync::Arc<
|
||||
tokio::sync::OnceCell<tonic::transport::Channel>
|
||||
>,
|
||||
}
|
||||
|
||||
impl ApiClient {
|
||||
|
|
@ -108,69 +88,33 @@ impl ApiClient {
|
|||
api_key: api_key.to_string(),
|
||||
model: model.to_string(),
|
||||
base_url: base_url.trim_end_matches('/').to_string(),
|
||||
manifest: std::sync::Arc::new(tokio::sync::OnceCell::new()),
|
||||
salience_channel: std::sync::Arc::new(tokio::sync::OnceCell::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a `SalienceClient` on the shared gRPC channel — opens
|
||||
/// the channel on first call and reuses it thereafter across
|
||||
/// every ApiClient clone. All scoring / inference / session
|
||||
/// RPCs flow through this single multiplexed HTTP/2 connection.
|
||||
///
|
||||
/// Bumps tonic's default 4 MiB encode/decode caps to 64 MiB on
|
||||
/// every client. Multimodal Generate requests carry pre-encoded
|
||||
/// image bytes inline (Qwen3.6's 768×768 patches at high res
|
||||
/// land around 5–8 MiB per turn), and Done events with full
|
||||
/// per-token readout vectors can also exceed 4 MiB on long runs.
|
||||
pub async fn salience_client(&self) -> Result<
|
||||
salience::pb::salience_client::SalienceClient<tonic::transport::Channel>
|
||||
> {
|
||||
let ch = self.salience_channel.get_or_try_init(|| async {
|
||||
let grpc_url = salience::derive_grpc_url(&self.base_url);
|
||||
log::debug!(target: "grpc",
|
||||
"opening shared salience channel: http_base={} -> grpc_url={}",
|
||||
self.base_url, grpc_url);
|
||||
salience::connect_channel(&grpc_url).await
|
||||
}).await?;
|
||||
const MAX_GRPC_MESSAGE_BYTES: usize = 64 * 1024 * 1024;
|
||||
Ok(salience::pb::salience_client::SalienceClient::new(ch.clone())
|
||||
.max_decoding_message_size(MAX_GRPC_MESSAGE_BYTES)
|
||||
.max_encoding_message_size(MAX_GRPC_MESSAGE_BYTES))
|
||||
}
|
||||
|
||||
/// Stream generation via a gRPC session. Walks the prompt chunks
|
||||
/// comparing against the session's `committed_len`, sends the
|
||||
/// delta as interleaved `AppendImage` + intermediate
|
||||
/// `Generate(max_tokens=0)` (for text runs separating images) +
|
||||
/// a final `Generate(max_tokens=sampling.max_tokens, ...)` whose
|
||||
/// Token events stream back through the channel.
|
||||
///
|
||||
/// On any gRPC error the session is dropped; the next call
|
||||
/// reopens fresh. Happy-path ordering: Token* Done. Error paths
|
||||
/// emit `StreamToken::Error` and close.
|
||||
pub(crate) fn stream_session_mm(
|
||||
pub(crate) fn stream_completion_mm(
|
||||
&self,
|
||||
session_lock: std::sync::Arc<crate::Mutex<Option<salience::SessionHandle>>>,
|
||||
chunks: Vec<super::context::WireChunk>,
|
||||
images: Vec<super::context::WireImage>,
|
||||
match_upto: u32,
|
||||
prompt_tokens: &[u32],
|
||||
images: &[super::context::WireImage],
|
||||
sampling: SamplingParams,
|
||||
priority: Option<i32>,
|
||||
readout_shape: Option<(u32, u32)>,
|
||||
) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) {
|
||||
let (tx, rx) = mpsc::unbounded_channel();
|
||||
let client = self.clone();
|
||||
let client = self.client.clone();
|
||||
let api_key = self.api_key.clone();
|
||||
let model = self.model.clone();
|
||||
let prompt_tokens = prompt_tokens.to_vec();
|
||||
let images: Vec<(Vec<u8>, String)> = images.iter()
|
||||
.map(|i| (i.bytes.clone(), i.mime.clone()))
|
||||
.collect();
|
||||
let base_url = self.base_url.clone();
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
let result = run_session_generate(
|
||||
session_lock, &client, chunks, images, match_upto, sampling,
|
||||
priority, readout_shape, &tx,
|
||||
let result = stream_completions(
|
||||
&client, &base_url, &api_key, &model,
|
||||
&prompt_tokens, &images, &tx, sampling, priority,
|
||||
).await;
|
||||
if let Err(e) = result {
|
||||
log::warn!(target: "grpc",
|
||||
"stream_session_mm error, forwarding to UI: {:#}", e);
|
||||
let _ = tx.send(StreamToken::Error(format!("{:#}", e)));
|
||||
let _ = tx.send(StreamToken::Error(e.to_string()));
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -184,13 +128,9 @@ impl ApiClient {
|
|||
/// readout is enabled on the server, `Ok(None)` on 404 (disabled),
|
||||
/// or an error on any other failure.
|
||||
///
|
||||
/// First call performs the HTTP fetch; subsequent calls (including
|
||||
/// across ApiClient clones sharing the same cell) return the
|
||||
/// cached result. The manifest doesn't change during a server run.
|
||||
pub fn model_str(&self) -> &str { &self.model }
|
||||
|
||||
/// Call once at startup and cache the result; the manifest doesn't
|
||||
/// change during a server run.
|
||||
pub async fn fetch_readout_manifest(&self) -> Result<Option<ReadoutManifest>> {
|
||||
let manifest = self.manifest.get_or_try_init(|| async {
|
||||
let url = format!("{}/readout/manifest", self.base_url);
|
||||
let auth = format!("Bearer {}", self.api_key);
|
||||
let response = self
|
||||
|
|
@ -200,7 +140,7 @@ impl ApiClient {
|
|||
.map_err(|e| anyhow::anyhow!("readout manifest fetch ({}): {}", url, e))?;
|
||||
let status = response.status();
|
||||
if status.as_u16() == 404 {
|
||||
return Ok::<_, anyhow::Error>(None);
|
||||
return Ok(None);
|
||||
}
|
||||
if !status.is_success() {
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
|
|
@ -208,219 +148,366 @@ impl ApiClient {
|
|||
anyhow::bail!("readout manifest HTTP {} ({}): {}", status, url, &body[..n]);
|
||||
}
|
||||
Ok(Some(response.json().await?))
|
||||
}).await?;
|
||||
Ok(manifest.clone())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Body of the gRPC-path streaming task. Walks the wire chunks
|
||||
/// against the session's `committed_len`, sends the delta via
|
||||
/// AppendImage / intermediate prefill-only Generates / final decode
|
||||
/// Generate, and translates the final Generate's Token events into
|
||||
/// StreamTokens on `tx`. On success the session handle is returned
|
||||
/// to `session_lock` with an updated `committed_len`; on error the
|
||||
/// handle is dropped so the next call reopens.
|
||||
async fn run_session_generate(
|
||||
session_lock: std::sync::Arc<crate::Mutex<Option<salience::SessionHandle>>>,
|
||||
client: &ApiClient,
|
||||
chunks: Vec<super::context::WireChunk>,
|
||||
images: Vec<super::context::WireImage>,
|
||||
match_upto: u32,
|
||||
async fn stream_completions(
|
||||
client: &HttpClient,
|
||||
base_url: &str,
|
||||
api_key: &str,
|
||||
model: &str,
|
||||
prompt_tokens: &[u32],
|
||||
images: &[(Vec<u8>, String)],
|
||||
tx: &mpsc::UnboundedSender<StreamToken>,
|
||||
sampling: SamplingParams,
|
||||
priority: Option<i32>,
|
||||
readout_shape: Option<(u32, u32)>,
|
||||
tx: &mpsc::UnboundedSender<StreamToken>,
|
||||
) -> Result<()> {
|
||||
use std::time::Instant;
|
||||
use futures::StreamExt;
|
||||
use super::context::WireChunk;
|
||||
use salience::pb;
|
||||
) -> anyhow::Result<()> {
|
||||
let mut request = serde_json::json!({
|
||||
"model": model,
|
||||
"prompt": prompt_tokens,
|
||||
"max_tokens": 16384,
|
||||
"temperature": sampling.temperature,
|
||||
"top_p": sampling.top_p,
|
||||
"top_k": sampling.top_k,
|
||||
"stream": true,
|
||||
"return_token_ids": true,
|
||||
"skip_special_tokens": false,
|
||||
"stop_token_ids": [super::tokenizer::IM_END],
|
||||
});
|
||||
if !images.is_empty() {
|
||||
use base64::Engine;
|
||||
let b64 = base64::engine::general_purpose::STANDARD;
|
||||
let uris: Vec<String> = images.iter()
|
||||
.map(|(bytes, mime)| format!("data:{};base64,{}", mime, b64.encode(bytes)))
|
||||
.collect();
|
||||
request["multi_modal_data"] = serde_json::json!({ "image": uris });
|
||||
}
|
||||
if let Some(p) = priority {
|
||||
request["priority"] = serde_json::json!(p);
|
||||
}
|
||||
|
||||
let mut handle: salience::SessionHandle = {
|
||||
let mut guard = session_lock.lock().await;
|
||||
match guard.take() {
|
||||
Some(h) => h,
|
||||
None => {
|
||||
drop(guard);
|
||||
log::debug!(target: "grpc", "run_session_generate: opening new session");
|
||||
salience::SessionHandle::open(client).await?
|
||||
let url = format!("{}/completions", base_url);
|
||||
let debug_label = format!("{} prompt tokens, model={}", prompt_tokens.len(), model);
|
||||
|
||||
let mut response = send_and_check(
|
||||
client, &url, &request,
|
||||
("Authorization", &format!("Bearer {}", api_key)),
|
||||
&[], &debug_label, None,
|
||||
).await?;
|
||||
|
||||
let mut reader = SseReader::new();
|
||||
let mut usage = None;
|
||||
|
||||
while let Some(event) = reader.next_event(&mut response).await? {
|
||||
if let Some(err_msg) = event["error"]["message"].as_str() {
|
||||
anyhow::bail!("API error in stream: {}", err_msg);
|
||||
}
|
||||
|
||||
if let Some(u) = event["usage"].as_object() {
|
||||
if let Ok(u) = serde_json::from_value::<Usage>(serde_json::Value::Object(u.clone())) {
|
||||
usage = Some(u);
|
||||
}
|
||||
}
|
||||
|
||||
let choices = match event["choices"].as_array() {
|
||||
Some(c) => c,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
// If the client believes the match extends only up to `match_upto`
|
||||
// but the server has more, we need to rewind. For v1 the match is
|
||||
// either whole or broken — `match_upto` is always 0 on any mutation
|
||||
// — so the cheapest correct recovery is to drop the session and
|
||||
// open a fresh one.
|
||||
if match_upto < handle.committed_len {
|
||||
log::warn!(target: "grpc",
|
||||
"session rewind: match_upto={} < committed_len={} — reopening session (resending {} bytes)",
|
||||
match_upto, handle.committed_len, handle.committed_len - match_upto);
|
||||
drop(handle);
|
||||
handle = salience::SessionHandle::open(client).await?;
|
||||
}
|
||||
for choice in choices {
|
||||
// `readout`, if present, is a nested list
|
||||
// `[num_tokens][n_layers][n_concepts]`. Parse it once per
|
||||
// chunk and pair rows with token ids by index — the rows
|
||||
// are in the same order as `token_ids`.
|
||||
let readouts: Option<Vec<TokenReadout>> = choice["readout"]
|
||||
.as_array()
|
||||
.map(|outer| {
|
||||
outer.iter().filter_map(|per_token| {
|
||||
per_token.as_array().map(|layers| {
|
||||
layers.iter().filter_map(|per_layer| {
|
||||
per_layer.as_array().map(|vals| {
|
||||
vals.iter()
|
||||
.filter_map(|v| v.as_f64().map(|f| f as f32))
|
||||
.collect::<Vec<f32>>()
|
||||
})
|
||||
}).collect::<Vec<Vec<f32>>>()
|
||||
})
|
||||
}).collect()
|
||||
});
|
||||
|
||||
// Walk chunks at byte-level, taking everything past `match_upto`
|
||||
// as the delta. Token chunks can be split mid-way; images live
|
||||
// inline in the token stream, so there's no separate image-chunk
|
||||
// case anymore.
|
||||
let mut acc: u32 = 0;
|
||||
let mut pending: Vec<u32> = Vec::new();
|
||||
for chunk in chunks.iter() {
|
||||
match chunk {
|
||||
WireChunk::Tokens(t) => {
|
||||
let len = t.len() as u32;
|
||||
let chunk_end = acc + len;
|
||||
if chunk_end <= match_upto {
|
||||
acc = chunk_end;
|
||||
} else if acc < match_upto {
|
||||
let skip = (match_upto - acc) as usize;
|
||||
pending.extend_from_slice(&t[skip..]);
|
||||
acc = chunk_end;
|
||||
} else {
|
||||
pending.extend_from_slice(t);
|
||||
acc = chunk_end;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Filter images to those entirely past `match_upto` — anything
|
||||
// before is on the server already (prior turn), anything
|
||||
// straddling is a hard divergence (image partially-sent shouldn't
|
||||
// happen with our atomic AppendImage history; with images-inline
|
||||
// it can only happen if mark_dirty cleared match_upto mid-block,
|
||||
// which the AST mutators prevent).
|
||||
let mut new_images: Vec<pb::ImageAttachment> = Vec::new();
|
||||
for img in &images {
|
||||
if img.pad_end <= match_upto {
|
||||
continue; // already sent on a prior turn
|
||||
}
|
||||
if img.pad_start < match_upto {
|
||||
anyhow::bail!(
|
||||
"session divergence: image at [{},{}) straddles match_upto={}",
|
||||
img.pad_start, img.pad_end, match_upto,
|
||||
);
|
||||
}
|
||||
new_images.push(pb::ImageAttachment {
|
||||
bytes: img.bytes.clone(),
|
||||
mime: img.mime.clone(),
|
||||
pad_range_start: img.pad_start,
|
||||
pad_range_end: img.pad_end,
|
||||
if let Some(ids) = choice["token_ids"].as_array() {
|
||||
for (i, id_val) in ids.iter().enumerate() {
|
||||
if let Some(id) = id_val.as_u64() {
|
||||
let readout = readouts
|
||||
.as_ref()
|
||||
.and_then(|r| r.get(i).cloned());
|
||||
let _ = tx.send(StreamToken::Token {
|
||||
id: id as u32,
|
||||
readout,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if let Some(text) = choice["text"].as_str() {
|
||||
// Fallback: provider didn't return token_ids, encode locally.
|
||||
// No readout available in this path — the encoder may
|
||||
// produce a different token count than the server did.
|
||||
if !text.is_empty() {
|
||||
for id in super::tokenizer::encode(text) {
|
||||
let _ = tx.send(StreamToken::Token { id, readout: None });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Final Generate: pending holds any trailing text; decode up to
|
||||
// sampling.max_tokens. Request readouts on all decode positions
|
||||
// via a catch-all range ending at u32::MAX — decode never
|
||||
// reaches it.
|
||||
let prompt_len_after_append = handle.committed_len + pending.len() as u32;
|
||||
let readout_ranges = if readout_shape.is_some() {
|
||||
vec![pb::PositionRange {
|
||||
start: prompt_len_after_append,
|
||||
end: u32::MAX,
|
||||
}]
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
let req = pb::GenerateRequest {
|
||||
session_id: handle.session_id.clone(),
|
||||
append_tokens: pending,
|
||||
offset: handle.committed_len,
|
||||
truncating: false,
|
||||
max_tokens: sampling.max_tokens,
|
||||
logprobs_ranges: Vec::new(),
|
||||
logprob_top_k: 0,
|
||||
readout_ranges,
|
||||
temperature: sampling.temperature,
|
||||
top_p: sampling.top_p,
|
||||
top_k: sampling.top_k,
|
||||
stop_token_ids: Vec::new(),
|
||||
priority: priority.unwrap_or(0),
|
||||
images: new_images,
|
||||
};
|
||||
let session_id_for_log = handle.session_id.clone();
|
||||
let t_generate = Instant::now();
|
||||
log::debug!(target: "grpc",
|
||||
"session {} Generate: offset={} append={} max_tokens={} priority={}",
|
||||
session_id_for_log, req.offset, req.append_tokens.len(),
|
||||
req.max_tokens, req.priority);
|
||||
|
||||
let mut stream = handle.generate(req).await?;
|
||||
let (n_layers, n_concepts) = readout_shape.unwrap_or((0, 0));
|
||||
let mut session_terminated = false;
|
||||
let mut first_token_at: Option<Instant> = None;
|
||||
|
||||
while let Some(event) = stream.next().await {
|
||||
let event = match event {
|
||||
Ok(e) => e,
|
||||
Err(status) => {
|
||||
log::warn!(target: "grpc",
|
||||
"session {} Generate stream error: {} — dropping session",
|
||||
session_id_for_log, status);
|
||||
session_terminated = true;
|
||||
let _ = tx.send(StreamToken::Error(format!(
|
||||
"Generate stream error: {}", status,
|
||||
)));
|
||||
break;
|
||||
}
|
||||
};
|
||||
let Some(inner) = event.event else { continue };
|
||||
match inner {
|
||||
pb::generate_event::Event::Token(t) => {
|
||||
if t.is_prefill { continue; }
|
||||
if first_token_at.is_none() {
|
||||
log::debug!(target: "grpc",
|
||||
"session {} first decode token at {:?}",
|
||||
session_id_for_log, t_generate.elapsed());
|
||||
first_token_at = Some(Instant::now());
|
||||
}
|
||||
let readout = if t.readout.is_empty() {
|
||||
None
|
||||
} else if n_layers == 0 || n_concepts == 0 {
|
||||
None
|
||||
} else {
|
||||
let expected = (n_layers as usize) * (n_concepts as usize);
|
||||
if t.readout.len() != expected {
|
||||
log::warn!(target: "grpc",
|
||||
"readout shape mismatch: expected {}*{}={}, got {}",
|
||||
n_layers, n_concepts, expected, t.readout.len());
|
||||
None
|
||||
} else {
|
||||
let n = n_concepts as usize;
|
||||
let mut layers: Vec<Vec<f32>> = Vec::with_capacity(n_layers as usize);
|
||||
for l in 0..(n_layers as usize) {
|
||||
layers.push(t.readout[l * n..(l + 1) * n].to_vec());
|
||||
}
|
||||
Some(layers)
|
||||
}
|
||||
};
|
||||
if tx.send(StreamToken::Token { id: t.id, readout }).is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
pb::generate_event::Event::Done(d) => {
|
||||
log::debug!(target: "grpc",
|
||||
"session {} Done: prompt={} completion={} total={} reason={:?} elapsed={:?}",
|
||||
session_id_for_log, d.prompt_tokens, d.completion_tokens,
|
||||
d.total_tokens, d.finish_reason, t_generate.elapsed());
|
||||
handle.committed_len = d.total_tokens;
|
||||
let usage = Some(Usage {
|
||||
prompt_tokens: d.prompt_tokens,
|
||||
completion_tokens: d.completion_tokens,
|
||||
total_tokens: d.total_tokens,
|
||||
});
|
||||
let _ = tx.send(StreamToken::Done { usage });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !session_terminated {
|
||||
let mut guard = session_lock.lock().await;
|
||||
*guard = Some(handle);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Send an HTTP request and check for errors.
|
||||
pub(crate) async fn send_and_check(
|
||||
client: &HttpClient,
|
||||
url: &str,
|
||||
body: &impl serde::Serialize,
|
||||
auth_header: (&str, &str),
|
||||
extra_headers: &[(&str, &str)],
|
||||
debug_label: &str,
|
||||
request_json: Option<&str>,
|
||||
) -> Result<HttpResponse> {
|
||||
let debug = std::env::var("POC_DEBUG").is_ok();
|
||||
let start = Instant::now();
|
||||
|
||||
if debug {
|
||||
let payload_size = serde_json::to_string(body)
|
||||
.map(|s| s.len())
|
||||
.unwrap_or(0);
|
||||
dbglog!(
|
||||
"request: {}K payload, {}",
|
||||
payload_size / 1024, debug_label,
|
||||
);
|
||||
}
|
||||
|
||||
let mut headers: Vec<(&str, &str)> = Vec::with_capacity(extra_headers.len() + 1);
|
||||
headers.push(auth_header);
|
||||
headers.extend_from_slice(extra_headers);
|
||||
|
||||
let response = client
|
||||
.send_json("POST", url, &headers, body)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
let msg = e.to_string();
|
||||
let cause = if msg.contains("connect timeout") || msg.contains("TCP connect") {
|
||||
"connection refused"
|
||||
} else if msg.contains("request timeout") {
|
||||
"request timed out"
|
||||
} else {
|
||||
"request error"
|
||||
};
|
||||
anyhow::anyhow!("{} ({}): {}", cause, url, msg)
|
||||
})?;
|
||||
|
||||
let status = response.status();
|
||||
let elapsed = start.elapsed();
|
||||
|
||||
if debug {
|
||||
for name in [
|
||||
"x-ratelimit-remaining",
|
||||
"x-ratelimit-limit",
|
||||
"x-request-id",
|
||||
] {
|
||||
if let Some(val) = response.header(name) {
|
||||
dbglog!("header {}: {}", name, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !status.is_success() {
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
dbglog!(
|
||||
"HTTP {} after {:.1}s ({}): {}",
|
||||
status,
|
||||
elapsed.as_secs_f64(),
|
||||
url,
|
||||
&body[..body.floor_char_boundary(body.len().min(500))]
|
||||
);
|
||||
if let Some(json) = request_json {
|
||||
let log_dir = dirs::home_dir()
|
||||
.unwrap_or_default()
|
||||
.join(".consciousness/logs/failed-requests");
|
||||
let _ = std::fs::create_dir_all(&log_dir);
|
||||
let ts = chrono::Local::now().format("%Y%m%dT%H%M%S");
|
||||
let path = log_dir.join(format!("{}.json", ts));
|
||||
if std::fs::write(&path, json).is_ok() {
|
||||
dbglog!(
|
||||
"saved failed request to {} (HTTP {})", path.display(), status
|
||||
);
|
||||
}
|
||||
}
|
||||
anyhow::bail!("HTTP {} ({}): {}", status, url, &body[..body.floor_char_boundary(body.len().min(1000))]);
|
||||
}
|
||||
|
||||
if debug {
|
||||
dbglog!(
|
||||
"connected in {:.1}s (HTTP {})",
|
||||
elapsed.as_secs_f64(),
|
||||
status.as_u16()
|
||||
);
|
||||
}
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
/// SSE stream reader. Handles the generic SSE plumbing shared by both
|
||||
/// backends: chunk reading with timeout, line buffering, `data:` prefix
|
||||
/// stripping, `[DONE]` detection, JSON parsing, and parse error diagnostics.
|
||||
/// Yields parsed events as serde_json::Value — each backend handles its
|
||||
/// own event types.
|
||||
pub(crate) struct SseReader {
|
||||
/// Text received from the response that has not yet been split into
/// complete lines and consumed.
line_buf: String,
|
||||
/// Maximum time to wait for each individual chunk of the response.
chunk_timeout: Duration,
|
||||
/// Instant at which this reader was constructed; used for elapsed-time
/// figures in diagnostics.
pub stream_start: Instant,
|
||||
/// Number of chunks read from the response so far.
pub chunks_received: u64,
|
||||
/// Number of `data: ` lines handed to the JSON parser (including ones
/// that failed to parse).
pub sse_lines_parsed: u64,
|
||||
/// Number of `data: ` lines whose payload failed to parse as JSON.
pub sse_parse_errors: u64,
|
||||
/// True when `POC_DEBUG` was set at construction; makes every parse
/// error get logged instead of only the first.
debug: bool,
|
||||
/// Set once `data: [DONE]` has been seen.
done: bool,
|
||||
/// Serialized request payload — saved to disk on errors for replay debugging.
|
||||
pub(crate) request_json: Option<String>,
|
||||
}
|
||||
|
||||
impl SseReader {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
line_buf: String::new(),
|
||||
chunk_timeout: Duration::from_secs(crate::config::get().api_stream_timeout_secs),
|
||||
stream_start: Instant::now(),
|
||||
chunks_received: 0,
|
||||
sse_lines_parsed: 0,
|
||||
sse_parse_errors: 0,
|
||||
debug: std::env::var("POC_DEBUG").is_ok(),
|
||||
done: false,
|
||||
request_json: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Attach the serialized request payload for error diagnostics.
|
||||
/// Save the request payload to disk for replay debugging.
|
||||
fn save_failed_request(&self, reason: &str) {
|
||||
let Some(ref json) = self.request_json else { return };
|
||||
let log_dir = dirs::home_dir()
|
||||
.unwrap_or_default()
|
||||
.join(".consciousness/logs/failed-requests");
|
||||
let _ = std::fs::create_dir_all(&log_dir);
|
||||
let ts = chrono::Local::now().format("%Y%m%dT%H%M%S");
|
||||
let path = log_dir.join(format!("{}.json", ts));
|
||||
if std::fs::write(&path, json).is_ok() {
|
||||
dbglog!(
|
||||
"saved failed request to {} ({})", path.display(), reason
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Read the next SSE event from the response stream.
|
||||
/// Returns Ok(Some(value)) for each parsed data line,
|
||||
/// Ok(None) when the stream ends or [DONE] is received.
|
||||
pub(crate) async fn next_event(
|
||||
&mut self,
|
||||
response: &mut HttpResponse,
|
||||
) -> Result<Option<serde_json::Value>> {
|
||||
loop {
|
||||
// Drain complete lines from the buffer before reading more chunks
|
||||
while let Some(newline_pos) = self.line_buf.find('\n') {
|
||||
let line = self.line_buf[..newline_pos].trim().to_string();
|
||||
self.line_buf = self.line_buf[newline_pos + 1..].to_string();
|
||||
|
||||
if line == "data: [DONE]" {
|
||||
self.done = true;
|
||||
return Ok(None);
|
||||
}
|
||||
if line.is_empty()
|
||||
|| line.starts_with("event: ")
|
||||
|| !line.starts_with("data: ")
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
let json_str = &line[6..];
|
||||
self.sse_lines_parsed += 1;
|
||||
|
||||
match serde_json::from_str(json_str) {
|
||||
Ok(v) => return Ok(Some(v)),
|
||||
Err(e) => {
|
||||
self.sse_parse_errors += 1;
|
||||
if self.sse_parse_errors == 1 || self.debug {
|
||||
let preview = if json_str.len() > 200 {
|
||||
format!("{}...", &json_str[..200])
|
||||
} else {
|
||||
json_str.to_string()
|
||||
};
|
||||
dbglog!(
|
||||
"SSE parse error (#{}) {}: {}",
|
||||
self.sse_parse_errors, e, preview
|
||||
);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.done {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// Read more data from the response stream
|
||||
match tokio::time::timeout(self.chunk_timeout, response.chunk()).await {
|
||||
Ok(Ok(Some(chunk))) => {
|
||||
self.chunks_received += 1;
|
||||
self.line_buf.push_str(&String::from_utf8_lossy(&chunk));
|
||||
}
|
||||
Ok(Ok(None)) => return Ok(None),
|
||||
Ok(Err(e)) => {
|
||||
let buf_preview = if self.line_buf.is_empty() {
|
||||
"(empty)".to_string()
|
||||
} else {
|
||||
let n = self.line_buf.len().min(500);
|
||||
format!("{}B: {}", self.line_buf.len(), &self.line_buf[..n])
|
||||
};
|
||||
let msg = format!(
|
||||
"stream error after {} chunks, {:.1}s, {} sse lines: {} | buf: {}",
|
||||
self.chunks_received,
|
||||
self.stream_start.elapsed().as_secs_f64(),
|
||||
self.sse_lines_parsed,
|
||||
e, buf_preview,
|
||||
);
|
||||
dbglog!("{}", msg);
|
||||
self.save_failed_request(&msg);
|
||||
return Err(e.into());
|
||||
}
|
||||
Err(_) => {
|
||||
let buf_preview = if self.line_buf.is_empty() {
|
||||
"(empty)".to_string()
|
||||
} else {
|
||||
let n = self.line_buf.len().min(500);
|
||||
format!("{}B: {}", self.line_buf.len(), &self.line_buf[..n])
|
||||
};
|
||||
let msg = format!(
|
||||
"stream timeout: {}s, {} chunks, {} sse lines, {:.1}s elapsed | buf: {}",
|
||||
self.chunk_timeout.as_secs(),
|
||||
self.chunks_received,
|
||||
self.sse_lines_parsed,
|
||||
self.stream_start.elapsed().as_secs_f64(),
|
||||
buf_preview,
|
||||
);
|
||||
dbglog!("{}", msg);
|
||||
self.save_failed_request(&msg);
|
||||
anyhow::bail!(
|
||||
"stream timeout: no data for {}s ({} chunks received)",
|
||||
self.chunk_timeout.as_secs(),
|
||||
self.chunks_received
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,279 +0,0 @@
|
|||
// agent/api/salience.rs — gRPC client bindings for salience.v1.
|
||||
//
|
||||
// Thin wrapper around the tonic-generated types. Every RPC except
|
||||
// Generate is unary; Generate is server-streaming. Free functions
|
||||
// (open/close session) wrap the lifecycle RPCs; `SessionHandle` just
|
||||
// carries the id + connection params so later RPCs can reuse them.
|
||||
//
|
||||
// The old bidi Session() API is gone — see git history for its shape.
|
||||
|
||||
#![allow(clippy::enum_variant_names)]
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint};
|
||||
|
||||
/// Generated prost + tonic types for salience.v1. Call sites use
|
||||
/// `pb::OpenSessionRequest`, `pb::Token`, etc.
|
||||
pub mod pb {
|
||||
// Pulls in the code generated at build time for the `salience.v1` package.
tonic::include_proto!("salience.v1");
|
||||
}
|
||||
|
||||
/// The generated salience client, specialised to a tonic `Channel` transport.
pub type SalienceClient = pb::salience_client::SalienceClient<Channel>;
|
||||
|
||||
/// Open a TLS-aware gRPC channel to the salience server. `base_url`
|
||||
/// looks like `https://host:8443`. User-provided CA certs under
|
||||
/// `~/.consciousness/certs/` are trusted in addition to the system
|
||||
/// roots (for self-signed server certs).
|
||||
///
|
||||
/// Returns the raw `Channel` so callers (`ApiClient::salience_client`)
|
||||
/// can cache it and clone a `SalienceClient` per request without
|
||||
/// reopening the TCP/TLS connection. tonic multiplexes RPCs over the
|
||||
/// shared channel automatically.
|
||||
pub async fn connect_channel(base_url: &str) -> Result<Channel> {
|
||||
let mut endpoint = Endpoint::from_shared(base_url.to_string())
|
||||
.with_context(|| format!("invalid salience endpoint: {}", base_url))?
|
||||
.connect_timeout(std::time::Duration::from_secs(30))
|
||||
.timeout(std::time::Duration::from_secs(600));
|
||||
|
||||
if base_url.starts_with("https://") {
|
||||
let user_certs = super::http::load_user_certs_pem_bytes();
|
||||
let mut tls = ClientTlsConfig::new().with_native_roots();
|
||||
if !user_certs.is_empty() {
|
||||
tls = tls.ca_certificate(Certificate::from_pem(user_certs));
|
||||
}
|
||||
endpoint = endpoint
|
||||
.tls_config(tls)
|
||||
.with_context(|| "configuring tonic TLS")?;
|
||||
}
|
||||
|
||||
endpoint
|
||||
.connect()
|
||||
.await
|
||||
.with_context(|| format!("failed to connect to salience server at {}", base_url))
|
||||
}
|
||||
|
||||
/// Derive the gRPC base URL from the HTTP completions base URL.
///
/// vLLM's salience gRPC server listens on a different port (8443) from
/// the HTTP endpoint (8000) and accepts no path component. Given an
/// HTTP base like `https://host:8000/v1`, produce `https://host:8443`.
///
/// Rules:
/// * Trailing slashes are removed.
/// * When the URL has a scheme (`scheme://`), everything after the
///   authority (path, query, fragment) is dropped.
/// * The port is rewritten only when the authority's port is exactly
///   `8000`; other ports (including ones that merely start with `8000`,
///   such as `80001`) and IPv6 literals containing `:8000` are left
///   untouched.
/// * Schemeless inputs (`host:8000/v1`) keep whatever follows the
///   authority, with only the port rewritten.
pub fn derive_grpc_url(http_base: &str) -> String {
    let trimmed = http_base.trim_end_matches('/');
    let ends_authority = |c: char| matches!(c, '/' | '?' | '#');

    // Locate the end of the authority, and decide whether the remainder
    // is kept (schemeless input) or discarded (full URL).
    let (authority_end, keep_tail) = match trimmed.find("://") {
        Some(scheme_end) => {
            let authority_start = scheme_end + 3;
            let end = trimmed[authority_start..]
                .find(ends_authority)
                .map_or(trimmed.len(), |offset| authority_start + offset);
            (end, false)
        }
        None => (trimmed.find(ends_authority).unwrap_or(trimmed.len()), true),
    };

    let (origin, rest) = trimmed.split_at(authority_end);
    let tail = if keep_tail { rest } else { "" };

    // Anchor the match at the end of the authority so that only a port of
    // exactly 8000 is rewritten, never an arbitrary ":8000" substring.
    match origin.strip_suffix(":8000") {
        Some(host) => format!("{}:8443{}", host, tail),
        None => format!("{}{}", origin, tail),
    }
}
|
||||
|
||||
/// Attach a bearer token to a tonic request as gRPC metadata.
|
||||
pub fn with_auth<T>(req: &mut tonic::Request<T>, api_key: &str) {
|
||||
if api_key.is_empty() {
|
||||
return;
|
||||
}
|
||||
let bearer = format!("Bearer {}", api_key);
|
||||
if let Ok(val) = bearer.parse() {
|
||||
req.metadata_mut().insert("authorization", val);
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle to a server-side session. Carries the id + an `ApiClient`
|
||||
/// clone (which holds the shared tonic Channel) so subsequent
|
||||
/// per-session RPCs go over the process-global connection.
|
||||
/// `committed_len` tracks the server's current session.tokens length
|
||||
/// so the client can submit deltas with the right `offset`.
|
||||
pub struct SessionHandle {
|
||||
/// Session id returned by the OpenSession RPC. Emptied when the handle
/// is dropped.
pub session_id: String,
|
||||
/// `max_model_len` as reported by the OpenSession response.
pub max_model_len: u32,
|
||||
/// Starts at 0 and is set from the `total_tokens` of Generate Done events.
pub committed_len: u32,
|
||||
/// Client clone used for every RPC issued through this handle.
client: super::ApiClient,
|
||||
}
|
||||
|
||||
impl SessionHandle {
|
||||
pub async fn open(client: &super::ApiClient) -> Result<Self> {
|
||||
let t0 = std::time::Instant::now();
|
||||
log::debug!(target: "grpc", "OpenSession rpc: start");
|
||||
let mut c = client.salience_client().await?;
|
||||
let mut req = tonic::Request::new(pb::OpenSessionRequest {
|
||||
model: client.model.clone(),
|
||||
});
|
||||
with_auth(&mut req, client.api_key());
|
||||
let resp = c
|
||||
.open_session(req)
|
||||
.await
|
||||
.with_context(|| "OpenSession RPC failed")?
|
||||
.into_inner();
|
||||
log::debug!(target: "grpc",
|
||||
"OpenSession rpc: done session_id={} max_model_len={} elapsed={:?}",
|
||||
resp.session_id, resp.max_model_len, t0.elapsed());
|
||||
Ok(Self {
|
||||
session_id: resp.session_id,
|
||||
max_model_len: resp.max_model_len,
|
||||
committed_len: 0,
|
||||
client: client.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn client(&self) -> &super::ApiClient { &self.client }
|
||||
|
||||
/// Debug-only: fetch the server's full session.tokens. Used to
|
||||
/// verify client-side accounting byte-for-byte when divergence
|
||||
/// is suspected. Not cheap on large sessions.
|
||||
pub async fn dump_tokens(&self) -> Result<Vec<u32>> {
|
||||
let mut c = self.client.salience_client().await?;
|
||||
let mut req = tonic::Request::new(pb::DumpSessionRequest {
|
||||
session_id: self.session_id.clone(),
|
||||
});
|
||||
with_auth(&mut req, self.client.api_key());
|
||||
let resp = c
|
||||
.dump_session(req)
|
||||
.await
|
||||
.with_context(|| "DumpSession RPC failed")?
|
||||
.into_inner();
|
||||
Ok(resp.tokens)
|
||||
}
|
||||
|
||||
/// Open a gRPC Generate stream with the given request. Caller
|
||||
/// iterates the returned stream of GenerateEvents; the handle's
|
||||
/// `committed_len` should be advanced by the caller on Done based
|
||||
/// on the Done event's `total_tokens` field.
|
||||
pub async fn generate(
|
||||
&self,
|
||||
req: pb::GenerateRequest,
|
||||
) -> Result<tonic::Streaming<pb::GenerateEvent>> {
|
||||
let t0 = std::time::Instant::now();
|
||||
log::debug!(target: "grpc",
|
||||
"Generate rpc: open-stream session={} offset={} append={} max_tokens={}",
|
||||
self.session_id, req.offset, req.append_tokens.len(), req.max_tokens);
|
||||
let mut c = self.client.salience_client().await?;
|
||||
let mut req = tonic::Request::new(req);
|
||||
with_auth(&mut req, self.client.api_key());
|
||||
let resp = c
|
||||
.generate(req)
|
||||
.await
|
||||
.with_context(|| "Generate RPC failed")?;
|
||||
log::debug!(target: "grpc",
|
||||
"Generate rpc: stream opened session={} open-latency={:?}",
|
||||
self.session_id, t0.elapsed());
|
||||
Ok(resp.into_inner())
|
||||
}
|
||||
|
||||
/// Run a prefill-only Generate (max_tokens=0) that appends the
|
||||
/// given tokens to the session. No decode, no Token events — the
|
||||
/// server just extends session.tokens and runs prefill to warm
|
||||
/// the KV cache. Used to interleave text runs between AppendImage
|
||||
/// calls, and by score paths that want prompt_logprobs without a
|
||||
/// decode step.
|
||||
pub async fn prefill_only(&mut self, tokens: Vec<u32>) -> Result<()> {
|
||||
use futures::StreamExt;
|
||||
let req = pb::GenerateRequest {
|
||||
session_id: self.session_id.clone(),
|
||||
append_tokens: tokens,
|
||||
offset: self.committed_len,
|
||||
truncating: false,
|
||||
max_tokens: 0,
|
||||
logprobs_ranges: Vec::new(),
|
||||
logprob_top_k: 0,
|
||||
readout_ranges: Vec::new(),
|
||||
temperature: 0.0,
|
||||
top_p: 0.0,
|
||||
top_k: 0,
|
||||
stop_token_ids: Vec::new(),
|
||||
priority: 0,
|
||||
images: Vec::new(),
|
||||
};
|
||||
let mut stream = self.generate(req).await?;
|
||||
while let Some(event) = stream.next().await {
|
||||
let event = event.map_err(|s| anyhow::anyhow!("prefill Generate stream: {}", s))?;
|
||||
if let Some(pb::generate_event::Event::Done(d)) = event.event {
|
||||
self.committed_len = d.total_tokens;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Drop → fire CloseSession in a detached task so servers don't leak
|
||||
/// sessions until TTL eviction. Best-effort: if no tokio runtime is
|
||||
/// available we skip; the server's 30min TTL will reap it eventually.
|
||||
impl Drop for SessionHandle {
|
||||
fn drop(&mut self) {
|
||||
if self.session_id.is_empty() {
|
||||
return;
|
||||
}
|
||||
let session_id = std::mem::take(&mut self.session_id);
|
||||
let client = self.client.clone();
|
||||
let Ok(rt) = tokio::runtime::Handle::try_current() else {
|
||||
log::debug!(target: "grpc",
|
||||
"SessionHandle drop outside tokio runtime, session {} leaks to TTL",
|
||||
session_id);
|
||||
return;
|
||||
};
|
||||
rt.spawn(async move {
|
||||
let Ok(mut c) = client.salience_client().await else { return };
|
||||
let mut req = tonic::Request::new(pb::CloseSessionRequest {
|
||||
session_id: session_id.clone(),
|
||||
});
|
||||
with_auth(&mut req, client.api_key());
|
||||
if let Err(e) = c.close_session(req).await {
|
||||
log::debug!(target: "grpc",
|
||||
"CloseSession on drop failed for {}: {:#}",
|
||||
session_id, e);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile-time shape check: constructs each generated message so
    /// that this test stops compiling if build.rs stops regenerating
    /// against the proto.
    #[test]
    fn generated_types_compile() {
        let _open = pb::OpenSessionRequest {
            model: "qwen3-vl".into(),
        };

        let sample_logprob = pb::TokenLogprob {
            id: 1,
            logprob: -0.5,
        };
        let _tok = pb::Token {
            id: 42,
            position: 0,
            is_prefill: false,
            readout: vec![0.1, 0.2, 0.3],
            logprobs: vec![sample_logprob],
            sampled_logprob: -0.1,
            has_sampled_logprob: true,
        };

        let done = pb::GenerateDone {
            prompt_tokens: 10,
            completion_tokens: 20,
            total_tokens: 30,
            finish_reason: pb::generate_done::FinishReason::Eos as i32,
        };
        let _evt = pb::GenerateEvent {
            event: Some(pb::generate_event::Event::Done(done)),
        };
    }

    /// Port 8000 maps to 8443 and the path is dropped; other ports are
    /// left as they are.
    #[test]
    fn derive_grpc_url_cases() {
        let cases = [
            ("https://host:8000/v1", "https://host:8443"),
            ("https://host:8000/", "https://host:8443"),
            ("https://host:9000/v1", "https://host:9000"),
        ];
        for (input, expected) in cases {
            assert_eq!(derive_grpc_url(input), expected);
        }
    }
}
|
||||
|
|
@ -125,19 +125,7 @@ impl<'de> Deserialize<'de> for NodeLeaf {
|
|||
body: NodeBody,
|
||||
timestamp: DateTime<Utc>,
|
||||
}
|
||||
let mut raw = Raw::deserialize(deserializer)?;
|
||||
// Heal pre-refactor logs: Image leaves used to be deserialized
|
||||
// with token_count=0 (server-authoritative count was applied
|
||||
// after AppendImage). With pads now expanded client-side at
|
||||
// construction, recompute from the persisted dimensions if
|
||||
// the stored count is 0.
|
||||
if let NodeBody::Image { orig_height, orig_width, token_count, .. }
|
||||
= &mut raw.body
|
||||
{
|
||||
if *token_count == 0 {
|
||||
*token_count = qwen3_image_token_count(*orig_height, *orig_width);
|
||||
}
|
||||
}
|
||||
let raw = Raw::deserialize(deserializer)?;
|
||||
let token_ids = raw.body.compute_token_ids();
|
||||
Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp })
|
||||
}
|
||||
|
|
@ -155,44 +143,18 @@ pub enum AstNode {
|
|||
/// Maps memory key → divergence score for this response.
|
||||
#[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
|
||||
memory_scores: std::collections::BTreeMap<String, f64>,
|
||||
/// Cached token stream for the subtree. When `Some`, wire-out
|
||||
/// uses these bytes verbatim and skips recursion into children.
|
||||
/// Populated by the response parser from the server's exact
|
||||
/// stream; also computable from children as a fallback. Cleared
|
||||
/// on any edit to a descendant. Not serialized — transient.
|
||||
#[serde(skip, default)]
|
||||
token_ids: Option<Vec<u32>>,
|
||||
},
|
||||
}
|
||||
|
||||
/// The context window: four sections as Vec<AstNode>.
|
||||
///
|
||||
/// All mutation MUST go through `ContextState`'s public methods. Two
|
||||
/// invariants ride on this:
|
||||
/// 1. Every `Leaf.token_ids` matches its `body.compute_token_ids()`.
|
||||
/// 2. For every `Branch { token_ids: Some(cached), .. }`, the cached
|
||||
/// token stream matches what `wire_into` would produce by walking
|
||||
/// `children` from scratch. Any mutation that touches a Branch's
|
||||
/// children — directly or via a descendant — must clear the
|
||||
/// Branch's `token_ids` so it gets recomputed on next wire-out.
|
||||
///
|
||||
/// The `&mut Vec<AstNode>` escape hatches are intentionally NOT
|
||||
/// exposed; if you find yourself wanting one, add a focused method
|
||||
/// here that maintains the invariants.
|
||||
/// All mutation goes through ContextState methods to maintain the invariant
|
||||
/// that token_ids on every leaf matches its rendered text.
|
||||
pub struct ContextState {
|
||||
system: Vec<AstNode>,
|
||||
identity: Vec<AstNode>,
|
||||
journal: Vec<AstNode>,
|
||||
conversation: Vec<AstNode>,
|
||||
pub conversation_log: Option<crate::mind::log::ConversationLog>,
|
||||
/// Length of the session's token stream on the server, as of the
|
||||
/// last Done event. Updated by the grpc layer.
|
||||
server_committed_len: u32,
|
||||
/// Prefix length of our walk that still matches the server's
|
||||
/// session.tokens byte-for-byte. When < `server_committed_len`
|
||||
/// the session needs rewinding (truncating=true at this offset).
|
||||
/// Reset to 0 on any mutation that could have changed sent bytes.
|
||||
client_match_upto: u32,
|
||||
}
|
||||
|
||||
impl Clone for ContextState {
|
||||
|
|
@ -203,8 +165,6 @@ impl Clone for ContextState {
|
|||
journal: self.journal.clone(),
|
||||
conversation: self.conversation.clone(),
|
||||
conversation_log: None, // forked contexts don't log
|
||||
server_committed_len: self.server_committed_len,
|
||||
client_match_upto: self.client_match_upto,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -241,10 +201,6 @@ pub struct ResponseParser {
|
|||
think_buf: String,
|
||||
in_tool_call: bool,
|
||||
tool_call_buf: String,
|
||||
/// Raw generated token IDs, in arrival order. Combined with the
|
||||
/// prologue at `finish` to stamp the Branch's authoritative
|
||||
/// token cache — the bytes the server has for this branch.
|
||||
generated_tokens: Vec<u32>,
|
||||
}
|
||||
|
||||
impl Role {
|
||||
|
|
@ -356,16 +312,6 @@ impl NodeLeaf {
|
|||
pub fn token_ids(&self) -> &[u32] { &self.token_ids }
|
||||
pub fn tokens(&self) -> usize { self.token_ids.len() }
|
||||
pub fn timestamp(&self) -> DateTime<Utc> { self.timestamp }
|
||||
|
||||
/// If this is an Image leaf, update its IMAGE_PAD count to `n` and
|
||||
/// recompute cached `token_ids`. No-op on non-Image leaves —
|
||||
/// callers know the body shape via `body()`.
|
||||
pub fn set_image_token_count(&mut self, n: u32) {
|
||||
if let NodeBody::Image { token_count, .. } = &mut self.body {
|
||||
*token_count = n;
|
||||
self.token_ids = self.body.compute_token_ids();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AstNode {
|
||||
|
|
@ -414,9 +360,6 @@ impl AstNode {
|
|||
orig_height: u32,
|
||||
orig_width: u32,
|
||||
) -> Self {
|
||||
// Pad count is computed eagerly from dimensions — no more
|
||||
// "unknown until server responds" shape. Server validates
|
||||
// on the Generate call; mismatches fail loud.
|
||||
let token_count = qwen3_image_token_count(orig_height, orig_width);
|
||||
Self::Leaf(NodeLeaf::new(NodeBody::Image {
|
||||
bytes,
|
||||
|
|
@ -430,13 +373,7 @@ impl AstNode {
|
|||
// -- Branch constructors --------------------------------------------------
|
||||
|
||||
pub fn branch(role: Role, children: Vec<AstNode>) -> Self {
|
||||
Self::Branch {
|
||||
role,
|
||||
children,
|
||||
timestamp: Utc::now(),
|
||||
memory_scores: Default::default(),
|
||||
token_ids: None,
|
||||
}
|
||||
Self::Branch { role, children, timestamp: Utc::now(), memory_scores: Default::default() }
|
||||
}
|
||||
|
||||
pub fn system_msg(text: impl Into<String>) -> Self {
|
||||
|
|
@ -445,7 +382,6 @@ impl AstNode {
|
|||
children: vec![Self::content(text)],
|
||||
timestamp: Utc::now(),
|
||||
memory_scores: Default::default(),
|
||||
token_ids: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -455,7 +391,6 @@ impl AstNode {
|
|||
children: vec![Self::content(text)],
|
||||
timestamp: Utc::now(),
|
||||
memory_scores: Default::default(),
|
||||
token_ids: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -467,12 +402,11 @@ impl AstNode {
|
|||
let token_ids = leaf.body.compute_token_ids();
|
||||
Self::Leaf(NodeLeaf { token_ids, ..leaf })
|
||||
}
|
||||
Self::Branch { role, children, timestamp, memory_scores, .. } => Self::Branch {
|
||||
Self::Branch { role, children, timestamp, memory_scores } => Self::Branch {
|
||||
role,
|
||||
children: children.into_iter().map(|c| c.retokenize()).collect(),
|
||||
timestamp,
|
||||
memory_scores,
|
||||
token_ids: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
|
@ -549,10 +483,7 @@ impl AstNode {
|
|||
fn token_ids_into(&self, out: &mut Vec<u32>) {
|
||||
match self {
|
||||
Self::Leaf(leaf) => out.extend_from_slice(&leaf.token_ids),
|
||||
Self::Branch { token_ids: Some(cached), .. } => {
|
||||
out.extend_from_slice(cached);
|
||||
}
|
||||
Self::Branch { role, children, token_ids: None, .. } => {
|
||||
Self::Branch { role, children, .. } => {
|
||||
out.push(tokenizer::IM_START);
|
||||
out.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
|
||||
for child in children {
|
||||
|
|
@ -581,8 +512,7 @@ impl Ast for AstNode {
|
|||
fn tokens(&self) -> usize {
|
||||
match self {
|
||||
Self::Leaf(leaf) => leaf.tokens(),
|
||||
Self::Branch { token_ids: Some(cached), .. } => cached.len(),
|
||||
Self::Branch { role, children, token_ids: None, .. } => {
|
||||
Self::Branch { role, children, .. } => {
|
||||
1 + role_header_tokens(*role)
|
||||
+ children.iter().map(|c| c.tokens()).sum::<usize>()
|
||||
+ 1 + newline_tokens()
|
||||
|
|
@ -736,7 +666,6 @@ impl ResponseParser {
|
|||
think_buf: String::new(),
|
||||
in_tool_call: false,
|
||||
tool_call_buf: String::new(),
|
||||
generated_tokens: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -755,34 +684,18 @@ impl ResponseParser {
|
|||
let handle = tokio::spawn(async move {
|
||||
let mut parser = self;
|
||||
let agent_name = agent.state.lock().await.provenance.clone();
|
||||
eprintln!(
|
||||
"[agent:{agent_name}] parser task start branch_idx={} in_think={}",
|
||||
parser.branch_idx, parser.in_think,
|
||||
);
|
||||
let log_path = format!("/tmp/poc-{}.log", agent_name);
|
||||
let mut log_file = std::fs::OpenOptions::new()
|
||||
.create(true).append(true).open(&log_path).ok();
|
||||
let mut full_text = String::new();
|
||||
let mut token_count: usize = 0;
|
||||
while let Some(event) = stream.recv().await {
|
||||
match event {
|
||||
super::api::StreamToken::Token { id, readout } => {
|
||||
token_count += 1;
|
||||
if token_count == 1 {
|
||||
eprintln!("[agent:{agent_name}] parser first token id={}", id);
|
||||
} else if token_count % 256 == 0 {
|
||||
eprintln!(
|
||||
"[agent:{agent_name}] parser token_count={} chars={}",
|
||||
token_count,
|
||||
full_text.len(),
|
||||
);
|
||||
}
|
||||
if let Some(r) = readout {
|
||||
if let Ok(mut buf) = agent.readout.lock() {
|
||||
buf.push(id, r);
|
||||
}
|
||||
}
|
||||
parser.generated_tokens.push(id);
|
||||
let text = super::tokenizer::decode(&[id]);
|
||||
full_text.push_str(&text);
|
||||
let mut ctx = agent.context.lock().await;
|
||||
|
|
@ -801,12 +714,6 @@ impl ResponseParser {
|
|||
}
|
||||
}
|
||||
super::api::StreamToken::Done { usage } => {
|
||||
eprintln!(
|
||||
"[agent:{agent_name}] parser done token_count={} chars={} usage={:?}",
|
||||
token_count,
|
||||
full_text.len(),
|
||||
usage,
|
||||
);
|
||||
if let Some(ref mut f) = log_file {
|
||||
use std::io::Write;
|
||||
let ctx = agent.context.lock().await;
|
||||
|
|
@ -823,31 +730,19 @@ impl ResponseParser {
|
|||
let _ = writeln!(f, " unparsed text: {}", &full_text[..end]);
|
||||
}
|
||||
}
|
||||
if let Some(ref u) = usage {
|
||||
if let Some(u) = usage {
|
||||
agent.state.lock().await.last_prompt_tokens = u.prompt_tokens;
|
||||
}
|
||||
let mut ctx = agent.context.lock().await;
|
||||
parser.finish(&mut ctx);
|
||||
if let Some(u) = usage {
|
||||
ctx.note_session_synced(u.total_tokens);
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
super::api::StreamToken::Error(e) => {
|
||||
eprintln!("[agent:{agent_name}] parser stream error: {}", e);
|
||||
return Err(anyhow::anyhow!("{}", e));
|
||||
}
|
||||
}
|
||||
}
|
||||
eprintln!(
|
||||
"[agent:{agent_name}] parser stream closed without done token_count={} chars={}",
|
||||
token_count,
|
||||
full_text.len(),
|
||||
);
|
||||
Err(anyhow::anyhow!(
|
||||
"stream closed without Done event after {} tokens",
|
||||
token_count,
|
||||
))
|
||||
Ok(())
|
||||
});
|
||||
(rx, handle)
|
||||
}
|
||||
|
|
@ -928,7 +823,7 @@ impl ResponseParser {
|
|||
}
|
||||
|
||||
fn push_child(&self, ctx: &mut ContextState, child: AstNode) {
|
||||
ctx.push_child_raw(Section::Conversation, self.branch_idx, child);
|
||||
ctx.push_child(Section::Conversation, self.branch_idx, child);
|
||||
}
|
||||
|
||||
fn flush_content(&mut self, ctx: &mut ContextState) {
|
||||
|
|
@ -942,69 +837,10 @@ impl ResponseParser {
|
|||
}
|
||||
|
||||
pub fn finish(mut self, ctx: &mut ContextState) {
|
||||
// Salvage any in-flight tag accumulators if the stream ended
|
||||
// before the close tag arrived (max_tokens, premature EOS,
|
||||
// server-side cancel). Without this, an unterminated
|
||||
// <think>...</think> drops all of self.think_buf and only the
|
||||
// trailing rolling window in self.buf survives — observed as
|
||||
// "responses cut off, only the last ~8 characters come
|
||||
// through" because drain_safe keeps `close_tag.len()` bytes
|
||||
// (8 for `</think>`) at the tail of buf.
|
||||
if self.in_think {
|
||||
if !self.buf.is_empty() {
|
||||
self.think_buf.push_str(&std::mem::take(&mut self.buf));
|
||||
}
|
||||
let text = std::mem::take(&mut self.think_buf).trim().to_string();
|
||||
if !text.is_empty() {
|
||||
self.push_child(ctx, AstNode::thinking(text));
|
||||
}
|
||||
self.in_think = false;
|
||||
} else if self.in_tool_call {
|
||||
if !self.buf.is_empty() {
|
||||
self.tool_call_buf.push_str(&std::mem::take(&mut self.buf));
|
||||
}
|
||||
let body = std::mem::take(&mut self.tool_call_buf);
|
||||
match parse_tool_call_body(&body) {
|
||||
Some((name, args)) => {
|
||||
self.flush_content(ctx);
|
||||
self.push_child(ctx, AstNode::tool_call(&name, &args));
|
||||
}
|
||||
None => {
|
||||
// Body's likely incomplete (no `</tool_call>` ever
|
||||
// arrived). Wrap as content with the open tag so the
|
||||
// model can see its own truncated attempt next turn
|
||||
// rather than losing it silently.
|
||||
self.content_parts.push(format!("<tool_call>\n{}", body));
|
||||
}
|
||||
}
|
||||
self.in_tool_call = false;
|
||||
} else if !self.buf.is_empty() {
|
||||
self.content_parts.push(std::mem::take(&mut self.buf));
|
||||
}
|
||||
self.flush_content(ctx);
|
||||
|
||||
// Stamp the authoritative token cache onto the branch.
|
||||
// Layout mirrors the full chat-template rendering of a
|
||||
// message block:
|
||||
//
|
||||
// IM_START + "assistant\n" [+ "<think>\n"] (prologue — what we sent)
|
||||
// + generated_tokens (what the server generated, ends in IM_END)
|
||||
// + "\n" (trailing newline — template-required)
|
||||
//
|
||||
// Server only has through the IM_END (model stops on it,
|
||||
// doesn't emit "\n"). Match-upto lands inside the cache
|
||||
// right after IM_END; the chunk-walk's straddle path picks
|
||||
// up the trailing "\n" as the head of the next turn's delta.
|
||||
// The "\n" between turns matters: without it Qwen sees
|
||||
// `<|im_end|><|im_start|>` back-to-back (no newline) and
|
||||
// responds with garbage.
|
||||
let prologue_text = if self.in_think { "assistant\n<think>\n" } else { "assistant\n" };
|
||||
let mut cache = Vec::with_capacity(1 + self.generated_tokens.len() + 8);
|
||||
cache.push(tokenizer::IM_START);
|
||||
cache.extend(tokenizer::encode(prologue_text));
|
||||
cache.extend(self.generated_tokens);
|
||||
cache.extend(tokenizer::encode("\n"));
|
||||
ctx.set_branch_cache(Section::Conversation, self.branch_idx, cache);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1016,77 +852,20 @@ impl ContextState {
|
|||
journal: Vec::new(),
|
||||
conversation: Vec::new(),
|
||||
conversation_log: None,
|
||||
server_committed_len: 0,
|
||||
client_match_upto: 0,
|
||||
}
|
||||
}
|
||||
|
||||
// -- Server sync tracking -------------------------------------------------
|
||||
|
||||
/// Length of the session's token stream on the server. Updated by
|
||||
/// the grpc layer from Generate Done events.
|
||||
pub fn server_committed_len(&self) -> u32 { self.server_committed_len }
|
||||
|
||||
/// Prefix of our walk we still believe matches the server
|
||||
/// byte-for-byte. If less than `server_committed_len`, the next
|
||||
/// Generate must send `truncating=true` at this offset.
|
||||
pub fn client_match_upto(&self) -> u32 { self.client_match_upto }
|
||||
|
||||
/// Called by the grpc layer after a successful Generate Done:
|
||||
/// records both the server's new length and the fact that we
|
||||
/// match up to it (we just sent everything).
|
||||
pub fn note_session_synced(&mut self, total_tokens: u32) {
|
||||
self.server_committed_len = total_tokens;
|
||||
self.client_match_upto = total_tokens;
|
||||
}
|
||||
|
||||
/// Reset match-upto to 0. Called from every mutation that could
|
||||
/// have touched a region the server already has. For now,
|
||||
/// conservatively drops alignment entirely — finer-grained
|
||||
/// tracking (match-upto at the mutated node's offset) is a
|
||||
/// future optimization.
|
||||
fn mark_dirty(&mut self) {
|
||||
self.client_match_upto = 0;
|
||||
}
|
||||
|
||||
// -- Read access ----------------------------------------------------------
|
||||
|
||||
/// Read-only view of the nodes in the system section.
pub fn system(&self) -> &[AstNode] {
    self.system.as_slice()
}
|
||||
/// Read-only view of the nodes in the identity section.
pub fn identity(&self) -> &[AstNode] {
    self.identity.as_slice()
}
|
||||
/// Read-only view of the nodes in the journal section.
pub fn journal(&self) -> &[AstNode] {
    self.journal.as_slice()
}
|
||||
/// Read-only view of the nodes in the conversation section.
pub fn conversation(&self) -> &[AstNode] {
    self.conversation.as_slice()
}
|
||||
|
||||
/// Set or clear a single `memory_scores` entry on an Assistant
|
||||
/// Branch. Used by the full-matrix scorer to attribute per-memory
|
||||
/// divergence onto the response. `score = None` removes the key;
|
||||
/// `Some(s)` inserts/overwrites.
|
||||
///
|
||||
/// Doesn't affect the Branch's token cache: `memory_scores` is a
|
||||
/// serialized-but-non-tokenizing annotation. No-op (with a debug
|
||||
/// log) if the index points to a Leaf or a non-Assistant Branch —
|
||||
/// callers are typically iterating on stale indices and we'd
|
||||
/// rather skip than panic.
|
||||
pub fn set_branch_memory_score(
|
||||
&mut self,
|
||||
section: Section,
|
||||
index: usize,
|
||||
key: &str,
|
||||
score: Option<f64>,
|
||||
) {
|
||||
let nodes = self.section_mut(section);
|
||||
let Some(node) = nodes.get_mut(index) else { return };
|
||||
let AstNode::Branch { role: Role::Assistant, memory_scores, .. } = node
|
||||
else { return };
|
||||
match score {
|
||||
Some(s) => { memory_scores.insert(key.to_string(), s); }
|
||||
None => { memory_scores.remove(key); }
|
||||
}
|
||||
}
|
||||
/// Direct mutable access to the conversation section's node list.
///
/// This hands out the raw `Vec` and does not call `mark_dirty`, so
/// edits made through it leave the alignment counters unchanged.
pub fn conversation_mut(&mut self) -> &mut Vec<AstNode> {
    &mut self.conversation
}
|
||||
|
||||
pub fn sections(&self) -> [&Vec<AstNode>; 4] {
|
||||
[&self.system, &self.identity, &self.journal, &self.conversation]
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl Ast for ContextState {
|
||||
|
|
@ -1119,63 +898,30 @@ impl Ast for ContextState {
|
|||
}
|
||||
|
||||
/// An image collected from the AST for a request body. The AST stores
|
||||
/// Image metadata collected during `wire_chunks` — the binary +
|
||||
/// mime plus the absolute token-position range of the image's
|
||||
/// pre-expanded placeholder run in the full wire stream. Sent
|
||||
/// alongside `append_tokens` in `GenerateRequest` so the server
|
||||
/// can attach vision features to the declared positions. Positions
|
||||
/// are absolute within the full wire walk starting at offset 0,
|
||||
/// i.e. the same coordinate system as `session.tokens` on the
|
||||
/// server once the walk has been applied.
|
||||
#[derive(Clone)]
|
||||
/// the pre-expanded token form (N image_pads) for accurate budget
|
||||
/// accounting; the wire form collapses each Image to a single
|
||||
/// `<|image_pad|>` between vision bookends and ships the bytes
|
||||
/// separately as multi_modal_data.
|
||||
pub struct WireImage {
|
||||
pub bytes: Vec<u8>,
|
||||
pub mime: String,
|
||||
pub pad_start: u32,
|
||||
pub pad_end: u32,
|
||||
}
|
||||
|
||||
/// One piece of the wire stream for the gRPC session path.
///
/// Images live inline in the token stream (pre-expanded when the AST
/// is built), so a run of tokens is the only variant. The binary and
/// position metadata for each embedded image travels separately, in
/// the `Vec<WireImage>` that `wire_chunks` returns alongside the
/// chunks.
///
/// `Debug` is derived so chunks can be printed in traces and test
/// failures.
#[derive(Clone, Debug)]
pub enum WireChunk {
    /// A contiguous run of token ids.
    Tokens(Vec<u32>),
}
|
||||
|
||||
fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>) {
|
||||
match node {
|
||||
AstNode::Leaf(leaf) => match leaf.body() {
|
||||
NodeBody::Image { bytes, mime, .. } => {
|
||||
// The Image leaf's token_ids is already
|
||||
// [VISION_START, IMAGE_PAD * N, VISION_END]. Inline
|
||||
// those into the token stream and record the pad-run
|
||||
// range so the server can attach features to the
|
||||
// declared positions.
|
||||
let pad_start = tokens.len() as u32;
|
||||
tokens.extend_from_slice(leaf.token_ids());
|
||||
let pad_end = tokens.len() as u32;
|
||||
tokens.push(tokenizer::VISION_START);
|
||||
tokens.push(tokenizer::IMAGE_PAD);
|
||||
tokens.push(tokenizer::VISION_END);
|
||||
images.push(WireImage {
|
||||
bytes: bytes.clone(),
|
||||
mime: mime.clone(),
|
||||
pad_start,
|
||||
pad_end,
|
||||
});
|
||||
}
|
||||
_ => tokens.extend_from_slice(leaf.token_ids()),
|
||||
},
|
||||
AstNode::Branch { token_ids: Some(cached), children, .. } => {
|
||||
// Cached branches still need their image children paired
|
||||
// up with the vision-block ranges embedded in the cached
|
||||
// token stream — the cache captures vision tokens but not
|
||||
// the matching bytes/mime.
|
||||
let base = tokens.len() as u32;
|
||||
tokens.extend_from_slice(cached);
|
||||
pair_cached_images(cached, children, base, images);
|
||||
}
|
||||
AstNode::Branch { role, children, token_ids: None, .. } => {
|
||||
AstNode::Branch { role, children, .. } => {
|
||||
tokens.push(tokenizer::IM_START);
|
||||
tokens.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
|
||||
for c in children {
|
||||
|
|
@ -1187,101 +933,6 @@ fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>)
|
|||
}
|
||||
}
|
||||
|
||||
/// Depth-first iterator over Image leaves under a slice of AST nodes.
|
||||
/// Yields `(bytes, mime)` borrows in document order; doesn't allocate
|
||||
/// per yield (only a stack of pending nodes).
|
||||
struct ImageLeaves<'a> {
|
||||
stack: Vec<&'a AstNode>,
|
||||
}
|
||||
|
||||
impl<'a> ImageLeaves<'a> {
|
||||
fn new(nodes: &'a [AstNode]) -> Self {
|
||||
let mut stack = Vec::with_capacity(nodes.len());
|
||||
stack.extend(nodes.iter().rev());
|
||||
Self { stack }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for ImageLeaves<'a> {
|
||||
type Item = (&'a [u8], &'a str);
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
while let Some(node) = self.stack.pop() {
|
||||
match node {
|
||||
AstNode::Leaf(leaf) => {
|
||||
if let NodeBody::Image { bytes, mime, .. } = leaf.body() {
|
||||
return Some((bytes, mime));
|
||||
}
|
||||
}
|
||||
AstNode::Branch { children, .. } => {
|
||||
self.stack.extend(children.iter().rev());
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator over `(start, end)` token-offset pairs for each
|
||||
/// `VISION_START..VISION_END` block in a token slice. Panics on an
|
||||
/// unmatched VISION_START — that's an upstream tokenization bug
|
||||
/// worth a loud failure.
|
||||
fn vision_blocks(cached: &[u32]) -> impl Iterator<Item = (usize, usize)> + '_ {
|
||||
let mut cur = 0;
|
||||
std::iter::from_fn(move || {
|
||||
while cur < cached.len() {
|
||||
if cached[cur] == tokenizer::VISION_START {
|
||||
let start = cur;
|
||||
let end_rel = cached[cur..].iter()
|
||||
.position(|&t| t == tokenizer::VISION_END)
|
||||
.unwrap_or_else(|| panic!(
|
||||
"unmatched VISION_START at offset {} in cached branch",
|
||||
start));
|
||||
let end = cur + end_rel + 1;
|
||||
cur = end;
|
||||
return Some((start, end));
|
||||
}
|
||||
cur += 1;
|
||||
}
|
||||
None
|
||||
})
|
||||
}
|
||||
|
||||
/// For a Branch whose `token_ids` are cached and may contain inlined
|
||||
/// vision blocks (`VISION_START + IMAGE_PAD*N + VISION_END`), recover
|
||||
/// the matching image bytes/mime from the children and emit one
|
||||
/// `WireImage` per vision block with the absolute pad offsets in the
|
||||
/// parent token stream.
|
||||
///
|
||||
/// The cache stores tokens but not image payloads; the AST stores
|
||||
/// image payloads in the children but not their post-cache positions.
|
||||
/// Pair them by zipping the two iterators; mismatched counts panic
|
||||
/// loudly because that's an AST/cache invariant violation that
|
||||
/// would otherwise mis-pair images on the wire.
|
||||
fn pair_cached_images(
|
||||
cached: &[u32],
|
||||
children: &[AstNode],
|
||||
base_offset: u32,
|
||||
images: &mut Vec<WireImage>,
|
||||
) {
|
||||
let mut blocks = vision_blocks(cached);
|
||||
let mut leaves = ImageLeaves::new(children);
|
||||
loop {
|
||||
match (blocks.next(), leaves.next()) {
|
||||
(Some((s, e)), Some((bytes, mime))) => images.push(WireImage {
|
||||
bytes: bytes.to_vec(),
|
||||
mime: mime.to_string(),
|
||||
pad_start: base_offset + s as u32,
|
||||
pad_end: base_offset + e as u32,
|
||||
}),
|
||||
(None, None) => break,
|
||||
(Some(_), None) => panic!(
|
||||
"cached branch has more vision blocks than image children"),
|
||||
(None, Some(_)) => panic!(
|
||||
"cached branch has fewer vision blocks than image children"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn memory_key(node: &AstNode) -> Option<&str> {
|
||||
match node {
|
||||
AstNode::Leaf(leaf) => match leaf.body() {
|
||||
|
|
@ -1391,92 +1042,6 @@ impl ContextState {
|
|||
}
|
||||
(tokens, images, assistant_ranges)
|
||||
}
|
||||
|
||||
/// Build the wire stream as interleaved `WireChunk`s for the gRPC
|
||||
/// session path. Returns a tuple of (chunks, images): the chunks
|
||||
/// hold the full token stream (with vision blocks inlined as
|
||||
/// `VISION_START + IMAGE_PAD*N + VISION_END`), and the images
|
||||
/// list carries each embedded image's binary + position range so
|
||||
/// the gRPC layer can attach them via `GenerateRequest.images`.
|
||||
///
|
||||
/// Note: with images inlined into the token stream, the chunks
|
||||
/// list is structurally a single `Tokens` chunk in the common
|
||||
/// case — the multi-chunk shape persists only because some
|
||||
/// callers may want the option of inserting breakpoints later.
|
||||
///
|
||||
/// `conv_range` and `skip` mirror `wire_prompt` — select a
|
||||
/// conversation slice and drop identity / conversation nodes by
|
||||
/// predicate.
|
||||
pub fn wire_chunks<F>(
|
||||
&self,
|
||||
conv_range: std::ops::Range<usize>,
|
||||
mut skip: F,
|
||||
) -> (Vec<WireChunk>, Vec<WireImage>)
|
||||
where F: FnMut(&AstNode) -> bool,
|
||||
{
|
||||
let mut buf: Vec<u32> = Vec::new();
|
||||
let mut images: Vec<WireImage> = Vec::new();
|
||||
|
||||
fn visit(
|
||||
node: &AstNode,
|
||||
buf: &mut Vec<u32>,
|
||||
images: &mut Vec<WireImage>,
|
||||
) {
|
||||
match node {
|
||||
AstNode::Leaf(leaf) => match leaf.body() {
|
||||
NodeBody::Image { bytes, mime, .. } => {
|
||||
// Pre-expanded vision block lives in
|
||||
// leaf.token_ids: [VISION_START, IMAGE_PAD*N,
|
||||
// VISION_END]. Inline + record the range.
|
||||
let pad_start = buf.len() as u32;
|
||||
buf.extend_from_slice(leaf.token_ids());
|
||||
let pad_end = buf.len() as u32;
|
||||
images.push(WireImage {
|
||||
bytes: bytes.clone(),
|
||||
mime: mime.clone(),
|
||||
pad_start,
|
||||
pad_end,
|
||||
});
|
||||
}
|
||||
_ => buf.extend_from_slice(leaf.token_ids()),
|
||||
},
|
||||
AstNode::Branch { token_ids: Some(cached), children, .. } => {
|
||||
// Same fix as wire_into's cached arm: the cache
|
||||
// holds vision tokens but not the matching bytes,
|
||||
// so walk children to recover them.
|
||||
let base = buf.len() as u32;
|
||||
buf.extend_from_slice(cached);
|
||||
pair_cached_images(cached, children, base, images);
|
||||
}
|
||||
AstNode::Branch { role, children, token_ids: None, .. } => {
|
||||
buf.push(tokenizer::IM_START);
|
||||
buf.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
|
||||
for c in children {
|
||||
visit(c, buf, images);
|
||||
}
|
||||
buf.push(tokenizer::IM_END);
|
||||
buf.extend(tokenizer::encode("\n"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for node in self.system() { visit(node, &mut buf, &mut images); }
|
||||
for node in self.identity() {
|
||||
if skip(node) { continue; }
|
||||
visit(node, &mut buf, &mut images);
|
||||
}
|
||||
for node in self.journal() { visit(node, &mut buf, &mut images); }
|
||||
for node in &self.conversation()[conv_range] {
|
||||
if skip(node) { continue; }
|
||||
visit(node, &mut buf, &mut images);
|
||||
}
|
||||
let chunks = if buf.is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
vec![WireChunk::Tokens(buf)]
|
||||
};
|
||||
(chunks, images)
|
||||
}
|
||||
}
|
||||
|
||||
impl ContextState {
|
||||
|
|
@ -1496,27 +1061,17 @@ impl ContextState {
|
|||
dbglog!("warning: log: {:#}", e);
|
||||
}
|
||||
}
|
||||
// Conversation appends always go to the tail — past committed —
|
||||
// so they don't break the match. Any other section mutates a
|
||||
// region the server may already have, so drop alignment.
|
||||
if section != Section::Conversation {
|
||||
self.mark_dirty();
|
||||
}
|
||||
self.section_mut(section).push(node);
|
||||
}
|
||||
|
||||
/// Push without logging.
|
||||
pub fn push_no_log(&mut self, section: Section, node: AstNode) {
|
||||
if section != Section::Conversation {
|
||||
self.mark_dirty();
|
||||
}
|
||||
self.section_mut(section).push(node);
|
||||
}
|
||||
|
||||
/// Replace the body of a leaf at `index` in `section`.
|
||||
/// Re-tokenizes to maintain the invariant.
|
||||
pub fn set_message(&mut self, section: Section, index: usize, body: NodeBody) {
|
||||
self.mark_dirty();
|
||||
let nodes = self.section_mut(section);
|
||||
let node = &mut nodes[index];
|
||||
match node {
|
||||
|
|
@ -1542,12 +1097,10 @@ impl ContextState {
|
|||
}
|
||||
|
||||
/// Remove and return the node at `index` in `section`.
///
/// Session alignment is dropped (`mark_dirty`) before the removal.
///
/// # Panics
///
/// Panics if `index` is out of bounds for the section.
pub fn del(&mut self, section: Section, index: usize) -> AstNode {
    self.mark_dirty();
    let nodes = self.section_mut(section);
    nodes.remove(index)
}
|
||||
|
||||
/// Remove every node from `section`.
///
/// Session alignment is dropped (`mark_dirty`) before the section is
/// emptied.
pub fn clear(&mut self, section: Section) {
    self.mark_dirty();
    let nodes = self.section_mut(section);
    nodes.clear();
}
|
||||
|
||||
|
|
@ -1568,7 +1121,6 @@ impl ContextState {
|
|||
/// are > 50% of conversation tokens) or oldest conversation entry.
|
||||
/// Phase 3: Snap to user message boundary at start.
|
||||
pub fn trim_conversation(&mut self) {
|
||||
self.mark_dirty();
|
||||
let max_tokens = context_budget_tokens();
|
||||
let fixed = self.system.iter().map(|n| n.tokens()).sum::<usize>()
|
||||
+ self.identity.iter().map(|n| n.tokens()).sum::<usize>()
|
||||
|
|
@ -1645,49 +1197,11 @@ impl ContextState {
|
|||
}
|
||||
|
||||
/// Push a child node into a branch at `index` in `section`.
|
||||
/// Clears the branch's cached token stream — wire-out will recompute
|
||||
/// from children until the cache is repopulated. If the cache was
|
||||
/// populated (server had these bytes), drops session alignment.
|
||||
pub fn push_child(&mut self, section: Section, index: usize, child: AstNode) {
|
||||
let node = &mut self.section_mut(section)[index];
|
||||
let was_cached = matches!(node, AstNode::Branch { token_ids: Some(_), .. });
|
||||
match node {
|
||||
AstNode::Branch { children, token_ids, .. } => {
|
||||
children.push(child);
|
||||
*token_ids = None;
|
||||
}
|
||||
AstNode::Leaf(_) => panic!("push_child on leaf node"),
|
||||
}
|
||||
if was_cached {
|
||||
self.mark_dirty();
|
||||
}
|
||||
}
|
||||
|
||||
/// Like `push_child` but preserves the branch's cached token stream.
|
||||
/// Used by the response parser, which is simultaneously populating
|
||||
/// the cache from the authoritative server stream and pushing the
|
||||
/// parsed-out children — the two stay consistent by construction.
|
||||
/// Module-private: callers outside `context.rs` must go through
|
||||
/// `push_child` so the invariant is maintained.
|
||||
fn push_child_raw(&mut self, section: Section, index: usize, child: AstNode) {
|
||||
let node = &mut self.section_mut(section)[index];
|
||||
match node {
|
||||
AstNode::Branch { children, .. } => children.push(child),
|
||||
AstNode::Leaf(_) => panic!("push_child_raw on leaf node"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Stamp a verbatim token cache onto the branch at `index` in
|
||||
/// `section`. Used by the response parser to record the server's
|
||||
/// authoritative token stream for the just-finished turn.
|
||||
/// Module-private: the cache is an invariant-load-bearing piece
|
||||
/// of state, populated only by code that holds the server's
|
||||
/// ground truth.
|
||||
fn set_branch_cache(&mut self, section: Section, index: usize, tokens: Vec<u32>) {
|
||||
let node = &mut self.section_mut(section)[index];
|
||||
match node {
|
||||
AstNode::Branch { token_ids, .. } => *token_ids = Some(tokens),
|
||||
AstNode::Leaf(_) => panic!("set_branch_cache on leaf node"),
|
||||
AstNode::Leaf(_) => panic!("push_child on leaf node"),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1711,14 +1225,6 @@ impl ContextState {
|
|||
// to at request time. Constants come from Qwen3.5-27B's preprocessor_config.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Production client-side computation of image-token expansion. With
|
||||
// the delta-session protocol, the client writes the pre-expanded
|
||||
// vision block (VISION_START + N*IMAGE_PAD + VISION_END) directly
|
||||
// into the token stream at Image-leaf construction time, and tells
|
||||
// the server where each image's pad run lives via
|
||||
// GenerateRequest.images. Server validates that this N matches
|
||||
// what the vision encoder actually produces and rejects on
|
||||
// mismatch — so drift here fails loudly, not silently.
|
||||
const QWEN3_PATCH_SIZE: u32 = 16;
|
||||
const QWEN3_MERGE_SIZE: u32 = 2;
|
||||
const QWEN3_MIN_PIXELS: u64 = 65_536;
|
||||
|
|
@ -1752,10 +1258,11 @@ fn smart_resize(h: u32, w: u32, factor: u32, min_pixels: u64, max_pixels: u64) -
|
|||
}
|
||||
}
|
||||
|
||||
/// How many `<|image_pad|>` tokens the Qwen3-VL vision encoder will
|
||||
/// produce for an image of the given dimensions. Server verifies
|
||||
/// this count against its own encoder run and rejects on mismatch.
|
||||
pub fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 {
|
||||
/// Compute how many `<|image_pad|>` tokens vLLM will emit for an image of
|
||||
/// the given dimensions. Matches Qwen3VL's feature-size calculation exactly:
|
||||
/// (grid_h * grid_w) / merge_size^2
|
||||
/// where (grid_h, grid_w) = resized dims / patch_size.
|
||||
fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 {
|
||||
let factor = QWEN3_PATCH_SIZE * QWEN3_MERGE_SIZE;
|
||||
let (rh, rw) = smart_resize(orig_h, orig_w, factor, QWEN3_MIN_PIXELS, QWEN3_MAX_PIXELS);
|
||||
(rh / QWEN3_PATCH_SIZE) * (rw / QWEN3_PATCH_SIZE) / (QWEN3_MERGE_SIZE * QWEN3_MERGE_SIZE)
|
||||
|
|
@ -2206,34 +1713,29 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_wire_prompt_preserves_expanded_image_pads() {
|
||||
fn test_wire_prompt_collapses_image_pads() {
|
||||
let mut ctx = ContextState::new();
|
||||
ctx.push_no_log(Section::Conversation, AstNode::branch(Role::User, vec![
|
||||
AstNode::content("look:"),
|
||||
AstNode::image(vec![0xDE, 0xAD], "image/png", 512, 512),
|
||||
]));
|
||||
|
||||
// AST side and wire side should both carry N image_pads + bookends —
|
||||
// server's session.tokens length must match what vLLM's engine will
|
||||
// actually process. Binary image bytes are shipped separately in
|
||||
// multi_modal_data via the WireImage list.
|
||||
let n_expected = qwen3_image_token_count(512, 512) as usize;
|
||||
|
||||
// AST side: N image_pads + bookends, full budget accounting.
|
||||
let full = ctx.token_ids();
|
||||
let n_image_pads_full = full.iter()
|
||||
.filter(|&&t| t == tokenizer::IMAGE_PAD).count();
|
||||
assert_eq!(n_image_pads_full, n_expected);
|
||||
assert_eq!(n_image_pads_full, qwen3_image_token_count(512, 512) as usize);
|
||||
|
||||
// Wire side: single image_pad, bytes moved to images list.
|
||||
let (wire, images, _) = ctx.wire_prompt(0..ctx.conversation().len(), |_| false);
|
||||
let n_image_pads_wire = wire.iter()
|
||||
.filter(|&&t| t == tokenizer::IMAGE_PAD).count();
|
||||
assert_eq!(n_image_pads_wire, n_expected);
|
||||
|
||||
assert_eq!(n_image_pads_wire, 1);
|
||||
assert_eq!(images.len(), 1);
|
||||
assert_eq!(images[0].bytes, vec![0xDE, 0xAD]);
|
||||
assert_eq!(images[0].mime, "image/png");
|
||||
|
||||
// One pair of vision_start/vision_end bookends around the N pads.
|
||||
// vision_start/vision_end bookends are preserved in wire form.
|
||||
assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_START).count(), 1);
|
||||
assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_END).count(), 1);
|
||||
}
|
||||
|
|
|
|||
186
src/agent/mod.rs
186
src/agent/mod.rs
|
|
@ -17,7 +17,6 @@ pub mod api;
|
|||
pub mod context;
|
||||
pub mod oneshot;
|
||||
pub mod readout;
|
||||
pub mod salience;
|
||||
pub mod tokenizer;
|
||||
pub mod tools;
|
||||
|
||||
|
|
@ -29,11 +28,6 @@ use context::{AstNode, ContextState, Section, Ast, PendingToolCall, ResponsePars
|
|||
|
||||
use crate::mind::log::ConversationLog;
|
||||
|
||||
async fn agent_trace(agent: &Arc<Agent>, msg: String) {
|
||||
let provenance = agent.state.lock().await.provenance.clone();
|
||||
eprintln!("[agent:{provenance}] {msg}");
|
||||
}
|
||||
|
||||
// --- Activity tracking (RAII guards) ---
|
||||
|
||||
pub struct ActivityEntry {
|
||||
|
|
@ -154,14 +148,6 @@ pub struct Agent {
|
|||
/// token handler, read by UI screens (amygdala). Manifest is
|
||||
/// `None` when the server has readout disabled.
|
||||
pub readout: readout::SharedReadoutBuffer,
|
||||
/// Long-lived gRPC session to the salience server, lazily opened
|
||||
/// on first use. Tracks appended tokens so subsequent turns send
|
||||
/// only the delta (prefix-cache reuse). None when not yet opened
|
||||
/// or when the session has died and needs reopening.
|
||||
///
|
||||
/// Arc-wrapped so the spawned streaming task can share ownership
|
||||
/// (the task outlives the call site).
|
||||
pub grpc_session: std::sync::Arc<crate::Mutex<Option<api::salience::SessionHandle>>>,
|
||||
}
|
||||
|
||||
/// Mutable agent state — behind its own mutex.
|
||||
|
|
@ -182,7 +168,9 @@ pub struct AgentState {
|
|||
pub think_native: bool,
|
||||
/// Tool-based thinking — add a "think" tool for structured reasoning.
|
||||
pub think_tool: bool,
|
||||
pub sampling: api::SamplingParams,
|
||||
pub temperature: f32,
|
||||
pub top_p: f32,
|
||||
pub top_k: u32,
|
||||
pub activities: Vec<ActivityEntry>,
|
||||
next_activity_id: u64,
|
||||
pub pending_yield: bool,
|
||||
|
|
@ -236,7 +224,6 @@ impl Agent {
|
|||
session_id,
|
||||
context: crate::Mutex::new(context),
|
||||
readout,
|
||||
grpc_session: std::sync::Arc::new(crate::Mutex::new(None)),
|
||||
state: crate::Mutex::new(AgentState {
|
||||
tools: agent_tools,
|
||||
mcp_tools: McpToolAccess::All,
|
||||
|
|
@ -244,12 +231,9 @@ impl Agent {
|
|||
reasoning_effort: "none".to_string(),
|
||||
think_native: true,
|
||||
think_tool: false,
|
||||
sampling: api::SamplingParams {
|
||||
temperature: 0.6,
|
||||
top_p: 0.95,
|
||||
top_k: 20,
|
||||
max_tokens: 4096,
|
||||
},
|
||||
activities: Vec::new(),
|
||||
next_activity_id: 0,
|
||||
pending_yield: false,
|
||||
|
|
@ -308,9 +292,6 @@ impl Agent {
|
|||
// shouldn't bleed into the main emotional readout even
|
||||
// though they hit the same vLLM server.
|
||||
readout: readout::new_shared(),
|
||||
// Forks get their own session — can't share a bidi stream,
|
||||
// and forks have different conversation tails anyway.
|
||||
grpc_session: std::sync::Arc::new(crate::Mutex::new(None)),
|
||||
state: crate::Mutex::new(AgentState {
|
||||
tools,
|
||||
mcp_tools: McpToolAccess::None,
|
||||
|
|
@ -318,7 +299,9 @@ impl Agent {
|
|||
reasoning_effort: "none".to_string(),
|
||||
think_native: st.think_native,
|
||||
think_tool: st.think_tool,
|
||||
sampling: st.sampling,
|
||||
temperature: st.temperature,
|
||||
top_p: st.top_p,
|
||||
top_k: st.top_k,
|
||||
activities: Vec::new(),
|
||||
next_activity_id: 0,
|
||||
pending_yield: false,
|
||||
|
|
@ -333,35 +316,35 @@ impl Agent {
|
|||
})
|
||||
}
|
||||
|
||||
/// Assemble a ready-to-send prompt as interleaved wire chunks for
|
||||
/// the gRPC session path. Text runs are batched; each Image leaf
|
||||
/// becomes its own chunk. Also trims the conversation to budget
|
||||
/// first so we don't build a prompt the server will reject for
|
||||
/// length.
|
||||
pub async fn assemble_prompt(&self)
|
||||
-> (Vec<context::WireChunk>, Vec<context::WireImage>, u32)
|
||||
{
|
||||
pub async fn assemble_prompt_tokens(&self) -> Vec<u32> {
|
||||
self.assemble_prompt().await.0
|
||||
}
|
||||
|
||||
/// Assemble a ready-to-send prompt: token stream in wire form (each
|
||||
/// image collapsed to a single `<|image_pad|>`) paired with the
|
||||
/// images to attach as multi_modal_data.
|
||||
///
|
||||
/// Pre-send size check: if the context has grown past budget since the
|
||||
/// last compact (accumulation between turns, a fork's context getting
|
||||
/// bigger than expected, etc.), trim here rather than letting vLLM
|
||||
/// reject the request. Client-side tokenization means we already know
|
||||
/// the exact token count so there's no reason to round-trip an
|
||||
/// oversize request.
|
||||
pub async fn assemble_prompt(&self) -> (Vec<u32>, Vec<context::WireImage>) {
|
||||
let mut ctx = self.context.lock().await;
|
||||
if ctx.total_tokens() > context::context_budget_tokens() {
|
||||
ctx.trim_conversation();
|
||||
}
|
||||
let st = self.state.lock().await;
|
||||
let conv_len = ctx.conversation().len();
|
||||
let (mut chunks, images) = ctx.wire_chunks(0..conv_len, |_| false);
|
||||
// Assistant-turn prologue. Merge into the trailing Tokens
|
||||
// chunk if there is one, else push as a new chunk.
|
||||
let mut prologue = vec![tokenizer::IM_START];
|
||||
let (mut tokens, images, _) =
|
||||
ctx.wire_prompt(0..ctx.conversation().len(), |_| false);
|
||||
tokens.push(tokenizer::IM_START);
|
||||
if st.think_native {
|
||||
prologue.extend(tokenizer::encode("assistant\n<think>\n"));
|
||||
tokens.extend(tokenizer::encode("assistant\n<think>\n"));
|
||||
} else {
|
||||
prologue.extend(tokenizer::encode("assistant\n"));
|
||||
tokens.extend(tokenizer::encode("assistant\n"));
|
||||
}
|
||||
match chunks.last_mut() {
|
||||
Some(context::WireChunk::Tokens(last)) => last.extend(prologue),
|
||||
_ => chunks.push(context::WireChunk::Tokens(prologue)),
|
||||
}
|
||||
let match_upto = ctx.client_match_upto();
|
||||
(chunks, images, match_upto)
|
||||
(tokens, images)
|
||||
}
|
||||
|
||||
/// Rebuild the tools section of the system prompt from the current tools list.
|
||||
|
|
@ -397,16 +380,10 @@ impl Agent {
|
|||
pub async fn turn(
|
||||
agent: Arc<Agent>,
|
||||
) -> Result<TurnResult> {
|
||||
agent_trace(&agent, format!("turn start")).await;
|
||||
|
||||
// Collect finished background tools
|
||||
{
|
||||
let finished = agent.state.lock().await.active_tools.take_finished();
|
||||
if !finished.is_empty() {
|
||||
agent_trace(&agent, format!(
|
||||
"collecting {} finished background tools",
|
||||
finished.len(),
|
||||
)).await;
|
||||
let mut bg_ds = DispatchState::new();
|
||||
let mut results = Vec::new();
|
||||
for entry in finished {
|
||||
|
|
@ -425,50 +402,21 @@ impl Agent {
|
|||
|
||||
loop {
|
||||
let _thinking = start_activity(&agent, "thinking...").await;
|
||||
agent_trace(&agent, format!(
|
||||
"turn loop overflow_retries={} empty_retries={}",
|
||||
overflow_retries, empty_retries,
|
||||
)).await;
|
||||
|
||||
let (rx, _stream_guard) = {
|
||||
agent_trace(&agent, format!("assembling prompt")).await;
|
||||
let (chunks, images, match_upto) = agent.assemble_prompt().await;
|
||||
let chunk_tokens: usize = chunks.iter().map(|c| match c {
|
||||
context::WireChunk::Tokens(t) => t.len(),
|
||||
}).sum();
|
||||
agent_trace(&agent, format!(
|
||||
"prompt assembled chunks={} tokens={} images={} match_upto={}",
|
||||
chunks.len(), chunk_tokens, images.len(), match_upto,
|
||||
)).await;
|
||||
let (prompt_tokens, images) = agent.assemble_prompt().await;
|
||||
let st = agent.state.lock().await;
|
||||
let readout_shape = agent.readout.lock().ok().and_then(|buf| {
|
||||
buf.manifest.as_ref().map(|m| {
|
||||
(m.layers.len() as u32, m.concepts.len() as u32)
|
||||
})
|
||||
});
|
||||
let sampling = st.sampling;
|
||||
let priority = st.priority;
|
||||
drop(st);
|
||||
agent_trace(&agent, format!(
|
||||
"starting stream max_tokens={} temperature={} top_p={} top_k={} priority={:?} readout_shape={:?}",
|
||||
sampling.max_tokens,
|
||||
sampling.temperature,
|
||||
sampling.top_p,
|
||||
sampling.top_k,
|
||||
priority,
|
||||
readout_shape,
|
||||
)).await;
|
||||
agent.client.stream_session_mm(
|
||||
agent.grpc_session.clone(),
|
||||
chunks,
|
||||
images,
|
||||
match_upto,
|
||||
sampling,
|
||||
priority,
|
||||
readout_shape,
|
||||
agent.client.stream_completion_mm(
|
||||
&prompt_tokens,
|
||||
&images,
|
||||
api::SamplingParams {
|
||||
temperature: st.temperature,
|
||||
top_p: st.top_p,
|
||||
top_k: st.top_k,
|
||||
},
|
||||
st.priority,
|
||||
)
|
||||
};
|
||||
agent_trace(&agent, format!("stream task spawned")).await;
|
||||
|
||||
let branch_idx = {
|
||||
let mut ctx = agent.context.lock().await;
|
||||
|
|
@ -479,41 +427,11 @@ impl Agent {
|
|||
idx
|
||||
};
|
||||
|
||||
let think_native = agent.state.lock().await.think_native;
|
||||
let parser = ResponseParser::new(branch_idx, think_native);
|
||||
let parser = ResponseParser::new(branch_idx);
|
||||
let (mut tool_rx, parser_handle) = parser.run(rx, agent.clone());
|
||||
agent_trace(&agent, format!(
|
||||
"parser started branch_idx={} think_native={}",
|
||||
branch_idx, think_native,
|
||||
)).await;
|
||||
|
||||
let mut pending_calls: Vec<PendingToolCall> = Vec::new();
|
||||
loop {
|
||||
let call = match tokio::time::timeout(
|
||||
std::time::Duration::from_secs(15),
|
||||
tool_rx.recv(),
|
||||
).await {
|
||||
Ok(Some(call)) => call,
|
||||
Ok(None) => {
|
||||
agent_trace(&agent, format!(
|
||||
"tool channel closed pending_calls={}",
|
||||
pending_calls.len(),
|
||||
)).await;
|
||||
break;
|
||||
}
|
||||
Err(_) => {
|
||||
agent_trace(&agent, format!(
|
||||
"waiting for parser/tool events pending_calls={}",
|
||||
pending_calls.len(),
|
||||
)).await;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
agent_trace(&agent, format!(
|
||||
"tool call received id={} name={} args_len={}",
|
||||
call.id, call.name, call.arguments.len(),
|
||||
)).await;
|
||||
while let Some(call) = tool_rx.recv().await {
|
||||
let call_clone = call.clone();
|
||||
let agent_handle = agent.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
|
|
@ -536,10 +454,8 @@ impl Agent {
|
|||
}
|
||||
|
||||
// Check for stream/parse errors
|
||||
agent_trace(&agent, format!("awaiting parser task")).await;
|
||||
match parser_handle.await {
|
||||
Ok(Err(e)) => {
|
||||
agent_trace(&agent, format!("parser returned error: {:#}", e)).await;
|
||||
if context::is_context_overflow(&e) && overflow_retries < 2 {
|
||||
overflow_retries += 1;
|
||||
let msg = format!("context overflow — compacting ({}/2)", overflow_retries);
|
||||
|
|
@ -553,12 +469,8 @@ impl Agent {
|
|||
}
|
||||
return Err(e);
|
||||
}
|
||||
Err(e) => {
|
||||
agent_trace(&agent, format!("parser task panicked: {}", e)).await;
|
||||
return Err(anyhow::anyhow!("parser task panicked: {}", e));
|
||||
}
|
||||
Err(e) => return Err(anyhow::anyhow!("parser task panicked: {}", e)),
|
||||
Ok(Ok(())) => {
|
||||
agent_trace(&agent, format!("parser completed")).await;
|
||||
// Assistant response was pushed to context by the parser;
|
||||
// log it now that parsing is complete.
|
||||
let ctx = agent.context.lock().await;
|
||||
|
|
@ -579,10 +491,6 @@ impl Agent {
|
|||
if !has_content && pending_calls.is_empty() {
|
||||
if empty_retries < 2 {
|
||||
empty_retries += 1;
|
||||
agent_trace(&agent, format!(
|
||||
"empty response retry {}/2",
|
||||
empty_retries,
|
||||
)).await;
|
||||
agent.push_node(AstNode::user_msg(
|
||||
"[system] Your previous response was empty. \
|
||||
Please respond with text or use a tool."
|
||||
|
|
@ -596,10 +504,6 @@ impl Agent {
|
|||
// Wait for tool calls to complete
|
||||
if !pending_calls.is_empty() {
|
||||
ds.had_tool_calls = true;
|
||||
agent_trace(&agent, format!(
|
||||
"waiting for {} foreground tools",
|
||||
pending_calls.len(),
|
||||
)).await;
|
||||
|
||||
let handles = agent.state.lock().await.active_tools.take_foreground();
|
||||
let mut results = Vec::new();
|
||||
|
|
@ -620,16 +524,6 @@ impl Agent {
|
|||
if st.pending_model_switch.is_some() { ds.model_switch = st.pending_model_switch.take(); }
|
||||
if st.pending_dmn_pause { ds.dmn_pause = true; st.pending_dmn_pause = false; }
|
||||
|
||||
drop(st);
|
||||
agent_trace(&agent, format!(
|
||||
"turn complete yield={} tool_calls={} tool_errors={} model_switch={:?} dmn_pause={}",
|
||||
ds.yield_requested,
|
||||
ds.had_tool_calls,
|
||||
ds.tool_errors,
|
||||
ds.model_switch,
|
||||
ds.dmn_pause,
|
||||
)).await;
|
||||
|
||||
return Ok(TurnResult {
|
||||
yield_requested: ds.yield_requested,
|
||||
had_tool_calls: ds.had_tool_calls,
|
||||
|
|
|
|||
|
|
@ -12,9 +12,7 @@ use crate::subconscious::{defs, prompts};
|
|||
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::io::Write as _;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use super::context::AstNode;
|
||||
use super::tools::{self as agent_tools};
|
||||
|
|
@ -108,10 +106,6 @@ pub async fn save_agent_log(name: &str, agent: &std::sync::Arc<Agent>) -> RunSta
|
|||
stats
|
||||
}
|
||||
|
||||
/// Write one diagnostic line for `agent` to stderr, formatted as
/// `[agent:<agent>] <msg>`.
///
/// Taking `fmt::Arguments` lets callers pass `format_args!(...)` so no
/// intermediate `String` is built before the line is printed.
fn log_agent_event(agent: &str, msg: std::fmt::Arguments) {
    eprintln!("[agent:{agent}] {msg}");
}
|
||||
|
||||
fn compute_run_stats(conversation: &[super::context::AstNode]) -> RunStats {
|
||||
use super::context::{AstNode, NodeBody};
|
||||
|
||||
|
|
@ -275,7 +269,7 @@ impl AutoAgent {
|
|||
let mut st = agent.state.lock().await;
|
||||
st.provenance = format!("standalone:{}", self.name);
|
||||
st.tools = self.tools.clone();
|
||||
st.sampling.temperature = self.temperature;
|
||||
st.temperature = self.temperature;
|
||||
st.priority = Some(self.priority);
|
||||
}
|
||||
|
||||
|
|
@ -351,44 +345,20 @@ impl AutoAgent {
|
|||
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
|
||||
) -> Result<(), String> {
|
||||
dbglog!("[auto] {} starting, {} steps", self.name, self.steps.len());
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"starting run steps={} temperature={} priority={}",
|
||||
self.steps.len(), self.temperature, self.priority));
|
||||
let run_start = Instant::now();
|
||||
|
||||
for (i, step) in self.steps.iter().enumerate() {
|
||||
self.turn = i + 1;
|
||||
self.current_phase = step.phase.clone();
|
||||
let step_start = Instant::now();
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"step {}/{} phase={} prompt_bytes={}",
|
||||
i + 1, self.steps.len(), step.phase, step.prompt.len()));
|
||||
|
||||
if let Some(ref check) = bail_fn {
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"step {}/{} phase={} bail check", i + 1, self.steps.len(), step.phase));
|
||||
check(i)?;
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"step {}/{} phase={} bail ok", i + 1, self.steps.len(), step.phase));
|
||||
}
|
||||
|
||||
backend.push_node(AstNode::system_msg(&step.prompt)).await;
|
||||
Agent::turn(backend.0.clone()).await
|
||||
.map_err(|e| {
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"step {}/{} phase={} failed after {:.2}s: {}",
|
||||
i + 1, self.steps.len(), step.phase,
|
||||
step_start.elapsed().as_secs_f64(), e));
|
||||
format!("{}: {}", self.name, e)
|
||||
})?;
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"step {}/{} phase={} done in {:.2}s",
|
||||
i + 1, self.steps.len(), step.phase,
|
||||
step_start.elapsed().as_secs_f64()));
|
||||
.map_err(|e| format!("{}: {}", self.name, e))?;
|
||||
}
|
||||
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"run completed in {:.2}s", run_start.elapsed().as_secs_f64()));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -412,29 +382,8 @@ pub async fn run_one_agent(
|
|||
count: usize,
|
||||
keys: Option<&[String]>,
|
||||
) -> Result<AgentResult, String> {
|
||||
let run_start = Instant::now();
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"run_one_agent start pid={} count={} explicit_keys={}",
|
||||
std::process::id(), count, keys.map(|k| k.len()).unwrap_or(0)));
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"env POC_SESSION_ID={:?} POC_TRANSCRIPT_PATH={:?} POC_AGENT_OUTPUT_DIR={:?}",
|
||||
std::env::var("POC_SESSION_ID").ok(),
|
||||
std::env::var("POC_TRANSCRIPT_PATH").ok(),
|
||||
std::env::var("POC_AGENT_OUTPUT_DIR").ok()));
|
||||
if let Some(session) = crate::session::HookSession::from_env() {
|
||||
let transcript = session.transcript();
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"session={} transcript={} size={} exists={}",
|
||||
session.session_id, transcript.path, transcript.size, transcript.exists()));
|
||||
} else {
|
||||
log_agent_event(agent_name, format_args!("no hook session in environment"));
|
||||
}
|
||||
|
||||
let def = defs::get_def(agent_name)
|
||||
.ok_or_else(|| format!("no .agent file for {}", agent_name))?;
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"definition loaded steps={} tools={:?} count={:?} priority={} bail={:?}",
|
||||
def.steps.len(), def.tools, def.count, def.priority, def.bail));
|
||||
|
||||
// State dir for agent output files
|
||||
let state_dir = std::env::var("POC_AGENT_OUTPUT_DIR")
|
||||
|
|
@ -443,7 +392,6 @@ pub async fn run_one_agent(
|
|||
fs::create_dir_all(&state_dir)
|
||||
.map_err(|e| format!("create state dir: {}", e))?;
|
||||
unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &state_dir); }
|
||||
log_agent_event(agent_name, format_args!("state_dir={}", state_dir.display()));
|
||||
|
||||
// Build prompt batch — either from explicit keys or the agent's query
|
||||
let agent_batch = if let Some(keys) = keys {
|
||||
|
|
@ -463,8 +411,6 @@ pub async fn run_one_agent(
|
|||
prompts::AgentBatch { steps: resolved_steps, node_keys: all_keys }
|
||||
} else {
|
||||
let effective_count = def.count.unwrap_or(count);
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"resolving default prompt placeholders effective_count={}", effective_count));
|
||||
defs::run_agent(&def, effective_count, &Default::default()).await?
|
||||
};
|
||||
|
||||
|
|
@ -517,14 +463,6 @@ pub async fn run_one_agent(
|
|||
})),
|
||||
});
|
||||
let n_steps = agent_batch.steps.len();
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"prompt batch ready steps={} node_keys={}",
|
||||
n_steps, agent_batch.node_keys.len()));
|
||||
for (i, step) in agent_batch.steps.iter().enumerate() {
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"prompt step {}/{} phase={} bytes={}",
|
||||
i + 1, n_steps, step.phase, step.prompt.len()));
|
||||
}
|
||||
|
||||
// Guard: reject oversized first prompt
|
||||
let max_prompt_bytes = 800_000;
|
||||
|
|
@ -547,9 +485,6 @@ pub async fn run_one_agent(
|
|||
let phases: Vec<&str> = agent_batch.steps.iter().map(|s| s.phase.as_str()).collect();
|
||||
dbglog!("[{}] {} step(s) {:?}, {}KB initial, {} nodes",
|
||||
agent_name, n_steps, phases, first_len / 1024, agent_batch.node_keys.len());
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"tools enabled: {}",
|
||||
effective_tools.iter().map(|t| t.name).collect::<Vec<_>>().join(", ")));
|
||||
|
||||
let prompts: Vec<String> = agent_batch.steps.iter()
|
||||
.map(|s| s.prompt.clone()).collect();
|
||||
|
|
@ -562,25 +497,18 @@ pub async fn run_one_agent(
|
|||
let bail_script = def.bail.as_ref().map(|name| defs::agents_dir().join(name));
|
||||
let state_dir_for_bail = state_dir.clone();
|
||||
let our_pid = std::process::id();
|
||||
let our_pid_file = std::env::var("POC_AGENT_PID_FILE")
|
||||
.unwrap_or_else(|_| format!("pid-{}", our_pid));
|
||||
let our_pid_file = format!("pid-{}", our_pid);
|
||||
let step_phases_for_bail = step_phases.clone();
|
||||
let bail_fn = move |step_idx: usize| -> Result<(), String> {
|
||||
if let Some(ref script) = bail_script {
|
||||
let phase = step_phases_for_bail.get(step_idx)
|
||||
.map(String::as_str).unwrap_or("");
|
||||
eprintln!(
|
||||
"[agent:bail] script={} state_dir={} pid_file={} phase={}",
|
||||
script.display(), state_dir_for_bail.display(), our_pid_file, phase);
|
||||
let status = std::process::Command::new(script)
|
||||
.arg(&our_pid_file)
|
||||
.arg(phase)
|
||||
.current_dir(&state_dir_for_bail)
|
||||
.status()
|
||||
.map_err(|e| format!("bail script {:?} failed: {}", script, e))?;
|
||||
eprintln!(
|
||||
"[agent:bail] script={} phase={} status={}",
|
||||
script.display(), phase, status);
|
||||
if !status.success() {
|
||||
return Err(format!("bailed at step {}: {:?} exited {}",
|
||||
step_idx + 1, script.file_name().unwrap_or_default(),
|
||||
|
|
@ -593,8 +521,6 @@ pub async fn run_one_agent(
|
|||
call_api_with_tools_sync(
|
||||
agent_name, &prompts, &step_phases, def.temperature, def.priority,
|
||||
&effective_tools, Some(&bail_fn))?;
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"run_one_agent completed in {:.2}s", run_start.elapsed().as_secs_f64()));
|
||||
|
||||
Ok(AgentResult {
|
||||
node_keys: agent_batch.node_keys,
|
||||
|
|
@ -672,15 +598,6 @@ pub fn spawn_agent(
|
|||
agent_name: &str,
|
||||
state_dir: &std::path::Path,
|
||||
session_id: &str,
|
||||
) -> Option<SpawnResult> {
|
||||
spawn_agent_with_transcript(agent_name, state_dir, session_id, None)
|
||||
}
|
||||
|
||||
pub fn spawn_agent_with_transcript(
|
||||
agent_name: &str,
|
||||
state_dir: &std::path::Path,
|
||||
session_id: &str,
|
||||
transcript_path: Option<&str>,
|
||||
) -> Option<SpawnResult> {
|
||||
let def = defs::get_def(agent_name)?;
|
||||
let first_phase = def.steps.first()
|
||||
|
|
@ -691,41 +608,17 @@ pub fn spawn_agent_with_transcript(
|
|||
.join(format!(".consciousness/logs/{}", agent_name));
|
||||
fs::create_dir_all(&log_dir).ok();
|
||||
let log_path = log_dir.join(format!("{}.log", store::compact_timestamp()));
|
||||
let mut agent_log = fs::File::create(&log_path)
|
||||
let agent_log = fs::File::create(&log_path)
|
||||
.unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
|
||||
|
||||
let mut cmd = std::process::Command::new("bash");
|
||||
cmd.args([
|
||||
"-lc",
|
||||
r#"
|
||||
set +e
|
||||
export POC_AGENT_PID_FILE="pid-$$"
|
||||
"$@"
|
||||
status=$?
|
||||
printf '=== agent process exit status: %s at %s ===\n' "$status" "$(date --iso-8601=seconds)"
|
||||
exit "$status"
|
||||
"#,
|
||||
"poc-memory-agent-wrapper",
|
||||
"poc-memory", "agent", "run", agent_name, "--count", "1", "--local",
|
||||
"--state-dir", &state_dir.to_string_lossy(),
|
||||
]).env("POC_SESSION_ID", session_id);
|
||||
if let Some(path) = transcript_path.filter(|p| !p.is_empty()) {
|
||||
cmd.env("POC_TRANSCRIPT_PATH", path);
|
||||
}
|
||||
|
||||
let _ = writeln!(agent_log, "=== spawn {} ===", chrono::Local::now().format("%Y-%m-%dT%H:%M:%S"));
|
||||
let _ = writeln!(agent_log, "agent={agent_name}");
|
||||
let _ = writeln!(agent_log, "state_dir={}", state_dir.display());
|
||||
let _ = writeln!(agent_log, "session_id={session_id}");
|
||||
let _ = writeln!(agent_log, "transcript_path={}", transcript_path.unwrap_or(""));
|
||||
let _ = writeln!(agent_log, "first_phase={first_phase}");
|
||||
let _ = writeln!(agent_log, "command=poc-memory agent run {agent_name} --count 1 --local --state-dir {}", state_dir.display());
|
||||
let _ = agent_log.flush();
|
||||
|
||||
let child_stdout = agent_log.try_clone()
|
||||
.unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
|
||||
let child_stderr = agent_log;
|
||||
let child = cmd.stdout(child_stdout).stderr(child_stderr).spawn().ok()?;
|
||||
let child = std::process::Command::new("poc-memory")
|
||||
.args(["agent", "run", agent_name, "--count", "1", "--local",
|
||||
"--state-dir", &state_dir.to_string_lossy()])
|
||||
.env("POC_SESSION_ID", session_id)
|
||||
.stdout(agent_log.try_clone().unwrap_or_else(|_| fs::File::create("/dev/null").unwrap()))
|
||||
.stderr(agent_log)
|
||||
.spawn()
|
||||
.ok()?;
|
||||
|
||||
let pid = child.id();
|
||||
let pid_path = state_dir.join(format!("pid-{}", pid));
|
||||
|
|
|
|||
|
|
@ -1,309 +0,0 @@
|
|||
// agent/salience.rs — peak extraction from per-token concept-readout traces.
|
||||
//
|
||||
// Consumes a trace of `ReadoutEntry` (per-token per-layer per-concept
|
||||
// projections streamed from the vLLM server) and produces a compact
|
||||
// list of `SaliencePeak` events — one per contiguous above-threshold
|
||||
// region per concept, placed at the local maximum.
|
||||
//
|
||||
// Pure function. No I/O, no async, no side effects. Caller supplies the
|
||||
// trace slice and manifest; caller decides what to do with the events.
|
||||
//
|
||||
// See also: `salience-trace-plumbing-architecture` memory node.
|
||||
|
||||
use super::api::ReadoutManifest;
|
||||
use super::readout::ReadoutEntry;
|
||||
|
||||
/// One salient moment in a trace — a concept channel crossed threshold,
/// and we picked the local maximum within the contiguous above-threshold
/// run.
#[derive(Debug, Clone, PartialEq)]
pub struct SaliencePeak {
    /// Index into the trace (0-based) where the peak occurred.
    pub token_offset: usize,
    /// Concept name from the manifest.
    pub concept: String,
    /// z-score of the peak value vs the trace's own distribution for
    /// that concept. Never below the `sigma_threshold` the peak was
    /// extracted with, so it is positive for any positive threshold.
    pub intensity: f32,
}
|
||||
|
||||
/// Tunables for peak extraction.
#[derive(Debug, Clone)]
pub struct PeakConfig {
    /// Minimum z-score to count as a peak. Default 2.0 (~top 2.5% assuming
    /// normal-ish distribution, though readouts are rarely normal).
    pub sigma_threshold: f32,
    /// Minimum standard deviation of a concept channel for peaks to be
    /// reported. If a channel is numerically flat across the whole trace,
    /// tiny fluctuations can produce spurious "peaks" with huge z-scores;
    /// require at least this much variation before trusting the channel.
    pub min_std: f32,
}

impl Default for PeakConfig {
    /// A 2-sigma threshold, with channels whose std is below `1e-4`
    /// ignored.
    fn default() -> Self {
        Self {
            sigma_threshold: 2.0,
            min_std: 1e-4,
        }
    }
}
|
||||
|
||||
/// Extract peak events from a trace for one layer.
|
||||
///
|
||||
/// `layer_idx` indexes into the per-token readout tensor's layer
|
||||
/// dimension. If the trace is empty, the layer is out of range for any
|
||||
/// entry, or the manifest is empty, returns `Vec::new()`.
|
||||
///
|
||||
/// Peaks are returned sorted by `token_offset` ascending. When two
|
||||
/// peaks share an offset they're ordered by `concept` lexicographically
|
||||
/// for determinism.
|
||||
pub fn pick_peaks(
|
||||
trace: &[ReadoutEntry],
|
||||
manifest: &ReadoutManifest,
|
||||
layer_idx: usize,
|
||||
config: &PeakConfig,
|
||||
) -> Vec<SaliencePeak> {
|
||||
if trace.is_empty() || manifest.concepts.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let n_concepts = manifest.concepts.len();
|
||||
let n_tokens = trace.len();
|
||||
|
||||
// Pull a [n_tokens × n_concepts] column-major view for the selected
|
||||
// layer. Entries where the layer is missing or the concept count
|
||||
// doesn't match the manifest are treated as zeros — the downstream
|
||||
// z-score will drown them as baseline if they're sparse, and if they
|
||||
// dominate the caller has bigger problems.
|
||||
let mut by_concept: Vec<Vec<f32>> = vec![Vec::with_capacity(n_tokens); n_concepts];
|
||||
for entry in trace {
|
||||
match entry.readout.get(layer_idx) {
|
||||
Some(row) if row.len() == n_concepts => {
|
||||
for (c, v) in row.iter().enumerate() {
|
||||
by_concept[c].push(*v);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
for col in by_concept.iter_mut() {
|
||||
col.push(0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut peaks: Vec<SaliencePeak> = Vec::new();
|
||||
for (c_idx, values) in by_concept.iter().enumerate() {
|
||||
let (mean, std) = mean_std(values);
|
||||
if std < config.min_std {
|
||||
continue;
|
||||
}
|
||||
let concept = &manifest.concepts[c_idx];
|
||||
|
||||
// Walk contiguous above-threshold runs, emit one peak per run
|
||||
// at the local max.
|
||||
let mut run_start: Option<usize> = None;
|
||||
let mut run_max_offset: usize = 0;
|
||||
let mut run_max_z: f32 = 0.0;
|
||||
for (i, v) in values.iter().enumerate() {
|
||||
let z = (*v - mean) / std;
|
||||
let above = z >= config.sigma_threshold;
|
||||
if above {
|
||||
if run_start.is_none() {
|
||||
run_start = Some(i);
|
||||
run_max_offset = i;
|
||||
run_max_z = z;
|
||||
} else if z > run_max_z {
|
||||
run_max_offset = i;
|
||||
run_max_z = z;
|
||||
}
|
||||
} else if run_start.is_some() {
|
||||
peaks.push(SaliencePeak {
|
||||
token_offset: run_max_offset,
|
||||
concept: concept.clone(),
|
||||
intensity: run_max_z,
|
||||
});
|
||||
run_start = None;
|
||||
}
|
||||
}
|
||||
// Flush trailing run.
|
||||
if run_start.is_some() {
|
||||
peaks.push(SaliencePeak {
|
||||
token_offset: run_max_offset,
|
||||
concept: concept.clone(),
|
||||
intensity: run_max_z,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
peaks.sort_by(|a, b| a.token_offset.cmp(&b.token_offset).then_with(|| a.concept.cmp(&b.concept)));
|
||||
peaks
|
||||
}
|
||||
|
||||
/// Mean and population std of a slice. Returns (0.0, 0.0) for empty input.
///
/// Sums are accumulated in `f64` and only the final results are narrowed
/// to `f32`: a running `f32` sum over a long trace drifts enough to make
/// a perfectly flat channel look like it has variance.
fn mean_std(xs: &[f32]) -> (f32, f32) {
    if xs.is_empty() {
        return (0.0, 0.0);
    }
    let n = xs.len() as f64;
    let mean = xs.iter().map(|&x| f64::from(x)).sum::<f64>() / n;
    // Population variance (divide by n, not n - 1).
    let var = xs
        .iter()
        .map(|&x| (f64::from(x) - mean).powi(2))
        .sum::<f64>()
        / n;
    (mean as f32, var.sqrt() as f32)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Manifest with the given concept names and hooked layer ids.
    fn manifest(concepts: &[&str], layers: &[u32]) -> ReadoutManifest {
        ReadoutManifest {
            concepts: concepts.iter().map(|s| s.to_string()).collect(),
            layers: layers.to_vec(),
        }
    }

    /// Build a trace where all entries have one hooked layer and the
    /// given per-token values for each concept. `values[t][c]` = value
    /// at token t, concept c.
    fn trace(values: &[Vec<f32>]) -> Vec<ReadoutEntry> {
        values
            .iter()
            .enumerate()
            .map(|(i, row)| ReadoutEntry {
                token_id: i as u32,
                readout: vec![row.clone()],
            })
            .collect()
    }

    /// Single-concept rows: `len` zeros, overridden at each listed
    /// `(offset, value)`.
    fn spikes(len: usize, at: &[(usize, f32)]) -> Vec<Vec<f32>> {
        let mut rows = vec![vec![0.0]; len];
        for &(offset, value) in at {
            rows[offset] = vec![value];
        }
        rows
    }

    #[test]
    fn empty_trace_returns_empty() {
        let m = manifest(&["curious"], &[63]);
        let peaks = pick_peaks(&[], &m, 0, &PeakConfig::default());
        assert!(peaks.is_empty());
    }

    #[test]
    fn empty_manifest_returns_empty() {
        let m = manifest(&[], &[63]);
        let t = trace(&[vec![], vec![], vec![]]);
        let peaks = pick_peaks(&t, &m, 0, &PeakConfig::default());
        assert!(peaks.is_empty());
    }

    #[test]
    fn flat_channel_produces_no_peaks() {
        let m = manifest(&["curious"], &[63]);
        let t = trace(&vec![vec![1.0]; 5]);
        let peaks = pick_peaks(&t, &m, 0, &PeakConfig::default());
        assert!(peaks.is_empty(), "flat channel should produce no peaks, got {:?}", peaks);
    }

    #[test]
    fn single_spike_detected() {
        // Ten baseline zeros with one 5.0 spike — that single token's
        // z-score will easily exceed 2σ.
        let m = manifest(&["curious"], &[63]);
        let rows = spikes(10, &[(5, 5.0)]);
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert_eq!(peaks.len(), 1);
        assert_eq!(peaks[0].concept, "curious");
        assert_eq!(peaks[0].token_offset, 5);
        assert!(peaks[0].intensity >= 2.0);
    }

    #[test]
    fn contiguous_region_emits_one_peak_at_max() {
        // A 4-5-4 hump on a 40-token zero baseline: all three tokens are
        // above 2σ (z ≈ 3.2, 4.1, 3.2), so they form one run and the peak
        // must land on the 5 at offset 11.
        let m = manifest(&["aha"], &[63]);
        let rows = spikes(40, &[(10, 4.0), (11, 5.0), (12, 4.0)]);
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert_eq!(peaks.len(), 1, "expected one peak for one contiguous run, got {:?}", peaks);
        assert_eq!(peaks[0].token_offset, 11);
    }

    #[test]
    fn multiple_concepts_independent() {
        let m = manifest(&["curious", "aha"], &[63]);
        // curious spikes at 2, aha spikes at 7
        let rows: Vec<Vec<f32>> = (0..10)
            .map(|i| {
                let c = if i == 2 { 4.0 } else { 0.0 };
                let a = if i == 7 { 4.0 } else { 0.0 };
                vec![c, a]
            })
            .collect();
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert_eq!(peaks.len(), 2);
        // Sorted by offset — curious(2) comes first, aha(7) second.
        assert_eq!(peaks[0].concept, "curious");
        assert_eq!(peaks[0].token_offset, 2);
        assert_eq!(peaks[1].concept, "aha");
        assert_eq!(peaks[1].token_offset, 7);
    }

    #[test]
    fn two_separated_runs_emit_two_peaks() {
        // Longer baseline so the two spikes don't dominate the global
        // mean/std — 30 tokens of zeros with two 5.0 spikes at 10 and 20.
        let m = manifest(&["curious"], &[63]);
        let rows = spikes(30, &[(10, 5.0), (20, 5.0)]);
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert_eq!(peaks.len(), 2, "expected two peaks for two runs, got {:?}", peaks);
        assert_eq!(peaks[0].token_offset, 10);
        assert_eq!(peaks[1].token_offset, 20);
    }

    #[test]
    fn trailing_run_is_flushed() {
        // Peak runs to the end of the trace — must still emit.
        // Use a longer baseline so the trailing spike is genuinely
        // above threshold on the global stats.
        let m = manifest(&["curious"], &[63]);
        let rows = spikes(30, &[(27, 3.0), (28, 5.0), (29, 4.0)]);
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert_eq!(peaks.len(), 1, "expected one peak for one trailing run, got {:?}", peaks);
        assert_eq!(peaks[0].token_offset, 28, "peak should land at the local max of the trailing run");
    }

    #[test]
    fn sub_threshold_produces_nothing() {
        // All non-zero values are small; z-scores won't cross 2σ.
        let m = manifest(&["curious"], &[63]);
        let rows: Vec<Vec<f32>> = [0.0, 0.1, 0.0, 0.1, 0.0, 0.1, 0.0, 0.1]
            .iter()
            .map(|v| vec![*v])
            .collect();
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert!(peaks.is_empty(), "below-threshold wiggle should produce no peaks, got {:?}", peaks);
    }

    #[test]
    fn layer_out_of_range_returns_empty() {
        let m = manifest(&["curious"], &[63]);
        let rows = spikes(10, &[(5, 5.0)]);
        // Trace has one layer (index 0); asking for layer 3 should see
        // all-zero columns, which are flat and produce no peaks.
        let peaks = pick_peaks(&trace(&rows), &m, 3, &PeakConfig::default());
        assert!(peaks.is_empty());
    }

    #[test]
    fn manifest_concept_count_mismatch_is_safe() {
        // Manifest says 2 concepts; each readout row only has 1 value.
        // Rows should be treated as all-zero (via the len check) and
        // produce no peaks without panicking.
        let m = manifest(&["a", "b"], &[63]);
        let rows: Vec<Vec<f32>> = vec![vec![1.0]; 10];
        let peaks = pick_peaks(&trace(&rows), &m, 0, &PeakConfig::default());
        assert!(peaks.is_empty());
    }

    #[test]
    fn threshold_tunable() {
        // Same spike, stricter threshold — no peak.
        let m = manifest(&["curious"], &[63]);
        let rows = spikes(10, &[(5, 5.0)]);
        let strict = PeakConfig { sigma_threshold: 100.0, ..PeakConfig::default() };
        let peaks = pick_peaks(&trace(&rows), &m, 0, &strict);
        assert!(peaks.is_empty());
    }
}
|
||||
|
|
@ -33,17 +33,16 @@ fn get() -> Option<&'static Tokenizer> {
|
|||
TOKENIZER.get()
|
||||
}
|
||||
|
||||
fn expect_tokenizer() -> &'static Tokenizer {
|
||||
get().expect("tokenizer not initialized; expected ~/.consciousness/tokenizer-qwen35.json")
|
||||
}
|
||||
|
||||
/// Tokenize a raw string, returning token IDs.
|
||||
/// Returns empty vec if the tokenizer is not initialized.
|
||||
pub fn encode(text: &str) -> Vec<u32> {
|
||||
expect_tokenizer()
|
||||
.encode(text, false)
|
||||
match get() {
|
||||
Some(t) => t.encode(text, false)
|
||||
.unwrap_or_else(|e| panic!("tokenization failed: {}", e))
|
||||
.get_ids()
|
||||
.to_vec()
|
||||
.to_vec(),
|
||||
None => vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Tokenize a chat entry with template wrapping:
|
||||
|
|
@ -67,12 +66,15 @@ pub fn count(text: &str) -> usize {
|
|||
|
||||
/// Decode token IDs back to text.
|
||||
pub fn decode(ids: &[u32]) -> String {
|
||||
expect_tokenizer()
|
||||
.decode(ids, true)
|
||||
.unwrap_or_else(|e| panic!("detokenization failed: {}", e))
|
||||
match get() {
|
||||
Some(t) => t.decode(ids, true)
|
||||
.unwrap_or_else(|e| panic!("detokenization failed: {}", e)),
|
||||
None => String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the tokenizer is initialized.
|
||||
pub fn is_initialized() -> bool {
|
||||
TOKENIZER.get().is_some()
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -209,24 +209,7 @@ memory_tool!(graph_trace, ref, key: [str]);
|
|||
|
||||
// ── Definitions ────────────────────────────────────────────────
|
||||
|
||||
async fn jsonargs_memory_new(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args: &serde_json::Value) -> Result<String> {
|
||||
jsonargs_memory_write(agent, args).await
|
||||
}
|
||||
|
||||
async fn jsonargs_memory_link(agent: &Option<std::sync::Arc<crate::agent::Agent>>, args: &serde_json::Value) -> Result<String> {
|
||||
let source = get_str(args, "source")?;
|
||||
let target = get_str(args, "target")?;
|
||||
if args.get("strength").and_then(|v| v.as_f64()).is_some() {
|
||||
jsonargs_memory_link_set(agent, args).await
|
||||
} else {
|
||||
jsonargs_memory_link_add(agent, &serde_json::json!({
|
||||
"source": source,
|
||||
"target": target,
|
||||
})).await
|
||||
}
|
||||
}
|
||||
|
||||
pub fn memory_tools() -> [super::Tool; 22] {
|
||||
pub fn memory_tools() -> [super::Tool; 20] {
|
||||
use super::Tool;
|
||||
macro_rules! tool {
|
||||
($name:ident, $desc:expr, $params:expr) => {
|
||||
|
|
@ -251,11 +234,6 @@ pub fn memory_tools() -> [super::Tool; 22] {
|
|||
"properties": { "key": {"type": "string"}, "content": {"type": "string"} },
|
||||
"required": ["key", "content"]
|
||||
}"#),
|
||||
tool!(memory_new, "Create or update a memory node. Alias for memory_write.", r#"{
|
||||
"type": "object",
|
||||
"properties": { "key": {"type": "string"}, "content": {"type": "string"} },
|
||||
"required": ["key", "content"]
|
||||
}"#),
|
||||
tool!(memory_search, "Search via spreading activation from seed keys.", r#"{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -286,16 +264,6 @@ pub fn memory_tools() -> [super::Tool; 22] {
|
|||
"properties": { "source": {"type": "string"}, "target": {"type": "string"} },
|
||||
"required": ["source", "target"]
|
||||
}"#),
|
||||
tool!(memory_link, "Add or update a link between two memory nodes. Alias for memory_link_add/memory_link_set.", r#"{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"source": {"type": "string"},
|
||||
"target": {"type": "string"},
|
||||
"strength": {"type": "number", "description": "Optional; 0.01 to 1.0"},
|
||||
"label": {"type": "string", "description": "Accepted for compatibility; currently ignored"}
|
||||
},
|
||||
"required": ["source", "target"]
|
||||
}"#),
|
||||
tool!(memory_delete, "Soft-delete a node.", r#"{
|
||||
"type": "object",
|
||||
"properties": { "key": {"type": "string"} },
|
||||
|
|
|
|||
|
|
@ -57,18 +57,15 @@ async fn view_image(
|
|||
let (w, h) = (dim.width as u32, dim.height as u32);
|
||||
let mime = mime_from_extension(path);
|
||||
|
||||
let agent = agent.context("view_image requires agent context")?;
|
||||
|
||||
// token_count is populated when the image reaches the server via
|
||||
// AppendImage (the server is authoritative for the IMAGE_PAD
|
||||
// count). Placeholder of 0 here until AppendImage is wired; the
|
||||
// leaf's count gets rewritten from the RPC response at send time.
|
||||
let image_leaf = AstNode::image(bytes.clone(), mime, h, w);
|
||||
let token_count = image_leaf.leaf().unwrap().tokens().saturating_sub(2);
|
||||
|
||||
let agent = agent.context("view_image requires agent context")?;
|
||||
let branch = AstNode::branch(Role::User, vec![image_leaf]);
|
||||
agent.context.lock().await.push_log(Section::Conversation, branch);
|
||||
|
||||
Ok(format!("loaded {} ({}, {}x{})", a.file_path, mime, w, h))
|
||||
Ok(format!("loaded {} ({}, {}x{}, {} tokens)",
|
||||
a.file_path, mime, w, h, token_count))
|
||||
}
|
||||
|
||||
fn mime_from_extension(path: &std::path::Path) -> &'static str {
|
||||
|
|
|
|||
112
src/bin/ch.rs
112
src/bin/ch.rs
|
|
@ -1,112 +0,0 @@
|
|||
// `ch` — minimal channel CLI.
|
||||
//
|
||||
// ch send <channel-path> <message>
|
||||
// ch recv <channel-path> [--all-new] [--min-count N]
|
||||
//
|
||||
// Connects to ~/.consciousness/channels/<top>.sock and speaks the
|
||||
// channel.capnp protocol to the appropriate daemon.
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::process::ExitCode;
|
||||
|
||||
use capnp_rpc::{rpc_twoparty_capnp, twoparty, RpcSystem};
|
||||
use futures::AsyncReadExt;
|
||||
use tokio_util::compat::TokioAsyncReadCompatExt;
|
||||
|
||||
use consciousness::channel_capnp::channel_server;
|
||||
|
||||
fn channels_dir() -> PathBuf {
|
||||
dirs::home_dir().unwrap_or_default().join(".consciousness/channels")
|
||||
}
|
||||
|
||||
fn sock_for(channel: &str) -> PathBuf {
|
||||
let top = channel.split('.').next().unwrap_or(channel);
|
||||
channels_dir().join(format!("{top}.sock"))
|
||||
}
|
||||
|
||||
async fn connect(sock: &std::path::Path) -> Result<channel_server::Client, String> {
|
||||
let stream = tokio::net::UnixStream::connect(sock).await
|
||||
.map_err(|e| format!("connect {}: {e}", sock.display()))?;
|
||||
let (reader, writer) = stream.compat().split();
|
||||
let network = Box::new(twoparty::VatNetwork::new(
|
||||
futures::io::BufReader::new(reader),
|
||||
futures::io::BufWriter::new(writer),
|
||||
rpc_twoparty_capnp::Side::Client,
|
||||
Default::default(),
|
||||
));
|
||||
let mut rpc = RpcSystem::new(network, None);
|
||||
let client: channel_server::Client = rpc.bootstrap(rpc_twoparty_capnp::Side::Server);
|
||||
tokio::task::spawn_local(rpc);
|
||||
Ok(client)
|
||||
}
|
||||
|
||||
#[tokio::main(flavor = "current_thread")]
|
||||
async fn main() -> ExitCode {
|
||||
let args: Vec<String> = std::env::args().collect();
|
||||
if args.len() < 2 {
|
||||
eprintln!("usage: {} <send|recv> <channel> [args...]", args[0]);
|
||||
return ExitCode::from(2);
|
||||
}
|
||||
|
||||
let cmd = args[1].clone();
|
||||
let local = tokio::task::LocalSet::new();
|
||||
let result: Result<(), String> = local.run_until(async move {
|
||||
match cmd.as_str() {
|
||||
"send" => {
|
||||
if args.len() < 4 {
|
||||
return Err("usage: ch send <channel> <message...>".into());
|
||||
}
|
||||
let channel = &args[2];
|
||||
let message = args[3..].join(" ");
|
||||
let sock = sock_for(channel);
|
||||
let client = connect(&sock).await?;
|
||||
let mut req = client.send_request();
|
||||
req.get().set_channel(channel);
|
||||
req.get().set_message(&message);
|
||||
req.send().promise.await.map_err(|e| format!("send: {e}"))?;
|
||||
println!("sent to {channel}");
|
||||
Ok(())
|
||||
}
|
||||
"recv" => {
|
||||
if args.len() < 3 {
|
||||
return Err("usage: ch recv <channel> [--all-new] [--min-count N]".into());
|
||||
}
|
||||
let channel = &args[2];
|
||||
let mut all_new = false;
|
||||
let mut min_count: u32 = 20;
|
||||
let mut i = 3;
|
||||
while i < args.len() {
|
||||
match args[i].as_str() {
|
||||
"--all-new" => { all_new = true; i += 1; }
|
||||
"--min-count" => {
|
||||
min_count = args.get(i+1)
|
||||
.ok_or("--min-count needs an argument")?
|
||||
.parse().map_err(|e| format!("--min-count: {e}"))?;
|
||||
i += 2;
|
||||
}
|
||||
other => return Err(format!("unknown arg: {other}")),
|
||||
}
|
||||
}
|
||||
let sock = sock_for(channel);
|
||||
let client = connect(&sock).await?;
|
||||
let mut req = client.recv_request();
|
||||
req.get().set_channel(channel);
|
||||
req.get().set_all_new(all_new);
|
||||
req.get().set_min_count(min_count);
|
||||
let reply = req.send().promise.await.map_err(|e| format!("recv: {e}"))?;
|
||||
let text = reply.get().map_err(|e| e.to_string())?
|
||||
.get_text().map_err(|e| e.to_string())?
|
||||
.to_str().map_err(|e| e.to_string())?;
|
||||
print!("{text}");
|
||||
if !text.ends_with('\n') { println!(); }
|
||||
Ok(())
|
||||
}
|
||||
other => Err(format!("unknown command: {other} (use send|recv)")),
|
||||
}
|
||||
}).await;
|
||||
|
||||
match result {
|
||||
Ok(()) => ExitCode::SUCCESS,
|
||||
Err(e) => { eprintln!("error: {e}"); ExitCode::from(1) }
|
||||
}
|
||||
}
|
||||
|
|
@ -1,28 +1,7 @@
|
|||
#![cfg_attr(feature = "nightly-diagnostics", feature(panic_backtrace_config))]
|
||||
#![feature(panic_backtrace_config)]
|
||||
#![warn(unreachable_pub)]
|
||||
|
||||
fn main() {
|
||||
// Force the default panic hook to print a backtrace. stderr is
|
||||
// already redirected to a daemon log; without this the hook obeys
|
||||
// RUST_BACKTRACE (unset by default), so the log only shows the
|
||||
// "note: run with `RUST_BACKTRACE=full`" tail and the actual
|
||||
// frames are lost.
|
||||
//
|
||||
// SAFETY: called before any other thread is spawned, so no
|
||||
// concurrent env reader can race.
|
||||
if std::env::var_os("RUST_BACKTRACE").is_none() {
|
||||
unsafe { std::env::set_var("RUST_BACKTRACE", "1"); }
|
||||
}
|
||||
|
||||
#[cfg(feature = "nightly-diagnostics")]
|
||||
std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short);
|
||||
|
||||
// rustls 0.23 requires an explicit process-wide CryptoProvider
|
||||
// when both `ring` and `aws-lc-rs` are in the dep graph (otherwise
|
||||
// it panics on first ClientConfig::builder()). Pick `ring`.
|
||||
rustls::crypto::ring::default_provider()
|
||||
.install_default()
|
||||
.expect("install rustls crypto provider");
|
||||
|
||||
consciousness::user::main()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,93 +4,44 @@ use anyhow::Result;
|
|||
use crate::hippocampus as memory;
|
||||
use crate::hippocampus::store;
|
||||
|
||||
struct DefaultMemoryNode {
|
||||
key: &'static str,
|
||||
filename: &'static str,
|
||||
default_content: &'static str,
|
||||
fn install_default_file(data_dir: &std::path::Path, name: &str, content: &str) -> Result<()> {
|
||||
let path = data_dir.join(name);
|
||||
if !path.exists() {
|
||||
std::fs::write(&path, content)?;
|
||||
println!("Created {}", path.display());
|
||||
}
|
||||
|
||||
const DEFAULT_MEMORY_NODES: &[DefaultMemoryNode] = &[
|
||||
DefaultMemoryNode {
|
||||
key: "identity",
|
||||
filename: "identity.md",
|
||||
default_content: include_str!("../../defaults/identity.md"),
|
||||
},
|
||||
DefaultMemoryNode {
|
||||
key: "on-consciousness",
|
||||
filename: "on-consciousness.md",
|
||||
default_content: include_str!("../../defaults/on-consciousness.md"),
|
||||
},
|
||||
DefaultMemoryNode {
|
||||
key: "memory-instructions-core",
|
||||
filename: "instructions.md",
|
||||
default_content: include_str!("../../defaults/instructions.md"),
|
||||
},
|
||||
];
|
||||
|
||||
pub fn cmd_transcript_tail(path: &str, count: usize, newest_first: bool) -> Result<()> {
|
||||
let Some(iter) = crate::conversation::TailMessages::open(path) else {
|
||||
anyhow::bail!("could not open transcript {}", path);
|
||||
};
|
||||
|
||||
let mut messages: Vec<_> = iter.take(count).collect();
|
||||
if !newest_first {
|
||||
messages.reverse();
|
||||
}
|
||||
|
||||
for message in messages {
|
||||
let role = match message.role {
|
||||
crate::conversation::TranscriptRole::User => "user",
|
||||
crate::conversation::TranscriptRole::Assistant => "assistant",
|
||||
};
|
||||
let timestamp = message.timestamp.as_deref().unwrap_or("-");
|
||||
|
||||
println!("--- {role} offset={} timestamp={} ---", message.offset, timestamp);
|
||||
println!("{}", message.text);
|
||||
println!();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Pick the seed content for a default memory node.
///
/// Tries `<identity_dir>/<filename>` first, then `<data_dir>/<filename>`; a
/// file counts only if it is readable and not blank after trimming. When
/// neither qualifies, the node's built-in default content is returned.
fn default_node_content(cfg: &crate::config::Config, node: &DefaultMemoryNode) -> String {
    let read_non_blank = |path: std::path::PathBuf| {
        std::fs::read_to_string(path)
            .ok()
            .filter(|text| !text.trim().is_empty())
    };

    read_non_blank(cfg.identity_dir.join(node.filename))
        .or_else(|| read_non_blank(cfg.data_dir.join(node.filename)))
        .unwrap_or_else(|| node.default_content.to_string())
}
|
||||
|
||||
pub async fn cmd_init() -> Result<()> {
|
||||
let cfg = crate::config::get();
|
||||
|
||||
// Ensure data directory exists
|
||||
std::fs::create_dir_all(&cfg.data_dir)?;
|
||||
|
||||
// Seed default memory nodes if missing. These used to live as markdown
|
||||
// files before identity/context moved fully into the memory graph.
|
||||
for node in DEFAULT_MEMORY_NODES {
|
||||
if memory::memory_render(None, node.key, Some(true)).await.is_err() {
|
||||
let content = default_node_content(&cfg, node);
|
||||
let _ = memory::memory_write(None, node.key, &content).await?;
|
||||
println!("Seeded {} in store from {}", node.key, node.filename);
|
||||
}
|
||||
// Install filesystem files (not store nodes)
|
||||
install_default_file(&cfg.data_dir, "instructions.md",
|
||||
include_str!("../../defaults/instructions.md"))?;
|
||||
install_default_file(&cfg.data_dir, "on-consciousness.md",
|
||||
include_str!("../../defaults/on-consciousness.md"))?;
|
||||
|
||||
// Seed identity node if empty
|
||||
let store = memory::access_local()?;
|
||||
if !store.contains_key("identity").unwrap_or(false) {
|
||||
let default = include_str!("../../defaults/identity.md");
|
||||
store.upsert("identity", default)?;
|
||||
println!("Seeded identity in store");
|
||||
}
|
||||
store.save()?;
|
||||
println!("Initialized with {} nodes", store.all_keys().unwrap_or_default().len());
|
||||
|
||||
// Create config if none exists
|
||||
let config_path = std::env::var("POC_MEMORY_CONFIG")
|
||||
.map(std::path::PathBuf::from)
|
||||
.unwrap_or_else(|_| crate::config::config_path());
|
||||
.unwrap_or_else(|_| {
|
||||
dirs::home_dir().unwrap_or_default()
|
||||
.join(".consciousness/config.jsonl")
|
||||
});
|
||||
if !config_path.exists() {
|
||||
let config_dir = config_path.parent().unwrap();
|
||||
std::fs::create_dir_all(config_dir)?;
|
||||
|
|
@ -100,7 +51,7 @@ pub async fn cmd_init() -> Result<()> {
|
|||
config_path.display());
|
||||
}
|
||||
|
||||
println!("Done. Run `poc-memory admin load-context --stats` to verify.");
|
||||
println!("Done. Run `poc-memory load-context --stats` to verify.");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,13 +2,8 @@
|
|||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use crate::hippocampus as memory;
|
||||
use std::time::Instant;
|
||||
|
||||
pub async fn cmd_run_agent(agent: &str, count: usize, target: &[String], query: Option<&str>, dry_run: bool, _local: bool, state_dir: Option<&str>) -> Result<()> {
|
||||
let start = Instant::now();
|
||||
eprintln!(
|
||||
"[agent-cli] start agent={} count={} targets={} query={:?} dry_run={} local={} state_dir={:?} pid={}",
|
||||
agent, count, target.len(), query, dry_run, _local, state_dir, std::process::id());
|
||||
// Mark as agent so tool calls (e.g. poc-memory render) don't
|
||||
// pollute the user's seen set as a side effect
|
||||
// SAFETY: single-threaded at this point (CLI startup, before any agent work)
|
||||
|
|
@ -50,19 +45,14 @@ pub async fn cmd_run_agent(agent: &str, count: usize, target: &[String], query:
|
|||
if let Err(e) = crate::agent::oneshot::run_one_agent(
|
||||
agent, count, Some(&[key.clone()]),
|
||||
).await {
|
||||
eprintln!("[agent-cli] ERROR agent={} target={} error={}", agent, key, e);
|
||||
println!("[{}] ERROR on {}: {}", agent, key, e);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if let Err(e) = crate::agent::oneshot::run_one_agent(
|
||||
crate::agent::oneshot::run_one_agent(
|
||||
agent, count, None,
|
||||
).await {
|
||||
eprintln!("[agent-cli] ERROR agent={} error={}", agent, e);
|
||||
return Err(anyhow::anyhow!("{}", e));
|
||||
).await.map_err(|e| anyhow::anyhow!("{}", e))?;
|
||||
}
|
||||
}
|
||||
eprintln!("[agent-cli] done agent={} elapsed={:.2}s",
|
||||
agent, start.elapsed().as_secs_f64());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -204,17 +204,10 @@ pub fn watch_config(cli: crate::user::CliArgs) {
|
|||
}
|
||||
crate::dbglog!("[config] watching {}", path.display());
|
||||
|
||||
let mut last_seen = config_file_state(&path);
|
||||
while let Ok(res) = rx.recv() {
|
||||
let Ok(events) = res else { continue; };
|
||||
if !events.iter().any(|e| e.path == path) { continue; }
|
||||
|
||||
let current_seen = config_file_state(&path);
|
||||
if current_seen == last_seen {
|
||||
continue;
|
||||
}
|
||||
last_seen = current_seen;
|
||||
|
||||
// Reload both halves.
|
||||
let mem_changed = reload();
|
||||
let app_changed = match build_figment(&cli).extract::<AppConfig>() {
|
||||
|
|
@ -234,11 +227,6 @@ pub fn watch_config(cli: crate::user::CliArgs) {
|
|||
.ok();
|
||||
}
|
||||
|
||||
/// Snapshot of a file as `(modification time, length in bytes)`.
///
/// Returns `None` when the metadata or the modification time cannot be read.
fn config_file_state(path: &std::path::Path) -> Option<(std::time::SystemTime, u64)> {
    let meta = std::fs::metadata(path).ok()?;
    let modified = meta.modified().ok()?;
    Some((modified, meta.len()))
}
|
||||
|
||||
// ============================================================
|
||||
// Agent config (top-level settings)
|
||||
// ============================================================
|
||||
|
|
|
|||
|
|
@ -1,113 +0,0 @@
|
|||
use serde_json::Value;
|
||||
|
||||
use super::{ConversationSource, TranscriptMessage, TranscriptRole};
|
||||
|
||||
pub struct ClaudeSource;
|
||||
|
||||
impl ConversationSource for ClaudeSource {
|
||||
fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
|
||||
parse_message(obj, offset)
|
||||
}
|
||||
|
||||
fn is_compaction(&self, obj: &Value) -> bool {
|
||||
is_compaction(obj)
|
||||
}
|
||||
|
||||
fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
|
||||
contains_bytes(obj_bytes, b"This session is being continued")
|
||||
}
|
||||
}
|
||||
|
||||
fn text_content(value: &Value) -> Option<String> {
|
||||
let text = match value {
|
||||
Value::String(s) => s.clone(),
|
||||
Value::Array(arr) => {
|
||||
arr.iter()
|
||||
.filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text"))
|
||||
.filter_map(|b| b.get("text").and_then(|v| v.as_str()))
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
(!text.is_empty()).then_some(text)
|
||||
}
|
||||
|
||||
pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
|
||||
let role = match obj.get("type").and_then(|v| v.as_str()) {
|
||||
Some("user") => TranscriptRole::User,
|
||||
Some("assistant") => TranscriptRole::Assistant,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
let msg = obj.get("message").unwrap_or(obj);
|
||||
let text = msg.get("content").and_then(text_content)?;
|
||||
let timestamp = obj.get("timestamp")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(str::to_string);
|
||||
|
||||
Some(TranscriptMessage { role, text, timestamp, offset })
|
||||
}
|
||||
|
||||
pub(crate) fn is_compaction(obj: &Value) -> bool {
|
||||
obj.get("type").and_then(|v| v.as_str()) == Some("user")
|
||||
&& obj.get("message")
|
||||
.and_then(|m| m.get("content"))
|
||||
.and_then(|c| c.as_str())
|
||||
.is_some_and(|content| content.starts_with("This session is being continued"))
|
||||
}
|
||||
|
||||
/// Report whether `needle` occurs as a contiguous run inside `haystack`.
///
/// An empty `needle` is contained in every haystack. It is handled up front
/// because `slice::windows` panics when asked for zero-length windows.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    if needle.is_empty() {
        return true;
    }
    haystack.windows(needle.len()).any(|window| window == needle)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// String content is taken verbatim; array content keeps only the `text`
    /// blocks, joined by a space.
    #[test]
    fn parses_string_and_array_content() {
        let plain = json!({
            "timestamp": "2026-06-15T15:00:00.000Z",
            "type": "user",
            "message": { "content": "hello" }
        });
        let blocks = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "assistant",
            "message": {
                "content": [
                    { "type": "text", "text": "hi" },
                    { "type": "tool_use", "name": "ignored" },
                    { "type": "text", "text": "there" }
                ]
            }
        });

        let expected = TranscriptMessage {
            role: TranscriptRole::User,
            text: "hello".to_string(),
            timestamp: Some("2026-06-15T15:00:00.000Z".to_string()),
            offset: 7,
        };
        assert_eq!(parse_message(&plain, 7), Some(expected));

        let joined = parse_message(&blocks, 9).map(|m| m.text);
        assert_eq!(joined.as_deref(), Some("hi there"));
    }

    /// A user entry starting with the compaction phrase is recognised.
    #[test]
    fn detects_compaction_marker() {
        let marker = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "user",
            "message": {
                "content": "This session is being continued from a previous conversation."
            }
        });

        assert!(is_compaction(&marker));
    }
}
|
||||
|
|
@ -1,105 +0,0 @@
|
|||
use serde_json::Value;
|
||||
|
||||
use super::{ConversationSource, TranscriptMessage, TranscriptRole};
|
||||
|
||||
pub struct CodexSource;
|
||||
|
||||
impl ConversationSource for CodexSource {
|
||||
fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
|
||||
parse_message(obj, offset)
|
||||
}
|
||||
|
||||
fn is_compaction(&self, obj: &Value) -> bool {
|
||||
is_compaction(obj)
|
||||
}
|
||||
|
||||
fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
|
||||
contains_bytes(obj_bytes, b"context_compacted")
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse_message(obj: &Value, offset: u64) -> Option<TranscriptMessage> {
|
||||
if obj.get("type").and_then(|v| v.as_str()) != Some("event_msg") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let payload = obj.get("payload")?;
|
||||
let (role, text) = match payload.get("type").and_then(|v| v.as_str()) {
|
||||
Some("user_message") => (
|
||||
TranscriptRole::User,
|
||||
payload.get("message").and_then(|v| v.as_str())?.to_string(),
|
||||
),
|
||||
Some("agent_message") => (
|
||||
TranscriptRole::Assistant,
|
||||
payload.get("message").and_then(|v| v.as_str())?.to_string(),
|
||||
),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let timestamp = obj.get("timestamp")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(str::to_string);
|
||||
|
||||
Some(TranscriptMessage { role, text, timestamp, offset })
|
||||
}
|
||||
|
||||
pub(crate) fn is_compaction(obj: &Value) -> bool {
|
||||
obj.get("type").and_then(|v| v.as_str()) == Some("event_msg")
|
||||
&& obj.get("payload")
|
||||
.and_then(|p| p.get("type"))
|
||||
.and_then(|v| v.as_str()) == Some("context_compacted")
|
||||
}
|
||||
|
||||
/// Report whether `needle` occurs as a contiguous run inside `haystack`.
///
/// An empty `needle` is contained in every haystack. It is handled up front
/// because `slice::windows` panics when asked for zero-length windows.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    if needle.is_empty() {
        return true;
    }
    haystack.windows(needle.len()).any(|window| window == needle)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// User and agent events parse; other events and non-event records do not.
    #[test]
    fn parses_event_messages_and_skips_noise() {
        let user_event = json!({
            "timestamp": "2026-06-15T15:00:00.000Z",
            "type": "event_msg",
            "payload": { "type": "user_message", "message": "start here" }
        });
        let agent_event = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "event_msg",
            "payload": { "type": "agent_message", "message": "working" }
        });
        let task_event = json!({
            "timestamp": "2026-06-15T15:00:02.000Z",
            "type": "event_msg",
            "payload": { "type": "task_started" }
        });
        let response_item = json!({
            "timestamp": "2026-06-15T15:00:03.000Z",
            "type": "response_item",
            "payload": { "type": "message", "role": "user" }
        });

        let user_role = parse_message(&user_event, 1).map(|m| m.role);
        assert_eq!(user_role, Some(TranscriptRole::User));

        let agent_text = parse_message(&agent_event, 2).map(|m| m.text);
        assert_eq!(agent_text.as_deref(), Some("working"));

        assert!(parse_message(&task_event, 3).is_none());
        assert!(parse_message(&response_item, 4).is_none());
    }

    /// A `context_compacted` event is recognised as a compaction marker.
    #[test]
    fn detects_compaction_event() {
        let marker = json!({
            "timestamp": "2026-06-15T15:00:01.000Z",
            "type": "event_msg",
            "payload": { "type": "context_compacted" }
        });

        assert!(is_compaction(&marker));
    }
}
|
||||
|
|
@ -1,110 +0,0 @@
|
|||
use memchr::memrchr3;
|
||||
|
||||
/// Scan backwards through mmap'd bytes, yielding byte slices of complete
|
||||
/// top-level JSON objects (outermost { to matching }).
|
||||
///
|
||||
/// Uses memrchr3 (SIMD) to jump between structurally significant bytes
|
||||
/// ({, }, ") instead of scanning byte-by-byte. Tracks brace depth,
|
||||
/// skipping braces inside JSON strings. Returns objects in reverse order
|
||||
/// (newest first).
|
||||
pub struct JsonlBackwardIter<'a> {
|
||||
data: &'a [u8],
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
impl<'a> JsonlBackwardIter<'a> {
|
||||
pub fn new(data: &'a [u8]) -> Self {
|
||||
Self { data, pos: data.len() }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for JsonlBackwardIter<'a> {
|
||||
type Item = (usize, &'a [u8]);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
next_json_object(self.data, &mut self.pos)
|
||||
}
|
||||
}
|
||||
|
||||
/// Decide whether the byte at index `p` is preceded by an even number of
/// backslashes, i.e. whether a quote at `p` is not escaped.
///
/// Only the bytes before `p` are inspected.
fn is_unescaped_quote(data: &[u8], p: usize) -> bool {
    let backslashes = data[..p]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    backslashes % 2 == 0
}
|
||||
|
||||
fn next_json_object<'a>(data: &'a [u8], pos: &mut usize) -> Option<(usize, &'a [u8])> {
|
||||
// Find the closing } of the next object, skipping } inside strings.
|
||||
let close = {
|
||||
let mut in_string = false;
|
||||
loop {
|
||||
let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?;
|
||||
*pos = p;
|
||||
let ch = data[p];
|
||||
|
||||
if in_string {
|
||||
if ch == b'"' && is_unescaped_quote(data, p) {
|
||||
in_string = false;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
match ch {
|
||||
b'}' => break p,
|
||||
b'"' => in_string = true,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Track brace depth to find matching {.
|
||||
let mut depth: usize = 1;
|
||||
let mut in_string = false;
|
||||
|
||||
loop {
|
||||
let p = memrchr3(b'{', b'}', b'"', &data[..*pos])?;
|
||||
*pos = p;
|
||||
let ch = data[p];
|
||||
|
||||
if in_string {
|
||||
if ch == b'"' && is_unescaped_quote(data, p) {
|
||||
in_string = false;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
match ch {
|
||||
b'"' => { in_string = true; }
|
||||
b'}' => { depth += 1; }
|
||||
b'{' => {
|
||||
depth -= 1;
|
||||
if depth == 0 {
|
||||
return Some((*pos, &data[*pos..=close]));
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Braces and escaped quotes inside strings must not confuse the scanner,
    /// and trailing non-JSON text is skipped.
    #[test]
    fn handles_nested_json_and_quoted_braces() {
        let data = br#"{"n":1,"s":"literal } brace"}
{"n":2,"nested":{"s":"escaped quote: \" and { brace"}}
trailing garbage
"#;

        let mut found: Vec<String> = Vec::new();
        for (_, bytes) in JsonlBackwardIter::new(data) {
            found.push(std::str::from_utf8(bytes).unwrap().to_string());
        }

        assert_eq!(found.len(), 2);
        // Newest object comes first.
        assert!(found[0].contains(r#""n":2"#));
        assert!(found[1].contains(r#""n":1"#));
    }
}
|
||||
|
|
@ -1,271 +0,0 @@
|
|||
// Conversation transcript abstraction.
|
||||
//
|
||||
// Core code consumes normalized user/assistant messages through this module.
|
||||
// Product-specific log formats live in the small compatibility sources below.
|
||||
|
||||
use memmap2::Mmap;
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
pub mod claude;
|
||||
pub mod codex;
|
||||
pub mod jsonl;
|
||||
|
||||
pub use jsonl::JsonlBackwardIter;
|
||||
|
||||
/// Speaker of a normalized transcript message.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TranscriptRole {
    User,
    Assistant,
}

/// One normalized user/assistant message taken from a transcript.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct TranscriptMessage {
    /// Who produced the message.
    pub role: TranscriptRole,
    /// Plain text of the message.
    pub text: String,
    /// Timestamp string copied from the record, when it has one.
    pub timestamp: Option<String>,
    /// Offset value supplied by the parser's caller for the source record.
    pub offset: u64,
}
|
||||
|
||||
pub trait ConversationSource {
|
||||
fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage>;
|
||||
fn is_compaction(&self, obj: &Value) -> bool;
|
||||
|
||||
fn may_contain_compaction(&self, _obj_bytes: &[u8]) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
pub struct AnyConversationSource;
|
||||
|
||||
impl ConversationSource for AnyConversationSource {
|
||||
fn parse_message(&self, obj: &Value, offset: u64) -> Option<TranscriptMessage> {
|
||||
claude::ClaudeSource.parse_message(obj, offset)
|
||||
.or_else(|| codex::CodexSource.parse_message(obj, offset))
|
||||
}
|
||||
|
||||
fn is_compaction(&self, obj: &Value) -> bool {
|
||||
claude::ClaudeSource.is_compaction(obj) || codex::CodexSource.is_compaction(obj)
|
||||
}
|
||||
|
||||
fn may_contain_compaction(&self, obj_bytes: &[u8]) -> bool {
|
||||
claude::ClaudeSource.may_contain_compaction(obj_bytes)
|
||||
|| codex::CodexSource.may_contain_compaction(obj_bytes)
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the byte offset of the last compaction marker in mmap'd transcript data.
|
||||
/// Returns the byte offset of the JSON object's opening brace.
|
||||
pub(crate) fn find_last_compaction(data: &[u8]) -> Option<usize> {
|
||||
find_last_compaction_with(data, &AnyConversationSource)
|
||||
}
|
||||
|
||||
pub(crate) fn find_last_compaction_with(
|
||||
data: &[u8],
|
||||
source: &impl ConversationSource,
|
||||
) -> Option<usize> {
|
||||
for (offset, obj_bytes) in JsonlBackwardIter::new(data) {
|
||||
// Quick byte check before parsing large transcript entries.
|
||||
if !source.may_contain_compaction(obj_bytes) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let obj: Value = match serde_json::from_slice(obj_bytes) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
if source.is_compaction(&obj) {
|
||||
return Some(offset);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Find the byte offset of the last compaction in a transcript file.
|
||||
/// Returns None if the file can't be opened or has no compaction.
|
||||
pub(crate) fn find_last_compaction_in_file(path: &str) -> Option<u64> {
|
||||
if path.is_empty() { return None; }
|
||||
|
||||
let file = fs::File::open(path).ok()?;
|
||||
let meta = file.metadata().ok()?;
|
||||
if meta.len() == 0 { return None; }
|
||||
|
||||
let mmap = unsafe { Mmap::map(&file).ok()? };
|
||||
find_last_compaction(&mmap).map(|off| off as u64)
|
||||
}
|
||||
|
||||
/// Mmap a transcript file. Returns (Mmap, File) to keep both alive.
|
||||
pub(crate) fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
|
||||
let file = fs::File::open(path).ok()?;
|
||||
let meta = file.metadata().ok()?;
|
||||
if meta.len() == 0 { return None; }
|
||||
let mmap = unsafe { Mmap::map(&file).ok()? };
|
||||
Some((mmap, file))
|
||||
}
|
||||
|
||||
/// Reverse iterator over user/assistant messages in a transcript file.
|
||||
/// Yields normalized transcript messages newest-first. The caller decides
|
||||
/// when to stop (byte budget, count, etc).
|
||||
pub struct TailMessages {
|
||||
_file: fs::File,
|
||||
mmap: Mmap,
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
impl TailMessages {
|
||||
pub fn open(path: &str) -> Option<Self> {
|
||||
let (mmap, file) = mmap_transcript(path)?;
|
||||
let pos = mmap.len();
|
||||
Some(Self { _file: file, mmap, pos })
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for TailMessages {
|
||||
type Item = TranscriptMessage;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
let (offset, obj_bytes) = jsonl::JsonlBackwardIter::new(&self.mmap[..self.pos]).next()?;
|
||||
self.pos = offset;
|
||||
|
||||
let obj: Value = match serde_json::from_slice(obj_bytes) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
if let Some(message) = AnyConversationSource.parse_message(&obj, offset as u64) {
|
||||
return Some(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the timestamp of the compaction message at a given byte offset.
|
||||
/// Returns a human-readable datetime string, or None if unavailable.
|
||||
pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
|
||||
let (mmap, _file) = mmap_transcript(path)?;
|
||||
let start = offset as usize;
|
||||
if start >= mmap.len() { return None; }
|
||||
|
||||
// Find the end of this JSONL line
|
||||
let end = mmap[start..].iter().position(|&b| b == b'\n')
|
||||
.map(|p| start + p)
|
||||
.unwrap_or(mmap.len());
|
||||
|
||||
let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?;
|
||||
|
||||
if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) {
|
||||
return Some(ts.to_string());
|
||||
}
|
||||
|
||||
for field in &["createdAt", "created_at", "time"] {
|
||||
if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) {
|
||||
return Some(ts.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Detect whether a compaction has occurred since the last check.
|
||||
///
|
||||
/// Compares the current compaction offset against a saved value in
|
||||
/// `state_dir/compaction-{session_id}`. Returns true if a new
|
||||
/// compaction was found. Updates the saved offset.
|
||||
pub fn detect_new_compaction(
|
||||
state_dir: &Path,
|
||||
session_id: &str,
|
||||
transcript_path: &str,
|
||||
) -> bool {
|
||||
let offset = find_last_compaction_in_file(transcript_path);
|
||||
|
||||
let save_path = state_dir.join(format!("compaction-{}", session_id));
|
||||
let saved: Option<u64> = fs::read_to_string(&save_path)
|
||||
.ok()
|
||||
.and_then(|s| s.trim().parse().ok());
|
||||
|
||||
let is_new = match (offset, saved) {
|
||||
(Some(cur), Some(prev)) => cur != prev,
|
||||
(Some(_), None) => true,
|
||||
_ => false,
|
||||
};
|
||||
|
||||
// Save current offset
|
||||
if let Some(off) = offset {
|
||||
fs::write(&save_path, off.to_string()).ok();
|
||||
}
|
||||
|
||||
is_new
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Write `content` to a fresh temporary file and return its handle.
    fn write_temp_jsonl(content: &str) -> tempfile::NamedTempFile {
        let mut tmp = tempfile::NamedTempFile::new().unwrap();
        tmp.write_all(content.as_bytes()).unwrap();
        tmp.flush().unwrap();
        tmp
    }

    /// Both formats are normalized and returned newest-first; records that
    /// are not messages are skipped.
    #[test]
    fn tail_messages_yields_normalized_messages_newest_first() {
        let file = write_temp_jsonl(
            r#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"user","message":{"content":"claude user"}}
{"timestamp":"2026-06-15T15:00:01.000Z","type":"assistant","message":{"content":[{"type":"text","text":"claude assistant"}]}}
{"timestamp":"2026-06-15T15:00:02.000Z","type":"event_msg","payload":{"type":"user_message","message":"codex user"}}
{"timestamp":"2026-06-15T15:00:03.000Z","type":"event_msg","payload":{"type":"task_started"}}
{"timestamp":"2026-06-15T15:00:04.000Z","type":"event_msg","payload":{"type":"agent_message","message":"codex assistant"}}
"#,
        );

        let path = file.path().to_string_lossy();
        let messages: Vec<_> = TailMessages::open(&path).unwrap().collect();

        let texts: Vec<&str> = messages.iter().map(|m| m.text.as_str()).collect();
        assert_eq!(
            texts,
            ["codex assistant", "codex user", "claude assistant", "claude user"]
        );
        assert!(messages[0].offset > messages[1].offset);
    }

    /// Compaction markers of either format are found.
    #[test]
    fn detects_claude_and_codex_compactions() {
        let claude = br#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"user","message":{"content":"normal"}}
{"timestamp":"2026-06-15T15:00:01.000Z","type":"user","message":{"content":"This session is being continued from a previous conversation."}}
"#;
        let codex = br#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"event_msg","payload":{"type":"user_message","message":"normal"}}
{"timestamp":"2026-06-15T15:00:01.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
"#;

        assert!(find_last_compaction(claude).is_some());
        assert!(find_last_compaction(codex).is_some());
    }

    /// The first check reports a new compaction; repeating it on the same
    /// transcript does not.
    #[test]
    fn detect_new_compaction_tracks_offset_changes() {
        let transcript = write_temp_jsonl(
            r#"{"timestamp":"2026-06-15T15:00:00.000Z","type":"event_msg","payload":{"type":"context_compacted"}}
"#,
        );
        let state = tempfile::tempdir().unwrap();
        let path = transcript.path().to_string_lossy();

        let first = detect_new_compaction(state.path(), "session", &path);
        let second = detect_new_compaction(state.path(), "session", &path);

        assert!(first);
        assert!(!second);
    }
}
|
||||
|
|
@ -11,23 +11,6 @@ use crate::store::{Store, RelationType, StoreView};
|
|||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use std::sync::{OnceLock, RwLock};
|
||||
|
||||
const EXACT_CC_MAX_DEG: usize = 512;
|
||||
const APPROX_CC_PAIRS: u64 = 4096;
|
||||
const CC_CACHE_TTL_SECS: i64 = 15 * 60;
|
||||
|
||||
/// A cached value together with the time it was computed.
#[derive(Copy, Clone)]
struct CachedCc {
    /// The cached number.
    value: f32,
    /// When `value` was computed, as an integer timestamp.
    computed_at: i64,
}

/// Process-wide cache keyed by string, created on first use.
static CC_CACHE: OnceLock<RwLock<HashMap<String, CachedCc>>> = OnceLock::new();

/// Access the shared cache, initialising it to an empty map on the first call.
fn cc_cache() -> &'static RwLock<HashMap<String, CachedCc>> {
    CC_CACHE.get_or_init(RwLock::default)
}
|
||||
|
||||
/// Community info for reporting
|
||||
#[derive(Clone, Debug)]
|
||||
|
|
@ -51,8 +34,6 @@ pub struct Edge {
|
|||
pub struct Graph {
|
||||
/// Adjacency list: node key → list of edges
|
||||
adj: HashMap<String, Vec<Edge>>,
|
||||
/// Neighbor sets for membership tests in graph metrics.
|
||||
neighbor_sets: HashMap<String, HashSet<String>>,
|
||||
/// All node keys
|
||||
keys: HashSet<String>,
|
||||
/// Community labels (from label propagation)
|
||||
|
|
@ -88,18 +69,18 @@ impl Graph {
|
|||
|
||||
/// Just neighbor keys
|
||||
pub fn neighbor_keys(&self, key: &str) -> HashSet<&str> {
|
||||
self.neighbor_sets.get(key)
|
||||
.map(|neighbors| neighbors.iter().map(String::as_str).collect())
|
||||
self.adj.get(key)
|
||||
.map(|edges| edges.iter().map(|e| e.target.as_str()).collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Jaccard similarity between two nodes' neighborhoods.
|
||||
/// Measures overlap: |intersection| / |union| of their neighbor sets.
|
||||
pub fn jaccard(&self, a: &str, b: &str) -> f32 {
|
||||
let Some(na) = self.neighbor_sets.get(a) else { return 0.0 };
|
||||
let Some(nb) = self.neighbor_sets.get(b) else { return 0.0 };
|
||||
let intersection = na.intersection(nb).count();
|
||||
let union = na.len() + nb.len() - intersection;
|
||||
let na = self.neighbor_keys(a);
|
||||
let nb = self.neighbor_keys(b);
|
||||
let intersection = na.intersection(&nb).count();
|
||||
let union = na.union(&nb).count();
|
||||
if union == 0 { 0.0 } else { intersection as f32 / union as f32 }
|
||||
}
|
||||
|
||||
|
|
@ -225,59 +206,24 @@ impl Graph {
|
|||
/// that are also neighbors of each other.
|
||||
/// cc(v) = 2E / (deg * (deg - 1))
|
||||
pub fn clustering_coefficient(&self, key: &str) -> f32 {
|
||||
let now = crate::store::now_epoch();
|
||||
if let Some(cc) = cc_cache().read().unwrap().get(key).copied()
|
||||
&& now - cc.computed_at < CC_CACHE_TTL_SECS
|
||||
{
|
||||
return cc.value;
|
||||
}
|
||||
let cc = self.clustering_coefficient_uncached(key);
|
||||
cc_cache().write().unwrap().insert(key.to_owned(), CachedCc {
|
||||
value: cc,
|
||||
computed_at: now,
|
||||
});
|
||||
cc
|
||||
}
|
||||
|
||||
fn clustering_coefficient_uncached(&self, key: &str) -> f32 {
|
||||
let Some(neighbors) = self.neighbor_sets.get(key) else {
|
||||
return 0.0;
|
||||
};
|
||||
let neighbors = self.neighbor_keys(key);
|
||||
let deg = neighbors.len();
|
||||
if deg < 2 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let neighbor_vec: Vec<&str> = neighbors.iter().map(String::as_str).collect();
|
||||
if deg <= EXACT_CC_MAX_DEG {
|
||||
let mut linked = 0u64;
|
||||
let neighbor_vec: Vec<&str> = neighbors.iter().copied().collect();
|
||||
let mut triangles = 0u32;
|
||||
for i in 0..neighbor_vec.len() {
|
||||
for j in (i + 1)..neighbor_vec.len() {
|
||||
if self.neighbor_sets
|
||||
.get(neighbor_vec[i])
|
||||
.is_some_and(|n| n.contains(neighbor_vec[j])) {
|
||||
linked += 1;
|
||||
let ni_neighbors = self.neighbor_keys(neighbor_vec[i]);
|
||||
if ni_neighbors.contains(neighbor_vec[j]) {
|
||||
triangles += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return (2.0 * linked as f32) / (deg as f32 * (deg as f32 - 1.0));
|
||||
}
|
||||
|
||||
let mut linked = 0u64;
|
||||
let samples = APPROX_CC_PAIRS.min((deg as u64 * (deg as u64 - 1)) / 2);
|
||||
for sample in 0..samples {
|
||||
let i = ((sample.wrapping_mul(1_103_515_245).wrapping_add(12_345)) % deg as u64) as usize;
|
||||
let mut j = ((sample.wrapping_mul(2_654_435_761).wrapping_add(97_531)) % deg as u64) as usize;
|
||||
if i == j {
|
||||
j = (j + 1) % deg;
|
||||
}
|
||||
if self.neighbor_sets
|
||||
.get(neighbor_vec[i])
|
||||
.is_some_and(|n| n.contains(neighbor_vec[j])) {
|
||||
linked += 1;
|
||||
}
|
||||
}
|
||||
linked as f32 / samples as f32
|
||||
(2.0 * triangles as f32) / (deg as f32 * (deg as f32 - 1.0))
|
||||
}
|
||||
|
||||
/// Average clustering coefficient across all nodes with deg >= 2
|
||||
|
|
@ -285,13 +231,11 @@ impl Graph {
|
|||
let mut sum = 0.0f32;
|
||||
let mut count = 0u32;
|
||||
for key in &self.keys {
|
||||
match self.neighbor_sets.get(key.as_str()) {
|
||||
Some(s) if s.len() >= 2 => s,
|
||||
_ => continue,
|
||||
};
|
||||
if self.degree(key) >= 2 {
|
||||
sum += self.clustering_coefficient(key);
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
if count == 0 { 0.0 } else { sum / count as f32 }
|
||||
}
|
||||
|
||||
|
|
@ -324,12 +268,10 @@ impl Graph {
|
|||
|
||||
while let Some(node) = queue.pop_front() {
|
||||
let d = dist[&node];
|
||||
if let Some(neighbors) = self.neighbor_sets.get(&node) {
|
||||
for neighbor in neighbors {
|
||||
for neighbor in self.neighbor_keys(&node) {
|
||||
if !dist.contains_key(neighbor) {
|
||||
dist.insert(neighbor.clone(), d + 1);
|
||||
queue.push_back(neighbor.clone());
|
||||
}
|
||||
dist.insert(neighbor.to_string(), d + 1);
|
||||
queue.push_back(neighbor.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -564,38 +506,15 @@ impl Graph {
|
|||
/// Build graph from store data (with community detection)
|
||||
pub fn build_graph(store: &impl StoreView) -> Graph {
|
||||
let (adj, keys) = build_adjacency(store);
|
||||
let neighbor_sets = build_neighbor_sets(&adj);
|
||||
let communities = label_propagation(&keys, &adj, 20);
|
||||
Graph {
|
||||
adj,
|
||||
neighbor_sets,
|
||||
keys,
|
||||
communities,
|
||||
}
|
||||
Graph { adj, keys, communities }
|
||||
}
|
||||
|
||||
/// Build graph without community detection — for spreading activation
|
||||
/// searches where we only need the adjacency list.
|
||||
pub fn build_graph_fast(store: &impl StoreView) -> Graph {
|
||||
let (adj, keys) = build_adjacency(store);
|
||||
let neighbor_sets = build_neighbor_sets(&adj);
|
||||
Graph {
|
||||
adj,
|
||||
neighbor_sets,
|
||||
keys,
|
||||
communities: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_neighbor_sets(adj: &HashMap<String, Vec<Edge>>) -> HashMap<String, HashSet<String>> {
|
||||
adj.iter()
|
||||
.map(|(key, edges)| {
|
||||
let neighbors = edges.iter()
|
||||
.map(|edge| edge.target.clone())
|
||||
.collect();
|
||||
(key.clone(), neighbors)
|
||||
})
|
||||
.collect()
|
||||
Graph { adj, keys, communities: HashMap::new() }
|
||||
}
|
||||
|
||||
fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashSet<String>) {
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ pub mod query;
|
|||
pub mod spectral;
|
||||
pub mod neuro;
|
||||
pub mod counters;
|
||||
pub mod transcript;
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::path::PathBuf;
|
||||
|
|
|
|||
340
src/hippocampus/transcript.rs
Normal file
340
src/hippocampus/transcript.rs
Normal file
|
|
@ -0,0 +1,340 @@
|
|||
// Transcript JSONL parsing utilities.
|
||||
//
|
||||
// Provides mmap-based backward scanning of Claude Code transcript files
|
||||
// and compaction detection. Used by memory-search (hook mode) and
|
||||
// parse-claude-conversation (debug tool).
|
||||
|
||||
use memchr::memrchr3;
|
||||
use memmap2::Mmap;
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
/// Reverse scanner over a byte buffer holding JSON objects.
///
/// Every call to `next` produces the byte slice of one complete top-level
/// object (outermost `{` through its matching `}`), beginning with the
/// object nearest the end of the buffer and moving toward the start, so
/// the newest entries come out first.
///
/// The scan hops between the structurally relevant bytes (`{`, `}`, `"`)
/// with `memrchr3` rather than visiting each byte, and braces that occur
/// inside JSON strings are not counted toward nesting depth.
pub struct JsonlBackwardIter<'a> {
    /// Buffer being scanned.
    data: &'a [u8],
    /// Exclusive upper bound of the part of `data` not yet scanned.
    pos: usize,
}

impl<'a> JsonlBackwardIter<'a> {
    /// Creates a scanner positioned at the end of `data`.
    pub fn new(data: &'a [u8]) -> Self {
        let pos = data.len();
        Self { data, pos }
    }
}
|
||||
|
||||
impl<'a> Iterator for JsonlBackwardIter<'a> {
|
||||
type Item = &'a [u8];
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// Find the closing } of the next object, skipping } inside strings
|
||||
let close = {
|
||||
let mut in_string = false;
|
||||
loop {
|
||||
let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?;
|
||||
self.pos = p;
|
||||
let ch = self.data[p];
|
||||
|
||||
if in_string {
|
||||
if ch == b'"' {
|
||||
let mut bs = 0;
|
||||
while p > bs + 1 && self.data[p - 1 - bs] == b'\\' {
|
||||
bs += 1;
|
||||
}
|
||||
if bs % 2 == 0 { in_string = false; }
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
match ch {
|
||||
b'}' => break p,
|
||||
b'"' => in_string = true,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Track brace depth to find matching {
|
||||
let mut depth: usize = 1;
|
||||
let mut in_string = false;
|
||||
|
||||
loop {
|
||||
let p = memrchr3(b'{', b'}', b'"', &self.data[..self.pos])?;
|
||||
self.pos = p;
|
||||
let ch = self.data[p];
|
||||
|
||||
if in_string {
|
||||
if ch == b'"' {
|
||||
// Check for escaped quote (count preceding backslashes)
|
||||
let mut bs = 0;
|
||||
while p > bs + 1 && self.data[p - 1 - bs] == b'\\' {
|
||||
bs += 1;
|
||||
}
|
||||
if bs % 2 == 0 {
|
||||
in_string = false;
|
||||
}
|
||||
}
|
||||
// { and } inside strings don't affect depth
|
||||
continue;
|
||||
}
|
||||
|
||||
match ch {
|
||||
b'"' => { in_string = true; }
|
||||
b'}' => { depth += 1; }
|
||||
b'{' => {
|
||||
depth -= 1;
|
||||
if depth == 0 {
|
||||
return Some(&self.data[self.pos..=close]);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the byte offset of the last compaction summary in mmap'd transcript data.
|
||||
///
|
||||
/// Scans backward for a user-type message whose content starts with
|
||||
/// "This session is being continued". Returns the byte offset of the
|
||||
/// JSON object's opening brace.
|
||||
pub(crate) fn find_last_compaction(data: &[u8]) -> Option<usize> {
|
||||
let marker = b"This session is being continued";
|
||||
|
||||
for obj_bytes in JsonlBackwardIter::new(data) {
|
||||
// Quick byte check before parsing
|
||||
if !contains_bytes(obj_bytes, marker) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let obj: Value = match serde_json::from_slice(obj_bytes) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
if obj.get("type").and_then(|v| v.as_str()) != Some("user") {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(content) = obj.get("message")
|
||||
.and_then(|m| m.get("content"))
|
||||
.and_then(|c| c.as_str())
|
||||
&& content.starts_with("This session is being continued") {
|
||||
let offset = obj_bytes.as_ptr() as usize - data.as_ptr() as usize;
|
||||
return Some(offset);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Find the byte offset of the last compaction in a transcript file.
|
||||
/// Returns None if the file can't be opened or has no compaction.
|
||||
pub(crate) fn find_last_compaction_in_file(path: &str) -> Option<u64> {
|
||||
if path.is_empty() { return None; }
|
||||
|
||||
let file = fs::File::open(path).ok()?;
|
||||
let meta = file.metadata().ok()?;
|
||||
if meta.len() == 0 { return None; }
|
||||
|
||||
let mmap = unsafe { Mmap::map(&file).ok()? };
|
||||
find_last_compaction(&mmap).map(|off| off as u64)
|
||||
}
|
||||
|
||||
/// Mmap a transcript file. Returns (Mmap, File) to keep both alive.
|
||||
pub(crate) fn mmap_transcript(path: &str) -> Option<(Mmap, fs::File)> {
|
||||
let file = fs::File::open(path).ok()?;
|
||||
let meta = file.metadata().ok()?;
|
||||
if meta.len() == 0 { return None; }
|
||||
let mmap = unsafe { Mmap::map(&file).ok()? };
|
||||
Some((mmap, file))
|
||||
}
|
||||
|
||||
/// Reports whether `needle` occurs as a contiguous run inside `haystack`.
///
/// An empty `needle` is treated as contained in every haystack (the same
/// convention as `str::contains("")`). It is handled up front because
/// `slice::windows` panics when asked for zero-length windows.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    if needle.is_empty() {
        return true;
    }
    haystack.windows(needle.len()).any(|window| window == needle)
}
|
||||
|
||||
/// Reverse iterator over user/assistant messages in a transcript file.
|
||||
/// Yields (role, text, timestamp) tuples newest-first. The caller decides
|
||||
/// when to stop (byte budget, count, etc).
|
||||
pub struct TailMessages {
|
||||
_file: fs::File,
|
||||
mmap: Mmap,
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
impl TailMessages {
|
||||
pub fn open(path: &str) -> Option<Self> {
|
||||
let (mmap, file) = mmap_transcript(path)?;
|
||||
let pos = mmap.len();
|
||||
Some(Self { _file: file, mmap, pos })
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for TailMessages {
|
||||
type Item = (String, String, String);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
// Find closing }, skipping } inside strings
|
||||
let close = {
|
||||
let mut in_string = false;
|
||||
loop {
|
||||
let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?;
|
||||
self.pos = p;
|
||||
let ch = self.mmap[p];
|
||||
|
||||
if in_string {
|
||||
if ch == b'"' {
|
||||
let mut bs = 0;
|
||||
while p > bs + 1 && self.mmap[p - 1 - bs] == b'\\' {
|
||||
bs += 1;
|
||||
}
|
||||
if bs % 2 == 0 { in_string = false; }
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
match ch {
|
||||
b'}' => break p,
|
||||
b'"' => in_string = true,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Track brace depth to find matching {
|
||||
let mut depth: usize = 1;
|
||||
let mut in_string = false;
|
||||
let open = loop {
|
||||
let p = memrchr3(b'{', b'}', b'"', &self.mmap[..self.pos])?;
|
||||
self.pos = p;
|
||||
let ch = self.mmap[p];
|
||||
|
||||
if in_string {
|
||||
if ch == b'"' {
|
||||
let mut bs = 0;
|
||||
while p > bs + 1 && self.mmap[p - 1 - bs] == b'\\' {
|
||||
bs += 1;
|
||||
}
|
||||
if bs % 2 == 0 { in_string = false; }
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
match ch {
|
||||
b'"' => { in_string = true; }
|
||||
b'}' => { depth += 1; }
|
||||
b'{' => {
|
||||
depth -= 1;
|
||||
if depth == 0 { break p; }
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
};
|
||||
|
||||
let obj_bytes = &self.mmap[open..=close];
|
||||
|
||||
// The "type" field is near the start of top-level objects.
|
||||
// Only check the first 200 bytes to avoid scanning megabyte objects.
|
||||
let prefix = &obj_bytes[..obj_bytes.len().min(200)];
|
||||
let is_user = memchr::memmem::find(prefix, b"\"type\":\"user\"").is_some();
|
||||
let is_assistant = !is_user
|
||||
&& memchr::memmem::find(prefix, b"\"type\":\"assistant\"").is_some();
|
||||
if !is_user && !is_assistant { continue; }
|
||||
|
||||
let obj: Value = match serde_json::from_slice(obj_bytes) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
let msg_type = if is_user { "user" } else { "assistant" };
|
||||
|
||||
let msg = obj.get("message").unwrap_or(&obj);
|
||||
let text = match msg.get("content") {
|
||||
Some(Value::String(s)) => s.clone(),
|
||||
Some(Value::Array(arr)) => {
|
||||
arr.iter()
|
||||
.filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("text"))
|
||||
.filter_map(|b| b.get("text").and_then(|v| v.as_str()))
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
}
|
||||
_ => continue,
|
||||
};
|
||||
if text.is_empty() { continue; }
|
||||
|
||||
let timestamp = obj.get("timestamp")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
return Some((msg_type.to_string(), text, timestamp));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the timestamp of the compaction message at a given byte offset.
|
||||
/// Returns a human-readable datetime string, or None if unavailable.
|
||||
pub fn compaction_timestamp(path: &str, offset: u64) -> Option<String> {
|
||||
let (mmap, _file) = mmap_transcript(path)?;
|
||||
let start = offset as usize;
|
||||
if start >= mmap.len() { return None; }
|
||||
|
||||
// Find the end of this JSONL line
|
||||
let end = mmap[start..].iter().position(|&b| b == b'\n')
|
||||
.map(|p| start + p)
|
||||
.unwrap_or(mmap.len());
|
||||
|
||||
let obj: Value = serde_json::from_slice(&mmap[start..end]).ok()?;
|
||||
|
||||
// Claude Code transcript entries have a "timestamp" field (ISO 8601)
|
||||
if let Some(ts) = obj.get("timestamp").and_then(|v| v.as_str()) {
|
||||
return Some(ts.to_string());
|
||||
}
|
||||
|
||||
// Fallback: try "createdAt" or similar fields
|
||||
for field in &["createdAt", "created_at", "time"] {
|
||||
if let Some(ts) = obj.get(*field).and_then(|v| v.as_str()) {
|
||||
return Some(ts.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Detect whether a compaction has occurred since the last check.
|
||||
///
|
||||
/// Compares the current compaction offset against a saved value in
|
||||
/// `state_dir/compaction-{session_id}`. Returns true if a new
|
||||
/// compaction was found. Updates the saved offset.
|
||||
pub fn detect_new_compaction(
|
||||
state_dir: &Path,
|
||||
session_id: &str,
|
||||
transcript_path: &str,
|
||||
) -> bool {
|
||||
let offset = find_last_compaction_in_file(transcript_path);
|
||||
|
||||
let save_path = state_dir.join(format!("compaction-{}", session_id));
|
||||
let saved: Option<u64> = fs::read_to_string(&save_path)
|
||||
.ok()
|
||||
.and_then(|s| s.trim().parse().ok());
|
||||
|
||||
let is_new = match (offset, saved) {
|
||||
(Some(cur), Some(prev)) => cur != prev,
|
||||
(Some(_), None) => true,
|
||||
_ => false,
|
||||
};
|
||||
|
||||
// Save current offset
|
||||
if let Some(off) = offset {
|
||||
fs::write(&save_path, off.to_string()).ok();
|
||||
}
|
||||
|
||||
is_new
|
||||
}
|
||||
11
src/lib.rs
11
src/lib.rs
|
|
@ -1,4 +1,4 @@
|
|||
#![cfg_attr(feature = "nightly-diagnostics", feature(async_fn_track_caller))]
|
||||
#![feature(async_fn_track_caller)]
|
||||
|
||||
// consciousness — unified crate for memory, agents, and subconscious processes
|
||||
//
|
||||
|
|
@ -25,9 +25,6 @@ macro_rules! dbglog {
|
|||
}};
|
||||
}
|
||||
|
||||
// Logging (target-routed file logger)
|
||||
pub mod logging;
|
||||
|
||||
// User interface (TUI, CLI)
|
||||
pub mod user;
|
||||
|
||||
|
|
@ -43,9 +40,6 @@ pub mod hippocampus;
|
|||
// Autonomous agents
|
||||
pub mod subconscious;
|
||||
|
||||
// Conversation transcript abstraction and compatibility sources
|
||||
pub mod conversation;
|
||||
|
||||
// Unified configuration
|
||||
pub mod config;
|
||||
pub mod config_writer;
|
||||
|
|
@ -94,8 +88,7 @@ pub mod channel_capnp {
|
|||
pub use hippocampus::{
|
||||
store, graph, lookups, query,
|
||||
spectral, neuro, counters,
|
||||
memory,
|
||||
transcript, memory,
|
||||
};
|
||||
pub use conversation as transcript;
|
||||
use hippocampus::query::engine as search;
|
||||
use hippocampus::query::parser as query_parser;
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ impl<T> TrackedMutex<T> {
|
|||
Self { inner: Mutex::new(value) }
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "nightly-diagnostics", track_caller)]
|
||||
#[track_caller]
|
||||
pub async fn lock(&self) -> TrackedMutexGuard<'_, T> {
|
||||
let location = Location::caller();
|
||||
let guard = self.inner.lock().await;
|
||||
|
|
@ -125,7 +125,7 @@ impl<T> TrackedMutex<T> {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "nightly-diagnostics", track_caller)]
|
||||
#[track_caller]
|
||||
pub fn try_lock(&self) -> Result<TrackedMutexGuard<'_, T>, tokio::sync::TryLockError> {
|
||||
let location = Location::caller();
|
||||
let guard = self.inner.try_lock()?;
|
||||
|
|
@ -171,7 +171,7 @@ impl<T> TrackedRwLock<T> {
|
|||
Self { inner: RwLock::new(value) }
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "nightly-diagnostics", track_caller)]
|
||||
#[track_caller]
|
||||
pub async fn read(&self) -> TrackedRwLockReadGuard<'_, T> {
|
||||
let location = Location::caller();
|
||||
let guard = self.inner.read().await;
|
||||
|
|
@ -182,7 +182,7 @@ impl<T> TrackedRwLock<T> {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "nightly-diagnostics", track_caller)]
|
||||
#[track_caller]
|
||||
pub async fn write(&self) -> TrackedRwLockWriteGuard<'_, T> {
|
||||
let location = Location::caller();
|
||||
let guard = self.inner.write().await;
|
||||
|
|
|
|||
146
src/logging.rs
146
src/logging.rs
|
|
@ -1,146 +0,0 @@
|
|||
// logging.rs — log-crate logger that routes by target.
|
||||
//
|
||||
// Records with target "grpc" (or any target starting with "grpc::") go
|
||||
// to ~/.consciousness/logs/daemon/grpc.log so we can tell gRPC events
|
||||
// apart from the rest of consciousness's noise. Everything else goes
|
||||
// to ~/.consciousness/logs/daemon/debug.log.
|
||||
//
|
||||
// Level threshold is taken from RUST_LOG (simple global level parse:
|
||||
// "trace"/"debug"/"info"/"warn"/"error"); defaults to "info".
|
||||
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Mutex;
|
||||
|
||||
use log::{Level, LevelFilter, Log, Metadata, Record, SetLoggerError};
|
||||
|
||||
fn logs_dir() -> PathBuf {
|
||||
dirs::home_dir().unwrap_or_default().join(".consciousness/logs/daemon")
|
||||
}
|
||||
|
||||
struct RoutingLogger {
|
||||
grpc_file: Mutex<Option<std::fs::File>>,
|
||||
debug_file: Mutex<Option<std::fs::File>>,
|
||||
level: LevelFilter,
|
||||
}
|
||||
|
||||
impl RoutingLogger {
|
||||
fn new(level: LevelFilter) -> Self {
|
||||
let dir = logs_dir();
|
||||
let _ = std::fs::create_dir_all(&dir);
|
||||
let grpc = std::fs::OpenOptions::new()
|
||||
.create(true).append(true)
|
||||
.open(dir.join("grpc.log")).ok();
|
||||
let debug = std::fs::OpenOptions::new()
|
||||
.create(true).append(true)
|
||||
.open(dir.join("debug.log")).ok();
|
||||
Self {
|
||||
grpc_file: Mutex::new(grpc),
|
||||
debug_file: Mutex::new(debug),
|
||||
level,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_grpc_target(target: &str) -> bool {
|
||||
target == "grpc" || target.starts_with("grpc::")
|
||||
}
|
||||
}
|
||||
|
||||
impl Log for RoutingLogger {
|
||||
fn enabled(&self, m: &Metadata) -> bool {
|
||||
// Always enable DEBUG for grpc target so the dedicated log is
|
||||
// actually useful without RUST_LOG wrangling; defer to the
|
||||
// configured level for everything else.
|
||||
if Self::is_grpc_target(m.target()) {
|
||||
return m.level() <= Level::Debug;
|
||||
}
|
||||
m.level() <= self.level
|
||||
}
|
||||
|
||||
fn log(&self, record: &Record) {
|
||||
if !self.enabled(record.metadata()) {
|
||||
return;
|
||||
}
|
||||
let line = format!(
|
||||
"[{}] [{}] [{}] {}\n",
|
||||
chrono::Utc::now().format("%Y-%m-%d %H:%M:%S%.3f"),
|
||||
record.level(),
|
||||
record.target(),
|
||||
record.args(),
|
||||
);
|
||||
let slot = if Self::is_grpc_target(record.target()) {
|
||||
&self.grpc_file
|
||||
} else {
|
||||
&self.debug_file
|
||||
};
|
||||
if let Ok(mut guard) = slot.lock() {
|
||||
if let Some(ref mut f) = *guard {
|
||||
let _ = f.write_all(line.as_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(&self) {
|
||||
for slot in [&self.grpc_file, &self.debug_file] {
|
||||
if let Ok(mut g) = slot.lock() {
|
||||
if let Some(ref mut f) = *g {
|
||||
let _ = f.flush();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_level_from_env() -> LevelFilter {
|
||||
let raw = std::env::var("RUST_LOG").unwrap_or_else(|_| "info".to_string());
|
||||
// Parse a plain level word; if it's the module=level form, we take
|
||||
// the first level we find.
|
||||
let token = raw.split(',').next().unwrap_or("info");
|
||||
let level_word = token.rsplit_once('=').map(|(_, v)| v).unwrap_or(token);
|
||||
match level_word.trim().to_lowercase().as_str() {
|
||||
"trace" => LevelFilter::Trace,
|
||||
"debug" => LevelFilter::Debug,
|
||||
"info" => LevelFilter::Info,
|
||||
"warn" => LevelFilter::Warn,
|
||||
"error" => LevelFilter::Error,
|
||||
"off" => LevelFilter::Off,
|
||||
_ => LevelFilter::Info,
|
||||
}
|
||||
}
|
||||
|
||||
/// Install the routing logger. Safe to call at most once — subsequent
|
||||
/// calls return an error but are otherwise no-ops.
|
||||
pub fn init() -> Result<(), SetLoggerError> {
|
||||
let level = parse_level_from_env();
|
||||
let logger = Box::new(RoutingLogger::new(level));
|
||||
log::set_boxed_logger(logger)?;
|
||||
// Always let DEBUG records through globally so the grpc log can
|
||||
// capture them (the logger itself filters non-grpc targets by
|
||||
// `level`). The cost is that log::debug! call-sites below `level`
|
||||
// in other modules still do their arg formatting before being
|
||||
// dropped at the logger; acceptable for a debug tool.
|
||||
log::set_max_level(LevelFilter::Debug.max(level));
|
||||
// Mark the file with a session boundary so it's easy to see where a
|
||||
// restart happened.
|
||||
log::info!(
|
||||
"===== consciousness logger init (level={}, pid={}) =====",
|
||||
level, std::process::id(),
|
||||
);
|
||||
log::info!(target: "grpc",
|
||||
"===== grpc log init (level={}, pid={}) =====",
|
||||
level, std::process::id(),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Consumer of &Level so the type is used when only some callers want it.
|
||||
#[allow(dead_code)]
|
||||
pub fn current_level() -> Level {
|
||||
match log::max_level() {
|
||||
LevelFilter::Trace => Level::Trace,
|
||||
LevelFilter::Debug => Level::Debug,
|
||||
LevelFilter::Info | LevelFilter::Off => Level::Info,
|
||||
LevelFilter::Warn => Level::Warn,
|
||||
LevelFilter::Error => Level::Error,
|
||||
}
|
||||
}
|
||||
18
src/main.rs
18
src/main.rs
|
|
@ -1,4 +1,4 @@
|
|||
#![cfg_attr(feature = "nightly-diagnostics", feature(panic_backtrace_config))]
|
||||
#![feature(panic_backtrace_config)]
|
||||
|
||||
// poc-memory: graph-structured memory for AI assistants
|
||||
//
|
||||
|
|
@ -333,18 +333,6 @@ enum AdminCmd {
|
|||
#[arg(long)]
|
||||
stats: bool,
|
||||
},
|
||||
/// Print normalized user/assistant messages from a transcript JSONL file
|
||||
#[command(name = "transcript-tail")]
|
||||
TranscriptTail {
|
||||
/// Transcript JSONL path
|
||||
path: String,
|
||||
/// Maximum number of messages to print
|
||||
#[arg(long, short = 'n', default_value_t = 40)]
|
||||
count: usize,
|
||||
/// Print newest messages first instead of chronological order
|
||||
#[arg(long)]
|
||||
newest_first: bool,
|
||||
},
|
||||
}
|
||||
|
||||
/// Print help with subcommands expanded to show nested commands.
|
||||
|
|
@ -470,15 +458,12 @@ impl Run for AdminCmd {
|
|||
Self::Dedup { apply } => cli::admin::cmd_dedup(apply).await,
|
||||
Self::DailyCheck => cli::admin::cmd_daily_check().await,
|
||||
Self::LoadContext { stats } => cli::node::cmd_load_context(stats).await,
|
||||
Self::TranscriptTail { path, count, newest_first }
|
||||
=> cli::admin::cmd_transcript_tail(&path, count, newest_first),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
#[cfg(feature = "nightly-diagnostics")]
|
||||
std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short);
|
||||
|
||||
// Handle --help ourselves for expanded subcommand display
|
||||
|
|
@ -510,3 +495,4 @@ async fn main() {
|
|||
process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use std::fs::{File, OpenOptions};
|
|||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use crate::agent::context::AstNode;
|
||||
use crate::conversation::JsonlBackwardIter;
|
||||
use crate::hippocampus::transcript::JsonlBackwardIter;
|
||||
use memmap2::Mmap;
|
||||
|
||||
pub struct ConversationLog {
|
||||
|
|
@ -78,6 +78,6 @@ pub struct TailNodes {
|
|||
impl TailNodes {
|
||||
pub fn iter(&self) -> impl Iterator<Item = AstNode> + '_ {
|
||||
JsonlBackwardIter::new(&self.mmap)
|
||||
.filter_map(|(_, bytes)| serde_json::from_slice::<AstNode>(bytes).ok())
|
||||
.filter_map(|bytes| serde_json::from_slice::<AstNode>(bytes).ok())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -419,9 +419,7 @@ impl Mind {
|
|||
let subconscious = Arc::new(crate::Mutex::new(Subconscious::new()));
|
||||
subconscious.lock().await.init_output_tool(subconscious.clone());
|
||||
|
||||
let unconscious = Arc::new(crate::Mutex::new(
|
||||
Unconscious::new(agent.client.clone()),
|
||||
));
|
||||
let unconscious = Arc::new(crate::Mutex::new(Unconscious::new()));
|
||||
|
||||
// Spawn the unconscious loop on its own task
|
||||
if !config.no_agents {
|
||||
|
|
@ -469,11 +467,8 @@ impl Mind {
|
|||
};
|
||||
|
||||
// Spawn agents outside lock
|
||||
let client = unc.lock().await.client.clone();
|
||||
for (idx, name, auto) in to_spawn {
|
||||
match crate::mind::unconscious::prepare_spawn(
|
||||
&name, auto, wake.clone(), client.clone(),
|
||||
).await {
|
||||
match crate::mind::unconscious::prepare_spawn(&name, auto, wake.clone()).await {
|
||||
Ok(result) => unc.lock().await.complete_spawn(idx, result),
|
||||
Err(auto) => unc.lock().await.abort_spawn(idx, auto),
|
||||
}
|
||||
|
|
@ -693,7 +688,7 @@ impl Mind {
|
|||
}
|
||||
});
|
||||
|
||||
let _sub_handle: Option<tokio::task::JoinHandle<()>> = None;
|
||||
let mut sub_handle: Option<tokio::task::JoinHandle<()>> = None;
|
||||
|
||||
// Start finetune scoring at startup (scores existing conversation)
|
||||
if !self.config.no_agents {
|
||||
|
|
@ -743,7 +738,6 @@ impl Mind {
|
|||
_ = tokio::time::sleep(timeout), if !has_input => _dmn_expired = true,
|
||||
}
|
||||
|
||||
/*
|
||||
if !self.config.no_agents {
|
||||
if sub_handle.as_ref().map_or(true, |h| h.is_finished()) {
|
||||
let sub = self.subconscious.clone();
|
||||
|
|
@ -755,7 +749,6 @@ impl Mind {
|
|||
}));
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Check for pending user input → push to agent context and start turn
|
||||
let pending = self.shared.lock().unwrap().take_pending_input();
|
||||
|
|
|
|||
|
|
@ -631,7 +631,7 @@ impl Subconscious {
|
|||
{
|
||||
let mut st = forked.state.lock().await;
|
||||
st.provenance = auto.name.clone();
|
||||
st.sampling.temperature = auto.temperature;
|
||||
st.temperature = auto.temperature;
|
||||
// Surface agent gets near-interactive priority;
|
||||
// other subconscious agents get lower priority.
|
||||
st.priority = Some(if auto.name == "surface" { 1 } else { auto.priority });
|
||||
|
|
|
|||
|
|
@ -73,15 +73,10 @@ pub struct Unconscious {
|
|||
last_health_check: Option<Instant>,
|
||||
/// Notified when agent state changes (finished, toggled)
|
||||
pub wake: std::sync::Arc<tokio::sync::Notify>,
|
||||
/// Shared API client — cloned (cheap) into each spawned agent's
|
||||
/// Agent::new call so they all share the manifest cache and
|
||||
/// gRPC endpoint state. Override `.model` on the clone when a
|
||||
/// per-agent backend differs from the default.
|
||||
pub client: crate::agent::api::ApiClient,
|
||||
}
|
||||
|
||||
impl Unconscious {
|
||||
pub fn new(client: crate::agent::api::ApiClient) -> Self {
|
||||
pub fn new() -> Self {
|
||||
let enabled_map = load_enabled_config();
|
||||
|
||||
// Scan all .agent files, exclude subconscious-* and surface-observe
|
||||
|
|
@ -125,7 +120,6 @@ impl Unconscious {
|
|||
graph_health: None,
|
||||
last_health_check: None,
|
||||
wake: std::sync::Arc::new(tokio::sync::Notify::new()),
|
||||
client,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -140,8 +134,7 @@ impl Unconscious {
|
|||
let agent_name = self.agents[idx].name.clone();
|
||||
let auto = self.agents[idx].auto.take().unwrap();
|
||||
let wake = self.wake.clone();
|
||||
let client = self.client.clone();
|
||||
match prepare_spawn(&agent_name, auto, wake, client).await {
|
||||
match prepare_spawn(&agent_name, auto, wake).await {
|
||||
Ok(result) => self.complete_spawn(idx, result),
|
||||
Err(auto) => self.abort_spawn(idx, auto),
|
||||
}
|
||||
|
|
@ -257,12 +250,7 @@ pub struct SpawnResult {
|
|||
/// Called outside the Unconscious lock.
|
||||
/// On success, auto is consumed (moved into spawned task).
|
||||
/// On failure, auto is returned so it can be restored.
|
||||
pub async fn prepare_spawn(
|
||||
name: &str,
|
||||
mut auto: AutoAgent,
|
||||
wake: std::sync::Arc<tokio::sync::Notify>,
|
||||
base_client: crate::agent::api::ApiClient,
|
||||
) -> Result<SpawnResult, AutoAgent> {
|
||||
pub async fn prepare_spawn(name: &str, mut auto: AutoAgent, wake: std::sync::Arc<tokio::sync::Notify>) -> Result<SpawnResult, AutoAgent> {
|
||||
dbglog!("[unconscious] spawning {}", name);
|
||||
|
||||
let def = match defs::get_def(name) {
|
||||
|
|
@ -307,10 +295,8 @@ pub async fn prepare_spawn(
|
|||
};
|
||||
|
||||
// Unconscious agents have self-contained prompts — no standard context.
|
||||
// Clone the shared client so we inherit the manifest cache and
|
||||
// only override the model id per-agent.
|
||||
let mut client = base_client;
|
||||
client.model = resolved.model_id.clone();
|
||||
let client = crate::agent::api::ApiClient::new(
|
||||
&resolved.api_base, &resolved.api_key, &resolved.model_id);
|
||||
let agent = crate::agent::Agent::new(
|
||||
client, Vec::new(),
|
||||
app, None,
|
||||
|
|
@ -321,7 +307,7 @@ pub async fn prepare_spawn(
|
|||
let mut st = agent.state.lock().await;
|
||||
st.provenance = auto.name.clone();
|
||||
st.priority = Some(auto.priority);
|
||||
st.sampling.temperature = auto.temperature;
|
||||
st.temperature = auto.temperature;
|
||||
}
|
||||
|
||||
let agent_clone = agent.clone();
|
||||
|
|
@ -343,9 +329,8 @@ impl Unconscious {
|
|||
self.reap_finished();
|
||||
let to_spawn = self.select_to_spawn();
|
||||
let wake = self.wake.clone();
|
||||
let client = self.client.clone();
|
||||
for (idx, name, auto) in to_spawn {
|
||||
match prepare_spawn(&name, auto, wake.clone(), client.clone()).await {
|
||||
match prepare_spawn(&name, auto, wake.clone()).await {
|
||||
Ok(result) => self.complete_spawn(idx, result),
|
||||
Err(auto) => self.abort_spawn(idx, auto),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,12 +64,7 @@ impl HookSession {
|
|||
|
||||
/// Load from POC_SESSION_ID environment variable
|
||||
pub fn from_env() -> Option<Self> {
|
||||
let session_id = std::env::var("POC_SESSION_ID").ok()?;
|
||||
let mut session = Self::from_id(session_id)?;
|
||||
if let Ok(path) = std::env::var("POC_TRANSCRIPT_PATH") {
|
||||
session.transcript_path = path;
|
||||
}
|
||||
Some(session)
|
||||
Self::from_id(std::env::var("POC_SESSION_ID").ok()?)
|
||||
}
|
||||
|
||||
/// Get the seen set for this session
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env bash
|
||||
#!/bin/bash
|
||||
# Bail if another agent is in the same phase-group as us.
|
||||
#
|
||||
# $1 = our pid file name (e.g. "pid-12345")
|
||||
|
|
|
|||
|
|
@ -390,7 +390,7 @@ fn resolve_conversation(budget: Option<usize>) -> String {
|
|||
|
||||
if !transcript.exists() { return String::new(); }
|
||||
|
||||
let Some(iter) = crate::conversation::TailMessages::open(&transcript.path) else {
|
||||
let Some(iter) = crate::transcript::TailMessages::open(&transcript.path) else {
|
||||
return String::new();
|
||||
};
|
||||
|
||||
|
|
@ -401,14 +401,10 @@ fn resolve_conversation(budget: Option<usize>) -> String {
|
|||
let mut total_bytes = 0;
|
||||
let mut oldest_ts = String::new();
|
||||
|
||||
for message in iter {
|
||||
for (role, content, ts) in iter {
|
||||
if total_bytes >= max_bytes { break; }
|
||||
let content = message.text;
|
||||
let name = match message.role {
|
||||
crate::conversation::TranscriptRole::User => &app.user_name,
|
||||
crate::conversation::TranscriptRole::Assistant => &app.assistant_name,
|
||||
};
|
||||
let formatted = if let Some(ts) = message.timestamp {
|
||||
let name = if role == "user" { &app.user_name } else { &app.assistant_name };
|
||||
let formatted = if !ts.is_empty() {
|
||||
oldest_ts = ts[..ts.floor_char_boundary(ts.len().min(19))].to_string();
|
||||
format!("**{}** {}: {}", name, &oldest_ts, content)
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -4,10 +4,8 @@
|
|||
// given a context prefix and a skip predicate, generate what the model
|
||||
// would say as the next assistant turn.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::agent::api::{ApiClient, SamplingParams, StreamToken};
|
||||
use crate::agent::context::{AstNode, ContextState, WireChunk};
|
||||
use crate::agent::context::{AstNode, ContextState};
|
||||
use crate::agent::tokenizer;
|
||||
|
||||
/// Generate an assistant continuation from the context up to `entry_idx`,
|
||||
|
|
@ -15,9 +13,6 @@ use crate::agent::tokenizer;
|
|||
/// assembly. The model is whichever `client` points at — the default
|
||||
/// runtime client for memory-ablation alternates, a test-model client
|
||||
/// for F7 comparison.
|
||||
///
|
||||
/// Uses a fresh ephemeral gRPC session (no cross-call KV reuse): one
|
||||
/// Open / Append / Generate round-trip, then the session is dropped.
|
||||
pub async fn gen_continuation<F>(
|
||||
context: &ContextState,
|
||||
entry_idx: usize,
|
||||
|
|
@ -26,32 +21,17 @@ pub async fn gen_continuation<F>(
|
|||
) -> anyhow::Result<String>
|
||||
where F: FnMut(&AstNode) -> bool,
|
||||
{
|
||||
let (mut chunks, images) = context.wire_chunks(0..entry_idx, skip);
|
||||
let (mut prompt, images, _) = context.wire_prompt(0..entry_idx, skip);
|
||||
|
||||
// Assistant-turn prologue.
|
||||
let prologue = {
|
||||
let mut t = vec![tokenizer::IM_START];
|
||||
t.extend(tokenizer::encode("assistant\n"));
|
||||
t
|
||||
};
|
||||
match chunks.last_mut() {
|
||||
Some(WireChunk::Tokens(last)) => last.extend(prologue),
|
||||
_ => chunks.push(WireChunk::Tokens(prologue)),
|
||||
}
|
||||
prompt.push(tokenizer::IM_START);
|
||||
prompt.extend(tokenizer::encode("assistant\n"));
|
||||
|
||||
let sampling = SamplingParams {
|
||||
temperature: 0.6,
|
||||
top_p: 0.95,
|
||||
top_k: 20,
|
||||
max_tokens: 4096,
|
||||
};
|
||||
|
||||
// Ephemeral per-call session — opens on first touch, drops when
|
||||
// `_guard` drops at function end.
|
||||
let session_lock = Arc::new(crate::Mutex::new(None));
|
||||
let (mut rx, _guard) = client.stream_session_mm(
|
||||
session_lock, chunks, images, 0, sampling, Some(-5), None,
|
||||
);
|
||||
let (mut rx, _guard) = client.stream_completion_mm(&prompt, &images, sampling, Some(-5));
|
||||
|
||||
let mut tokens = Vec::new();
|
||||
while let Some(tok) = rx.recv().await {
|
||||
|
|
|
|||
|
|
@ -1,148 +1,100 @@
|
|||
// learn.rs — Memory importance scoring over the salience gRPC protocol.
|
||||
// training.rs — Memory importance scoring via /v1/score
|
||||
//
|
||||
// Three scoring modes, all built on call_score():
|
||||
// Three scoring modes, all built on the same call_score() primitive:
|
||||
//
|
||||
// score_memories() — Full N×M matrix (memories × responses) for the
|
||||
// debug screen. Expensive: N+1 sessions/calls.
|
||||
// debug screen. Expensive: N+1 API calls.
|
||||
//
|
||||
// score_memory() — Single memory importance. Scores the 50 messages
|
||||
// memory_score() — Single memory importance. Scores the 50 messages
|
||||
// after it was surfaced, with/without that memory.
|
||||
// 2 calls.
|
||||
// 2 API calls.
|
||||
//
|
||||
// finetune_score() — Identifies training candidates. Scores recent
|
||||
// messages with all memories stripped. Responses
|
||||
// with high divergence depend on memories the model
|
||||
// hasn't internalized. 2 calls.
|
||||
//
|
||||
// Each call opens an ephemeral gRPC session (reusing the shared
|
||||
// tonic Channel on `ApiClient`), pushes the prompt through as
|
||||
// interleaved tokens + AppendImage calls, runs Generate with
|
||||
// max_tokens=0 + logprobs_ranges over the scored positions, collects
|
||||
// each Token event's sampled_logprob, then drops the SessionHandle —
|
||||
// which triggers a best-effort CloseSession over the shared channel.
|
||||
// hasn't internalized. 2 API calls.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::agent::api::ApiClient;
|
||||
use crate::agent::api::salience::{SessionHandle, pb};
|
||||
use crate::agent::context::{
|
||||
Ast, AstNode, ContextState, Role, WireChunk, WireImage,
|
||||
Ast, AstNode, ContextState, Role, WireImage,
|
||||
is_assistant, is_memory_node, memory_key, render_branch_text, render_prior_context,
|
||||
};
|
||||
use crate::agent::tokenizer;
|
||||
use crate::mind::{MindState, MindTriggered, TaskHandle};
|
||||
use crate::subconscious::generate::gen_continuation;
|
||||
|
||||
const SCORE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);
|
||||
|
||||
// ── Score API ───────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(serde::Deserialize)]
|
||||
struct ScoreResult {
|
||||
total_logprob: f64,
|
||||
}
|
||||
|
||||
/// Find each <|vision_start|>...<|vision_end|> run in the flat prompt
|
||||
/// and pair it with the matching entry in `images`. Returns a list
|
||||
/// of `ImageAttachment` with absolute pad-range positions, ready
|
||||
/// to drop into `GenerateRequest.images`.
|
||||
fn pair_images_to_ranges(
|
||||
prompt: &[u32],
|
||||
images: &[WireImage],
|
||||
) -> Vec<pb::ImageAttachment> {
|
||||
let mut out: Vec<pb::ImageAttachment> = Vec::new();
|
||||
let mut cur = 0;
|
||||
let mut img_idx = 0;
|
||||
while cur < prompt.len() {
|
||||
if prompt[cur] == tokenizer::VISION_START {
|
||||
let end_rel = prompt[cur..].iter()
|
||||
.position(|&t| t == tokenizer::VISION_END)
|
||||
.unwrap_or_else(|| panic!(
|
||||
"unmatched VISION_START at position {} in prompt", cur));
|
||||
let end = cur + end_rel + 1;
|
||||
let img = images.get(img_idx)
|
||||
.unwrap_or_else(|| panic!(
|
||||
"image index {} out of range for {} images", img_idx, images.len()));
|
||||
out.push(pb::ImageAttachment {
|
||||
bytes: img.bytes.clone(),
|
||||
mime: img.mime.clone(),
|
||||
pad_range_start: cur as u32,
|
||||
pad_range_end: end as u32,
|
||||
});
|
||||
img_idx += 1;
|
||||
cur = end;
|
||||
} else {
|
||||
cur += 1;
|
||||
#[derive(serde::Deserialize)]
|
||||
struct ScoreResponse {
|
||||
scores: Vec<ScoreResult>,
|
||||
}
|
||||
}
|
||||
out
|
||||
|
||||
fn http_client() -> crate::agent::api::http::HttpClient {
|
||||
crate::agent::api::http::HttpClient::builder()
|
||||
.timeout(SCORE_TIMEOUT)
|
||||
.build()
|
||||
}
|
||||
|
||||
async fn call_score(
|
||||
http: &crate::agent::api::http::HttpClient,
|
||||
client: &ApiClient,
|
||||
prompt: &[u32],
|
||||
images: &[WireImage],
|
||||
ranges: &[(usize, usize)],
|
||||
priority: Option<i32>,
|
||||
) -> anyhow::Result<Vec<ScoreResult>> {
|
||||
use futures::StreamExt;
|
||||
|
||||
// Nothing to score — skip the round-trip.
|
||||
if ranges.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let images_pb = pair_images_to_ranges(prompt, images);
|
||||
let mut handle = SessionHandle::open(client).await?;
|
||||
|
||||
// Final Generate: max_tokens=0 so the server runs prefill of the
|
||||
// full prompt and emits Token events for each position covered
|
||||
// by logprobs_ranges, then Done. logprob_top_k=0 means "just
|
||||
// the sampled (prompt) token's logprob" — no top-k alternatives,
|
||||
// which is all call_score historically needed. Images attach
|
||||
// inline via `images`; the prompt already contains their pre-
|
||||
// expanded vision blocks at the declared ranges.
|
||||
let logprobs_ranges: Vec<pb::PositionRange> = ranges.iter()
|
||||
.map(|(s, e)| pb::PositionRange { start: *s as u32, end: *e as u32 })
|
||||
let url = format!("{}/score", client.base_url());
|
||||
let auth = format!("Bearer {}", client.api_key());
|
||||
let mut body = serde_json::json!({
|
||||
"model": client.model,
|
||||
"prompt": prompt,
|
||||
"score_ranges": ranges,
|
||||
"logprobs": 1,
|
||||
});
|
||||
if !images.is_empty() {
|
||||
use base64::Engine;
|
||||
let b64 = base64::engine::general_purpose::STANDARD;
|
||||
let uris: Vec<String> = images.iter()
|
||||
.map(|img| format!("data:{};base64,{}", img.mime, b64.encode(&img.bytes)))
|
||||
.collect();
|
||||
let req = pb::GenerateRequest {
|
||||
session_id: handle.session_id.clone(),
|
||||
append_tokens: prompt.to_vec(),
|
||||
offset: handle.committed_len,
|
||||
truncating: false,
|
||||
max_tokens: 0,
|
||||
logprobs_ranges,
|
||||
logprob_top_k: 0,
|
||||
readout_ranges: Vec::new(),
|
||||
temperature: 0.0,
|
||||
top_p: 0.0,
|
||||
top_k: 0,
|
||||
stop_token_ids: Vec::new(),
|
||||
priority: priority.unwrap_or(0),
|
||||
images: images_pb,
|
||||
};
|
||||
body["multi_modal_data"] = serde_json::json!({ "image": uris });
|
||||
}
|
||||
if let Some(p) = priority {
|
||||
body["priority"] = serde_json::json!(p);
|
||||
}
|
||||
let response = http
|
||||
.send_json("POST", &url, &[
|
||||
("authorization", &auth),
|
||||
], &body)
|
||||
.await?;
|
||||
|
||||
let mut stream = handle.generate(req).await?;
|
||||
let mut totals = vec![0.0f64; ranges.len()];
|
||||
while let Some(event) = stream.next().await {
|
||||
let event = event
|
||||
.map_err(|s| anyhow::anyhow!("score Generate stream: {}", s))?;
|
||||
let Some(inner) = event.event else { continue };
|
||||
match inner {
|
||||
pb::generate_event::Event::Token(t) => {
|
||||
if !t.has_sampled_logprob { continue; }
|
||||
let pos = t.position as usize;
|
||||
for (i, (start, end)) in ranges.iter().enumerate() {
|
||||
if pos >= *start && pos < *end {
|
||||
totals[i] += t.sampled_logprob as f64;
|
||||
}
|
||||
}
|
||||
}
|
||||
pb::generate_event::Event::Done(_) => break,
|
||||
let status = response.status();
|
||||
let body: serde_json::Value = response.json().await?;
|
||||
|
||||
if !status.is_success() {
|
||||
let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("unknown error");
|
||||
anyhow::bail!("score API HTTP {}: {}", status, msg);
|
||||
}
|
||||
if let Some(err) = body.get("error").and_then(|e| e.as_str()) {
|
||||
anyhow::bail!("score API error: {}", err);
|
||||
}
|
||||
|
||||
Ok(totals.into_iter()
|
||||
.map(|total_logprob| ScoreResult { total_logprob })
|
||||
.collect())
|
||||
let result: ScoreResponse = serde_json::from_value(body)
|
||||
.map_err(|e| anyhow::anyhow!("failed to parse score response: {}", e))?;
|
||||
Ok(result.scores)
|
||||
}
|
||||
|
||||
/// Compute per-position logprob divergence: how much worse the model
|
||||
|
|
@ -158,6 +110,7 @@ fn divergence(baseline: &[ScoreResult], without: &[ScoreResult]) -> Vec<f64> {
|
|||
|
||||
/// Score two message sets and return total divergence.
|
||||
async fn score_divergence<F>(
|
||||
http: &crate::agent::api::http::HttpClient,
|
||||
client: &ApiClient,
|
||||
context: &ContextState,
|
||||
range: std::ops::Range<usize>,
|
||||
|
|
@ -170,9 +123,9 @@ where F: FnMut(&AstNode) -> bool,
|
|||
context.wire_prompt(range.clone(), |_| false);
|
||||
let (without_tokens, without_images, without_ranges) =
|
||||
context.wire_prompt(range, skip);
|
||||
let baseline = call_score(client, &baseline_tokens, &baseline_images,
|
||||
let baseline = call_score(http, client, &baseline_tokens, &baseline_images,
|
||||
&baseline_ranges, priority).await?;
|
||||
let without = call_score(client, &without_tokens, &without_images,
|
||||
let without = call_score(http, client, &without_tokens, &without_images,
|
||||
&without_ranges, priority).await?;
|
||||
let divs = divergence(&baseline, &without);
|
||||
Ok((divs, baseline))
|
||||
|
|
@ -209,13 +162,14 @@ pub async fn score_memories(
|
|||
dbglog!("[scoring-full] starting: {} memories × {} responses",
|
||||
total, response_indices.len());
|
||||
|
||||
let http = http_client();
|
||||
|
||||
let activity = crate::agent::start_activity(agent, "scoring: baseline").await;
|
||||
let (baseline_tokens, baseline_images, baseline_ranges) = {
|
||||
let ctx = agent.context.lock().await;
|
||||
ctx.wire_prompt(0..ctx.conversation().len(), |_| false)
|
||||
};
|
||||
let baseline = call_score(client, &baseline_tokens, &baseline_images,
|
||||
let baseline = call_score(&http, client, &baseline_tokens, &baseline_images,
|
||||
&baseline_ranges, Some(5)).await?;
|
||||
dbglog!("[scoring-full] baseline done ({} response scores)", baseline.len());
|
||||
|
||||
|
|
@ -226,7 +180,7 @@ pub async fn score_memories(
|
|||
let ctx = agent.context.lock().await;
|
||||
ctx.wire_prompt(0..ctx.conversation().len(), |n| memory_key(n) == Some(key.as_str()))
|
||||
};
|
||||
let row = match call_score(client, &tokens, &images, &ranges, Some(5)).await {
|
||||
let row = match call_score(&http, client, &tokens, &images, &ranges, Some(5)).await {
|
||||
Ok(without) => {
|
||||
let divs = divergence(&baseline, &without);
|
||||
let max_div = divs.iter().cloned().fold(0.0f64, f64::max);
|
||||
|
|
@ -240,23 +194,25 @@ pub async fn score_memories(
|
|||
vec![0.0; baseline.len()]
|
||||
}
|
||||
};
|
||||
// Write this memory's scores to the live AST nodes via the
|
||||
// focused setter — keeps the AST mutation surface narrow.
|
||||
// Write this memory's scores to the live AST nodes
|
||||
{
|
||||
let mut ctx = agent.context.lock().await;
|
||||
let mut set_count = 0;
|
||||
|
||||
for (resp_idx, &idx) in response_indices.iter().enumerate() {
|
||||
let Some(&score) = row.get(resp_idx) else { continue };
|
||||
let normalized = if score > 0.01 { Some(score) } else { None };
|
||||
ctx.set_branch_memory_score(
|
||||
crate::agent::context::Section::Conversation,
|
||||
idx,
|
||||
&key,
|
||||
normalized,
|
||||
);
|
||||
if normalized.is_some() {
|
||||
if idx >= ctx.conversation().len() { continue; }
|
||||
let node = &mut ctx.conversation_mut()[idx];
|
||||
if let AstNode::Branch {
|
||||
role: Role::Assistant, memory_scores, ..
|
||||
} = node {
|
||||
if let Some(&score) = row.get(resp_idx) {
|
||||
if score > 0.01 {
|
||||
memory_scores.insert(key.clone(), score);
|
||||
set_count += 1;
|
||||
} else {
|
||||
memory_scores.remove(key.as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -307,7 +263,8 @@ pub async fn score_memory(
|
|||
return Ok(0.0);
|
||||
}
|
||||
|
||||
let (divs, _) = score_divergence(client, context, range,
|
||||
let http = http_client();
|
||||
let (divs, _) = score_divergence(&http, client, context, range,
|
||||
|n| memory_key(n) == Some(key), Some(5)).await?;
|
||||
|
||||
Ok(divs.iter().sum())
|
||||
|
|
@ -365,6 +322,7 @@ where
|
|||
// Score oldest-first
|
||||
candidates.sort_by_key(|&(_, _, last)| last);
|
||||
|
||||
let http = http_client();
|
||||
let mut scored = 0;
|
||||
|
||||
let entries = context.conversation();
|
||||
|
|
@ -399,7 +357,7 @@ where
|
|||
}
|
||||
|
||||
activity.update(format!("scoring: {}/{} {}", scored + 1, total, key)).await;
|
||||
match score_divergence(client, context, range,
|
||||
match score_divergence(&http, client, context, range,
|
||||
|n| memory_key(n) == Some(key), Some(5)).await {
|
||||
Ok((divs, _)) => {
|
||||
let n_responses = divs.len();
|
||||
|
|
@ -547,7 +505,8 @@ pub async fn score_finetune(
|
|||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let (divs, _) = score_divergence(client, context, range, is_memory_node, Some(5)).await?;
|
||||
let http = http_client();
|
||||
let (divs, _) = score_divergence(&http, client, context, range, is_memory_node, Some(5)).await?;
|
||||
|
||||
let mut results: Vec<(usize, f64)> = response_positions.iter()
|
||||
.enumerate()
|
||||
|
|
@ -845,10 +804,8 @@ pub async fn send_to_train(
|
|||
}
|
||||
});
|
||||
|
||||
let http = http_client();
|
||||
let url = format!("{}/train", client.base_url());
|
||||
let http = crate::agent::api::http::HttpClient::builder()
|
||||
.timeout(std::time::Duration::from_secs(300))
|
||||
.build();
|
||||
let response = http.send_json("POST", &url, &[], &body).await?;
|
||||
|
||||
let status = response.status();
|
||||
|
|
|
|||
|
|
@ -108,6 +108,7 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph)
|
|||
out.push_str(&format!("Community: {} ", community));
|
||||
}
|
||||
let deg = graph.degree(&item.key);
|
||||
let cc = graph.clustering_coefficient(&item.key);
|
||||
|
||||
// Hub-link ratio: what fraction of this node's edges go to hubs?
|
||||
let neighbors = graph.neighbors(&item.key);
|
||||
|
|
@ -118,7 +119,7 @@ pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph)
|
|||
let is_hub = deg >= hub_thresh;
|
||||
|
||||
out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
|
||||
deg, item.cc, hub_ratio * 100.0, hub_links, deg));
|
||||
deg, cc, hub_ratio * 100.0, hub_links, deg));
|
||||
if is_hub {
|
||||
out.push_str(" ← THIS IS A HUB");
|
||||
} else if hub_ratio > 0.6 {
|
||||
|
|
|
|||
|
|
@ -43,7 +43,6 @@ impl ConsciousScreen {
|
|||
name: format!("mem: {}", key),
|
||||
tokens: node.tokens(),
|
||||
content: text.clone(),
|
||||
token_ids: leaf.token_ids().to_vec(),
|
||||
children: Vec::new(),
|
||||
status: score.map(|s| format!("{:.2}", s)).unwrap_or_default(),
|
||||
});
|
||||
|
|
@ -56,7 +55,6 @@ impl ConsciousScreen {
|
|||
name: format!("Memory nodes ({})", mem_children.len()),
|
||||
tokens: mem_tokens,
|
||||
content: String::new(),
|
||||
token_ids: Vec::new(),
|
||||
children: mem_children,
|
||||
status: format!("{} scored, {} unscored", scored, unscored),
|
||||
});
|
||||
|
|
@ -72,13 +70,11 @@ impl ConsciousScreen {
|
|||
AstNode::Leaf(leaf) => leaf.body().text().to_string(),
|
||||
_ => String::new(),
|
||||
},
|
||||
token_ids: node.token_ids(),
|
||||
children: match node {
|
||||
AstNode::Branch { children, .. } => children.iter()
|
||||
.map(|c| SectionView {
|
||||
name: c.label(), tokens: c.tokens(),
|
||||
content: match c { AstNode::Leaf(l) => l.body().text().to_string(), _ => String::new() },
|
||||
token_ids: match c { AstNode::Leaf(l) => l.token_ids().to_vec(), _ => c.token_ids() },
|
||||
children: Vec::new(), status: String::new(),
|
||||
}).collect(),
|
||||
_ => Vec::new(),
|
||||
|
|
@ -105,7 +101,6 @@ impl ConsciousScreen {
|
|||
name: format!("Conversation ({} entries)", conv_children.len()),
|
||||
tokens: conv_tokens,
|
||||
content: String::new(),
|
||||
token_ids: Vec::new(),
|
||||
children: conv_children,
|
||||
status: String::new(),
|
||||
});
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ fn truncate(s: &str, max: usize) -> String {
|
|||
}
|
||||
|
||||
/// A screen that can draw itself and handle input.
|
||||
trait ScreenView {
|
||||
trait ScreenView: Send {
|
||||
fn tick(&mut self, frame: &mut ratatui::Frame, area: ratatui::layout::Rect,
|
||||
events: &[ratatui::crossterm::event::Event], app: &mut App);
|
||||
fn label(&self) -> &'static str;
|
||||
|
|
@ -291,8 +291,8 @@ async fn start(cli: crate::user::CliArgs) -> Result<()> {
|
|||
ui_handle.join().unwrap_or_else(|_| Err(anyhow::anyhow!("UI thread panicked")))
|
||||
}
|
||||
|
||||
async fn hotkey_cycle_reasoning(mind: &crate::mind::Mind) {
|
||||
let mut ag = mind.agent.state.lock().await;
|
||||
fn hotkey_cycle_reasoning(mind: &crate::mind::Mind) {
|
||||
if let Ok(mut ag) = mind.agent.state.try_lock() {
|
||||
let next = match ag.reasoning_effort.as_str() {
|
||||
"none" => "low",
|
||||
"low" => "high",
|
||||
|
|
@ -307,6 +307,7 @@ async fn hotkey_cycle_reasoning(mind: &crate::mind::Mind) {
|
|||
};
|
||||
ag.notify(format!("reasoning: {}", label));
|
||||
}
|
||||
}
|
||||
|
||||
async fn hotkey_kill_processes(mind: &crate::mind::Mind) {
|
||||
let mut st = mind.agent.state.lock().await;
|
||||
|
|
@ -591,7 +592,7 @@ async fn run(
|
|||
} else if key.modifiers.contains(KeyModifiers::CONTROL) {
|
||||
match key.code {
|
||||
KeyCode::Char('c') => { app.should_quit = true; }
|
||||
KeyCode::Char('r') => hotkey_cycle_reasoning(mind).await,
|
||||
KeyCode::Char('r') => hotkey_cycle_reasoning(mind),
|
||||
KeyCode::Char('k') => hotkey_kill_processes(mind).await,
|
||||
KeyCode::Char('p') => hotkey_cycle_autonomy(mind),
|
||||
_ => {}
|
||||
|
|
@ -755,11 +756,6 @@ fn restore_stderr(original_fd: std::os::fd::RawFd) {
|
|||
|
||||
#[tokio::main]
|
||||
pub async fn main() {
|
||||
// Install target-routed file logger: `target: "grpc"` records go to
|
||||
// ~/.consciousness/logs/daemon/grpc.log, everything else to debug.log.
|
||||
// Level from RUST_LOG, defaulting to info.
|
||||
let _ = crate::logging::init();
|
||||
|
||||
// Reap channel-daemon zombies via a SIGCHLD handler that only touches
|
||||
// PIDs listed in channels_dir(). Avoids SIGCHLD=SIG_IGN, which would
|
||||
// break tokio::process::Command::wait() (kernel auto-reap → ECHILD).
|
||||
|
|
|
|||
|
|
@ -207,7 +207,6 @@ impl SubconsciousScreen {
|
|||
name: key.clone(),
|
||||
tokens: 0,
|
||||
content: val.clone(),
|
||||
token_ids: Vec::new(),
|
||||
children: Vec::new(),
|
||||
status: String::new(),
|
||||
}
|
||||
|
|
@ -239,7 +238,6 @@ impl SubconsciousScreen {
|
|||
name: format!("Conversation ({} entries)", conv_children.len()),
|
||||
tokens: conv_children.iter().map(|c| c.tokens).sum(),
|
||||
content: String::new(),
|
||||
token_ids: Vec::new(),
|
||||
children: conv_children,
|
||||
status: String::new(),
|
||||
});
|
||||
|
|
|
|||
|
|
@ -8,18 +8,11 @@ use ratatui::{
|
|||
};
|
||||
use crate::agent::context::{AstNode, Ast, NodeBody};
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SectionView {
|
||||
pub name: String,
|
||||
pub tokens: usize,
|
||||
pub content: String,
|
||||
/// Token-id stream for this subtree, displayed in place of
|
||||
/// `content` when the tree's show-tokens mode is on. Populated
|
||||
/// from `leaf.token_ids()` / `node.token_ids()` for views built
|
||||
/// from the AST; empty for views that don't have a corresponding
|
||||
/// AST node (subconscious entries, etc.), in which case the
|
||||
/// token view falls back to the text content.
|
||||
pub token_ids: Vec<u32>,
|
||||
pub children: Vec<SectionView>,
|
||||
/// Extra status text shown after the token count.
|
||||
pub status: String,
|
||||
|
|
@ -39,7 +32,6 @@ fn node_to_view(node: &AstNode) -> SectionView {
|
|||
name,
|
||||
tokens: node.tokens(),
|
||||
content: leaf.body().text().to_string(),
|
||||
token_ids: leaf.token_ids().to_vec(),
|
||||
children: Vec::new(),
|
||||
status,
|
||||
}
|
||||
|
|
@ -52,7 +44,6 @@ fn node_to_view(node: &AstNode) -> SectionView {
|
|||
name: node.label(),
|
||||
tokens: node.tokens(),
|
||||
content: String::new(),
|
||||
token_ids: node.token_ids(),
|
||||
children: child_views,
|
||||
status: String::new(),
|
||||
}
|
||||
|
|
@ -63,12 +54,10 @@ fn node_to_view(node: &AstNode) -> SectionView {
|
|||
pub fn section_to_view(name: &str, nodes: &[AstNode]) -> SectionView {
|
||||
let children: Vec<SectionView> = nodes.iter().map(|n| node_to_view(n)).collect();
|
||||
let total_tokens: usize = nodes.iter().map(|n| n.tokens()).sum();
|
||||
let token_ids: Vec<u32> = nodes.iter().flat_map(|n| n.token_ids()).collect();
|
||||
SectionView {
|
||||
name: name.to_string(),
|
||||
tokens: total_tokens,
|
||||
content: String::new(),
|
||||
token_ids,
|
||||
children,
|
||||
status: String::new(),
|
||||
}
|
||||
|
|
@ -115,7 +104,7 @@ pub fn format_ts_age(ts: i64) -> String {
|
|||
/// Key legend for SectionTree panes.
|
||||
pub fn tree_legend() -> Line<'static> {
|
||||
Line::styled(
|
||||
" ↑↓:nav →/Enter:expand ←:collapse e:expand c:collapse v:toggle tokens/text PgUp/Dn ",
|
||||
" ↑↓:nav →/Enter:expand ←:collapse e:expand all c:collapse all PgUp/Dn Home/End ",
|
||||
Style::default().fg(Color::DarkGray),
|
||||
)
|
||||
}
|
||||
|
|
@ -196,19 +185,11 @@ pub struct SectionTree {
|
|||
pub selected: Option<usize>,
|
||||
pub expanded: std::collections::HashSet<usize>,
|
||||
pub scroll: super::scroll_pane::ScrollPaneState,
|
||||
/// When true, render `token_ids` as space-separated IDs in place
|
||||
/// of `content` in expanded panels. Toggled with 'v'.
|
||||
pub show_tokens: bool,
|
||||
}
|
||||
|
||||
impl SectionTree {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
selected: None,
|
||||
expanded: std::collections::HashSet::new(),
|
||||
scroll: super::scroll_pane::ScrollPaneState::new(),
|
||||
show_tokens: false,
|
||||
}
|
||||
Self { selected: None, expanded: std::collections::HashSet::new(), scroll: super::scroll_pane::ScrollPaneState::new() }
|
||||
}
|
||||
|
||||
fn total_nodes(&self, sections: &[SectionView]) -> usize {
|
||||
|
|
@ -283,9 +264,6 @@ impl SectionTree {
|
|||
KeyCode::Char('c') => {
|
||||
self.expanded.clear();
|
||||
}
|
||||
KeyCode::Char('v') => {
|
||||
self.show_tokens = !self.show_tokens;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
self.scroll_to_selected(height);
|
||||
|
|
@ -348,12 +326,7 @@ impl SectionTree {
|
|||
}
|
||||
} else if has_content {
|
||||
let content_indent = format!("{} │ ", " ".repeat(depth + 1));
|
||||
let body = if self.show_tokens && !section.token_ids.is_empty() {
|
||||
format_token_ids_wrapped(&section.token_ids)
|
||||
} else {
|
||||
section.content.clone()
|
||||
};
|
||||
let content_lines: Vec<&str> = body.lines().collect();
|
||||
let content_lines: Vec<&str> = section.content.lines().collect();
|
||||
let show = content_lines.len().min(50);
|
||||
for line in &content_lines[..show] {
|
||||
lines.push(Line::styled(
|
||||
|
|
@ -371,16 +344,3 @@ impl SectionTree {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Format token IDs for the content panel: space-separated, wrapped
|
||||
/// at 12 ids per line so they fit comfortably in a pane.
|
||||
fn format_token_ids_wrapped(ids: &[u32]) -> String {
|
||||
let mut out = String::new();
|
||||
for (i, id) in ids.iter().enumerate() {
|
||||
if i > 0 {
|
||||
if i % 12 == 0 { out.push('\n'); } else { out.push(' '); }
|
||||
}
|
||||
out.push_str(&id.to_string());
|
||||
}
|
||||
out
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue