Replace token counting with token generation via HuggingFace tokenizer
Add agent/tokenizer.rs with a global Qwen 3.5 tokenizer that generates actual token IDs, including chat template wrapping. ContextEntry now stores token_ids: Vec<u32> instead of tokens: usize — the count is derived from the length. ContextEntry::new() tokenizes automatically via the global tokenizer. ContextSection::push_entry() takes a raw ConversationEntry and tokenizes it. set_message() re-tokenizes without needing an external tokenizer parameter. Token IDs include the full chat template: <|im_start|>role\ncontent <|im_end|>\n — so concatenating token_ids across entries produces a ready-to-send prompt for vLLM's /v1/completions endpoint. The old tiktoken CoreBPE is now unused on Agent (will be removed in a follow-up). Token counts are now exact for Qwen 3.5 instead of the ~85-90% approximation from cl100k_base. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
70ee7abea5
commit
5e4067c04f
10 changed files with 540 additions and 97 deletions
1
.claude/scheduled_tasks.lock
Normal file
1
.claude/scheduled_tasks.lock
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
{"sessionId":"463c6050-b49f-4509-9d4b-4596af79a90e","pid":11339,"acquiredAt":1775649730868}
|
||||||
379
Cargo.lock
generated
379
Cargo.lock
generated
|
|
@ -13,6 +13,20 @@ dependencies = [
|
||||||
"version_check",
|
"version_check",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ahash"
|
||||||
|
version = "0.8.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"getrandom 0.3.4",
|
||||||
|
"once_cell",
|
||||||
|
"serde",
|
||||||
|
"version_check",
|
||||||
|
"zerocopy",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aho-corasick"
|
name = "aho-corasick"
|
||||||
version = "1.1.4"
|
version = "1.1.4"
|
||||||
|
|
@ -136,6 +150,12 @@ dependencies = [
|
||||||
"fs_extra",
|
"fs_extra",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "base64"
|
||||||
|
version = "0.13.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "base64"
|
name = "base64"
|
||||||
version = "0.22.1"
|
version = "0.22.1"
|
||||||
|
|
@ -402,6 +422,7 @@ dependencies = [
|
||||||
"itoa",
|
"itoa",
|
||||||
"rustversion",
|
"rustversion",
|
||||||
"ryu",
|
"ryu",
|
||||||
|
"serde",
|
||||||
"static_assertions",
|
"static_assertions",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -476,6 +497,19 @@ dependencies = [
|
||||||
"tokio-util",
|
"tokio-util",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "console"
|
||||||
|
version = "0.15.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
|
||||||
|
dependencies = [
|
||||||
|
"encode_unicode",
|
||||||
|
"libc",
|
||||||
|
"once_cell",
|
||||||
|
"unicode-width",
|
||||||
|
"windows-sys 0.59.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "convert_case"
|
name = "convert_case"
|
||||||
version = "0.10.0"
|
version = "0.10.0"
|
||||||
|
|
@ -583,14 +617,38 @@ dependencies = [
|
||||||
"phf",
|
"phf",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "darling"
|
||||||
|
version = "0.20.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
|
||||||
|
dependencies = [
|
||||||
|
"darling_core 0.20.11",
|
||||||
|
"darling_macro 0.20.11",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "darling"
|
name = "darling"
|
||||||
version = "0.23.0"
|
version = "0.23.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
|
checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"darling_core",
|
"darling_core 0.23.0",
|
||||||
"darling_macro",
|
"darling_macro 0.23.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "darling_core"
|
||||||
|
version = "0.20.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
|
||||||
|
dependencies = [
|
||||||
|
"fnv",
|
||||||
|
"ident_case",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"strsim",
|
||||||
|
"syn 2.0.117",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -606,17 +664,37 @@ dependencies = [
|
||||||
"syn 2.0.117",
|
"syn 2.0.117",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "darling_macro"
|
||||||
|
version = "0.20.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
|
||||||
|
dependencies = [
|
||||||
|
"darling_core 0.20.11",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.117",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "darling_macro"
|
name = "darling_macro"
|
||||||
version = "0.23.0"
|
version = "0.23.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
|
checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"darling_core",
|
"darling_core 0.23.0",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.117",
|
"syn 2.0.117",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dary_heap"
|
||||||
|
version = "0.3.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "deltae"
|
name = "deltae"
|
||||||
version = "0.3.2"
|
version = "0.3.2"
|
||||||
|
|
@ -632,6 +710,37 @@ dependencies = [
|
||||||
"powerfmt",
|
"powerfmt",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "derive_builder"
|
||||||
|
version = "0.20.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
|
||||||
|
dependencies = [
|
||||||
|
"derive_builder_macro",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "derive_builder_core"
|
||||||
|
version = "0.20.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
|
||||||
|
dependencies = [
|
||||||
|
"darling 0.20.11",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.117",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "derive_builder_macro"
|
||||||
|
version = "0.20.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
|
||||||
|
dependencies = [
|
||||||
|
"derive_builder_core",
|
||||||
|
"syn 2.0.117",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "derive_more"
|
name = "derive_more"
|
||||||
version = "2.1.1"
|
version = "2.1.1"
|
||||||
|
|
@ -712,6 +821,12 @@ version = "0.7.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9eb1aa714776b75c7e67e1da744b81a129b3ff919c8712b5e1b32252c1f07cc7"
|
checksum = "9eb1aa714776b75c7e67e1da744b81a129b3ff919c8712b5e1b32252c1f07cc7"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encode_unicode"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "env_filter"
|
name = "env_filter"
|
||||||
version = "1.0.1"
|
version = "1.0.1"
|
||||||
|
|
@ -751,6 +866,15 @@ dependencies = [
|
||||||
"windows-sys 0.61.2",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "esaxx-rs"
|
||||||
|
version = "0.1.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "euclid"
|
name = "euclid"
|
||||||
version = "0.22.14"
|
version = "0.22.14"
|
||||||
|
|
@ -1017,7 +1141,7 @@ version = "0.12.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash",
|
"ahash 0.7.8",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -1173,6 +1297,19 @@ dependencies = [
|
||||||
"serde_core",
|
"serde_core",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "indicatif"
|
||||||
|
version = "0.17.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
|
||||||
|
dependencies = [
|
||||||
|
"console",
|
||||||
|
"number_prefix",
|
||||||
|
"portable-atomic",
|
||||||
|
"unicode-width",
|
||||||
|
"web-time",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indoc"
|
name = "indoc"
|
||||||
version = "2.0.7"
|
version = "2.0.7"
|
||||||
|
|
@ -1194,7 +1331,7 @@ version = "0.3.12"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5eb2d60ef19920a3a9193c3e371f726ec1dafc045dac788d0fb3704272458971"
|
checksum = "5eb2d60ef19920a3a9193c3e371f726ec1dafc045dac788d0fb3704272458971"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"darling",
|
"darling 0.23.0",
|
||||||
"indoc",
|
"indoc",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
|
|
@ -1389,6 +1526,22 @@ dependencies = [
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "macro_rules_attribute"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520"
|
||||||
|
dependencies = [
|
||||||
|
"macro_rules_attribute-proc_macro",
|
||||||
|
"paste",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "macro_rules_attribute-proc_macro"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memchr"
|
name = "memchr"
|
||||||
version = "2.8.0"
|
version = "2.8.0"
|
||||||
|
|
@ -1437,6 +1590,28 @@ dependencies = [
|
||||||
"windows-sys 0.61.2",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "monostate"
|
||||||
|
version = "0.1.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3341a273f6c9d5bef1908f17b7267bbab0e95c9bf69a0d4dcf8e9e1b2c76ef67"
|
||||||
|
dependencies = [
|
||||||
|
"monostate-impl",
|
||||||
|
"serde",
|
||||||
|
"serde_core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "monostate-impl"
|
||||||
|
version = "0.1.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.117",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nix"
|
name = "nix"
|
||||||
version = "0.29.0"
|
version = "0.29.0"
|
||||||
|
|
@ -1495,6 +1670,12 @@ dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "number_prefix"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "once_cell"
|
name = "once_cell"
|
||||||
version = "1.21.4"
|
version = "1.21.4"
|
||||||
|
|
@ -1507,6 +1688,28 @@ version = "1.70.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "onig"
|
||||||
|
version = "6.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 2.11.0",
|
||||||
|
"libc",
|
||||||
|
"once_cell",
|
||||||
|
"onig_sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "onig_sys"
|
||||||
|
version = "69.9.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"pkg-config",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openssl-probe"
|
name = "openssl-probe"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
|
|
@ -1683,7 +1886,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
|
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"phf_shared",
|
"phf_shared",
|
||||||
"rand",
|
"rand 0.8.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -1714,12 +1917,18 @@ version = "0.2.17"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
|
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pkg-config"
|
||||||
|
version = "0.3.32"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "poc-memory"
|
name = "poc-memory"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"base64",
|
"base64 0.22.1",
|
||||||
"bincode",
|
"bincode",
|
||||||
"bytes",
|
"bytes",
|
||||||
"capnp",
|
"capnp",
|
||||||
|
|
@ -1757,6 +1966,7 @@ dependencies = [
|
||||||
"serde_urlencoded",
|
"serde_urlencoded",
|
||||||
"skillratings",
|
"skillratings",
|
||||||
"tiktoken-rs",
|
"tiktoken-rs",
|
||||||
|
"tokenizers",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-rustls",
|
"tokio-rustls",
|
||||||
"tokio-scoped",
|
"tokio-scoped",
|
||||||
|
|
@ -1787,6 +1997,15 @@ version = "0.2.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
|
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ppv-lite86"
|
||||||
|
version = "0.2.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
|
||||||
|
dependencies = [
|
||||||
|
"zerocopy",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "prettyplease"
|
name = "prettyplease"
|
||||||
version = "0.2.37"
|
version = "0.2.37"
|
||||||
|
|
@ -1910,7 +2129,27 @@ version = "0.8.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"rand_core",
|
"rand_core 0.6.4",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rand"
|
||||||
|
version = "0.9.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
|
||||||
|
dependencies = [
|
||||||
|
"rand_chacha",
|
||||||
|
"rand_core 0.9.5",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rand_chacha"
|
||||||
|
version = "0.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
|
||||||
|
dependencies = [
|
||||||
|
"ppv-lite86",
|
||||||
|
"rand_core 0.9.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -1919,6 +2158,15 @@ version = "0.6.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rand_core"
|
||||||
|
version = "0.9.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
|
||||||
|
dependencies = [
|
||||||
|
"getrandom 0.3.4",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ratatui"
|
name = "ratatui"
|
||||||
version = "0.30.0"
|
version = "0.30.0"
|
||||||
|
|
@ -2014,6 +2262,17 @@ dependencies = [
|
||||||
"rayon-core",
|
"rayon-core",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon-cond"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
"itertools",
|
||||||
|
"rayon",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rayon-core"
|
name = "rayon-core"
|
||||||
version = "1.13.0"
|
version = "1.13.0"
|
||||||
|
|
@ -2416,6 +2675,18 @@ dependencies = [
|
||||||
"windows-sys 0.61.2",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "spm_precompiled"
|
||||||
|
version = "0.1.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326"
|
||||||
|
dependencies = [
|
||||||
|
"base64 0.13.1",
|
||||||
|
"nom",
|
||||||
|
"serde",
|
||||||
|
"unicode-segmentation",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "static_assertions"
|
name = "static_assertions"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
|
|
@ -2511,7 +2782,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4676b37242ccbd1aabf56edb093a4827dc49086c0ffd764a5705899e0f35f8f7"
|
checksum = "4676b37242ccbd1aabf56edb093a4827dc49086c0ffd764a5705899e0f35f8f7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"base64",
|
"base64 0.22.1",
|
||||||
"bitflags 2.11.0",
|
"bitflags 2.11.0",
|
||||||
"fancy-regex 0.11.0",
|
"fancy-regex 0.11.0",
|
||||||
"filedescriptor",
|
"filedescriptor",
|
||||||
|
|
@ -2593,7 +2864,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3a19830747d9034cd9da43a60eaa8e552dfda7712424aebf187b7a60126bae0d"
|
checksum = "3a19830747d9034cd9da43a60eaa8e552dfda7712424aebf187b7a60126bae0d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"base64",
|
"base64 0.22.1",
|
||||||
"bstr",
|
"bstr",
|
||||||
"fancy-regex 0.13.0",
|
"fancy-regex 0.13.0",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
|
|
@ -2637,6 +2908,40 @@ version = "0.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tokenizers"
|
||||||
|
version = "0.21.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a620b996116a59e184c2fa2dfd8251ea34a36d0a514758c6f966386bd2e03476"
|
||||||
|
dependencies = [
|
||||||
|
"ahash 0.8.12",
|
||||||
|
"aho-corasick",
|
||||||
|
"compact_str",
|
||||||
|
"dary_heap",
|
||||||
|
"derive_builder",
|
||||||
|
"esaxx-rs",
|
||||||
|
"getrandom 0.3.4",
|
||||||
|
"indicatif",
|
||||||
|
"itertools",
|
||||||
|
"log",
|
||||||
|
"macro_rules_attribute",
|
||||||
|
"monostate",
|
||||||
|
"onig",
|
||||||
|
"paste",
|
||||||
|
"rand 0.9.2",
|
||||||
|
"rayon",
|
||||||
|
"rayon-cond",
|
||||||
|
"regex",
|
||||||
|
"regex-syntax",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"spm_precompiled",
|
||||||
|
"thiserror 2.0.18",
|
||||||
|
"unicode-normalization-alignments",
|
||||||
|
"unicode-segmentation",
|
||||||
|
"unicode_categories",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio"
|
name = "tokio"
|
||||||
version = "1.51.0"
|
version = "1.51.0"
|
||||||
|
|
@ -2774,6 +3079,15 @@ version = "1.0.24"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-normalization-alignments"
|
||||||
|
version = "0.1.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de"
|
||||||
|
dependencies = [
|
||||||
|
"smallvec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-segmentation"
|
name = "unicode-segmentation"
|
||||||
version = "1.13.2"
|
version = "1.13.2"
|
||||||
|
|
@ -2803,6 +3117,12 @@ version = "0.2.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode_categories"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "untrusted"
|
name = "untrusted"
|
||||||
version = "0.9.0"
|
version = "0.9.0"
|
||||||
|
|
@ -2954,6 +3274,16 @@ dependencies = [
|
||||||
"semver",
|
"semver",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "web-time"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
|
||||||
|
dependencies = [
|
||||||
|
"js-sys",
|
||||||
|
"wasm-bindgen",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "webpki-roots"
|
name = "webpki-roots"
|
||||||
version = "1.0.6"
|
version = "1.0.6"
|
||||||
|
|
@ -3125,6 +3455,15 @@ dependencies = [
|
||||||
"windows-targets",
|
"windows-targets",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.59.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-sys"
|
name = "windows-sys"
|
||||||
version = "0.61.2"
|
version = "0.61.2"
|
||||||
|
|
@ -3301,6 +3640,26 @@ version = "1.0.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
|
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerocopy"
|
||||||
|
version = "0.8.48"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
|
||||||
|
dependencies = [
|
||||||
|
"zerocopy-derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerocopy-derive"
|
||||||
|
version = "0.8.48"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.117",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zeroize"
|
name = "zeroize"
|
||||||
version = "1.8.2"
|
version = "1.8.2"
|
||||||
|
|
|
||||||
|
|
@ -60,6 +60,7 @@ capnp = "0.25"
|
||||||
capnp-rpc = "0.25"
|
capnp-rpc = "0.25"
|
||||||
|
|
||||||
tiktoken-rs = "0.9.1"
|
tiktoken-rs = "0.9.1"
|
||||||
|
tokenizers = "0.21"
|
||||||
skillratings = "0.28"
|
skillratings = "0.28"
|
||||||
|
|
||||||
http = "1"
|
http = "1"
|
||||||
|
|
|
||||||
|
|
@ -33,12 +33,25 @@ pub enum ConversationEntry {
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct ContextEntry {
|
pub struct ContextEntry {
|
||||||
pub entry: ConversationEntry,
|
pub entry: ConversationEntry,
|
||||||
/// Cached token count (0 for Log entries).
|
/// Cached tokenization — the actual token IDs for this entry's
|
||||||
pub tokens: usize,
|
/// contribution to the prompt (including chat template wrapping).
|
||||||
|
/// Empty for Log entries.
|
||||||
|
pub token_ids: Vec<u32>,
|
||||||
/// When this entry was added to the context.
|
/// When this entry was added to the context.
|
||||||
pub timestamp: Option<DateTime<Utc>>,
|
pub timestamp: Option<DateTime<Utc>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl ContextEntry {
|
||||||
|
/// Create a new entry, tokenizing via the global tokenizer.
|
||||||
|
pub fn new(entry: ConversationEntry, timestamp: Option<DateTime<Utc>>) -> Self {
|
||||||
|
let token_ids = super::tokenizer::tokenize_conv_entry(&entry);
|
||||||
|
Self { entry, token_ids, timestamp }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Token count — derived from cached token_ids length.
|
||||||
|
pub fn tokens(&self) -> usize { self.token_ids.len() }
|
||||||
|
}
|
||||||
|
|
||||||
/// A named section of the context window with cached token total.
|
/// A named section of the context window with cached token total.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct ContextSection {
|
pub struct ContextSection {
|
||||||
|
|
@ -58,32 +71,40 @@ impl ContextSection {
|
||||||
pub fn len(&self) -> usize { self.entries.len() }
|
pub fn len(&self) -> usize { self.entries.len() }
|
||||||
pub fn is_empty(&self) -> bool { self.entries.is_empty() }
|
pub fn is_empty(&self) -> bool { self.entries.is_empty() }
|
||||||
|
|
||||||
/// Push an entry, updating the cached token total.
|
/// Push a ConversationEntry, tokenizing it and updating the total.
|
||||||
|
pub fn push_entry(&mut self, entry: ConversationEntry, timestamp: Option<DateTime<Utc>>) {
|
||||||
|
let ce = ContextEntry::new(entry, timestamp);
|
||||||
|
self.tokens += ce.tokens();
|
||||||
|
self.entries.push(ce);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Push a pre-built ContextEntry (for restore, cloning, etc).
|
||||||
pub fn push(&mut self, entry: ContextEntry) {
|
pub fn push(&mut self, entry: ContextEntry) {
|
||||||
self.tokens += entry.tokens;
|
self.tokens += entry.tokens();
|
||||||
self.entries.push(entry);
|
self.entries.push(entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Replace an entry at `index`, adjusting the token total.
|
/// Replace an entry at `index`, adjusting the token total.
|
||||||
pub fn set(&mut self, index: usize, entry: ContextEntry) {
|
pub fn set(&mut self, index: usize, entry: ContextEntry) {
|
||||||
self.tokens -= self.entries[index].tokens;
|
self.tokens -= self.entries[index].tokens();
|
||||||
self.tokens += entry.tokens;
|
self.tokens += entry.tokens();
|
||||||
self.entries[index] = entry;
|
self.entries[index] = entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Remove an entry at `index`, adjusting the token total.
|
/// Remove an entry at `index`, adjusting the token total.
|
||||||
pub fn del(&mut self, index: usize) -> ContextEntry {
|
pub fn del(&mut self, index: usize) -> ContextEntry {
|
||||||
let removed = self.entries.remove(index);
|
let removed = self.entries.remove(index);
|
||||||
self.tokens -= removed.tokens;
|
self.tokens -= removed.tokens();
|
||||||
removed
|
removed
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Replace the message inside an entry, recomputing its token count.
|
/// Replace the message inside an entry, re-tokenizing it.
|
||||||
pub fn set_message(&mut self, index: usize, tokenizer: &CoreBPE, msg: Message) {
|
pub fn set_message(&mut self, index: usize, msg: Message) {
|
||||||
let old_tokens = self.entries[index].tokens;
|
let old_tokens = self.entries[index].tokens();
|
||||||
*self.entries[index].entry.message_mut() = msg;
|
*self.entries[index].entry.message_mut() = msg;
|
||||||
let new_tokens = msg_token_count(tokenizer, self.entries[index].entry.api_message());
|
self.entries[index].token_ids = super::tokenizer::tokenize_conv_entry(
|
||||||
self.entries[index].tokens = new_tokens;
|
&self.entries[index].entry);
|
||||||
|
let new_tokens = self.entries[index].tokens();
|
||||||
self.tokens = self.tokens - old_tokens + new_tokens;
|
self.tokens = self.tokens - old_tokens + new_tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -96,7 +117,7 @@ impl ContextSection {
|
||||||
|
|
||||||
/// Bulk replace all entries, recomputing token total.
|
/// Bulk replace all entries, recomputing token total.
|
||||||
pub fn set_entries(&mut self, entries: Vec<ContextEntry>) {
|
pub fn set_entries(&mut self, entries: Vec<ContextEntry>) {
|
||||||
self.tokens = entries.iter().map(|e| e.tokens).sum();
|
self.tokens = entries.iter().map(|e| e.tokens()).sum();
|
||||||
self.entries = entries;
|
self.entries = entries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -104,7 +125,7 @@ impl ContextSection {
|
||||||
pub fn trim(&mut self, fixed_tokens: usize) {
|
pub fn trim(&mut self, fixed_tokens: usize) {
|
||||||
let result = trim_entries(&self.entries, fixed_tokens);
|
let result = trim_entries(&self.entries, fixed_tokens);
|
||||||
self.entries = result;
|
self.entries = result;
|
||||||
self.tokens = self.entries.iter().map(|e| e.tokens).sum();
|
self.tokens = self.entries.iter().map(|e| e.tokens()).sum();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clear all entries.
|
/// Clear all entries.
|
||||||
|
|
@ -189,9 +210,9 @@ fn trim_entries(entries: &[ContextEntry], fixed_tokens: usize) -> Vec<ContextEnt
|
||||||
.map(|(_, e)| e.clone())
|
.map(|(_, e)| e.clone())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let entry_total = |r: &[ContextEntry]| -> usize { r.iter().map(|e| e.tokens).sum::<usize>() };
|
let entry_total = |r: &[ContextEntry]| -> usize { r.iter().map(|e| e.tokens()).sum::<usize>() };
|
||||||
let mem_total = |r: &[ContextEntry]| -> usize {
|
let mem_total = |r: &[ContextEntry]| -> usize {
|
||||||
r.iter().filter(|e| e.entry.is_memory()).map(|e| e.tokens).sum()
|
r.iter().filter(|e| e.entry.is_memory()).map(|e| e.tokens()).sum()
|
||||||
};
|
};
|
||||||
|
|
||||||
dbglog!("[trim] max={} fixed={} total={} entries={}",
|
dbglog!("[trim] max={} fixed={} total={} entries={}",
|
||||||
|
|
|
||||||
108
src/agent/mod.rs
108
src/agent/mod.rs
|
|
@ -16,6 +16,7 @@
|
||||||
pub mod api;
|
pub mod api;
|
||||||
pub mod context;
|
pub mod context;
|
||||||
pub mod oneshot;
|
pub mod oneshot;
|
||||||
|
pub mod tokenizer;
|
||||||
pub mod tools;
|
pub mod tools;
|
||||||
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
@ -196,19 +197,12 @@ impl Agent {
|
||||||
.expect("failed to load cl100k_base tokenizer");
|
.expect("failed to load cl100k_base tokenizer");
|
||||||
|
|
||||||
let mut system = ContextSection::new("System prompt");
|
let mut system = ContextSection::new("System prompt");
|
||||||
system.push(ContextEntry {
|
system.push(ContextEntry::new(
|
||||||
entry: ConversationEntry::System(Message::system(&system_prompt)),
|
ConversationEntry::System(Message::system(&system_prompt)), None));
|
||||||
tokens: context::msg_token_count(&tokenizer, &Message::system(&system_prompt)),
|
|
||||||
timestamp: None,
|
|
||||||
});
|
|
||||||
let mut identity = ContextSection::new("Identity");
|
let mut identity = ContextSection::new("Identity");
|
||||||
for (_name, content) in &personality {
|
for (_name, content) in &personality {
|
||||||
let msg = Message::user(content);
|
identity.push(ContextEntry::new(
|
||||||
identity.push(ContextEntry {
|
ConversationEntry::Message(Message::user(content)), None));
|
||||||
tokens: context::msg_token_count(&tokenizer, &msg),
|
|
||||||
entry: ConversationEntry::Message(msg),
|
|
||||||
timestamp: None,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
let context = ContextState {
|
let context = ContextState {
|
||||||
system,
|
system,
|
||||||
|
|
@ -324,12 +318,8 @@ impl Agent {
|
||||||
eprintln!("warning: failed to log entry: {:#}", e);
|
eprintln!("warning: failed to log entry: {:#}", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let tokens = if entry.is_log() || entry.is_thinking() { 0 } else {
|
self.context.conversation.push(ContextEntry::new(
|
||||||
context::msg_token_count(&self.tokenizer, entry.api_message())
|
entry, Some(chrono::Utc::now())));
|
||||||
};
|
|
||||||
self.context.conversation.push(ContextEntry {
|
|
||||||
entry, tokens, timestamp: Some(chrono::Utc::now()),
|
|
||||||
});
|
|
||||||
|
|
||||||
self.changed.notify_one();
|
self.changed.notify_one();
|
||||||
}
|
}
|
||||||
|
|
@ -348,22 +338,19 @@ impl Agent {
|
||||||
if let Some(idx) = self.streaming_index() {
|
if let Some(idx) = self.streaming_index() {
|
||||||
let mut msg = self.context.conversation.entries()[idx].entry.message().clone();
|
let mut msg = self.context.conversation.entries()[idx].entry.message().clone();
|
||||||
msg.append_content(text);
|
msg.append_content(text);
|
||||||
self.context.conversation.set_message(idx, &self.tokenizer, msg);
|
self.context.conversation.set_message(idx, msg);
|
||||||
} else {
|
} else {
|
||||||
let msg = Message {
|
self.context.conversation.push(ContextEntry::new(
|
||||||
role: Role::Assistant,
|
ConversationEntry::Message(Message {
|
||||||
content: Some(MessageContent::Text(text.to_string())),
|
role: Role::Assistant,
|
||||||
tool_calls: None,
|
content: Some(MessageContent::Text(text.to_string())),
|
||||||
tool_call_id: None,
|
tool_calls: None,
|
||||||
name: None,
|
tool_call_id: None,
|
||||||
timestamp: None,
|
name: None,
|
||||||
};
|
timestamp: None,
|
||||||
let tokens = context::msg_token_count(&self.tokenizer, &msg);
|
}),
|
||||||
self.context.conversation.push(ContextEntry {
|
None,
|
||||||
entry: ConversationEntry::Message(msg),
|
));
|
||||||
tokens,
|
|
||||||
timestamp: None,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
self.changed.notify_one();
|
self.changed.notify_one();
|
||||||
|
|
@ -375,12 +362,10 @@ impl Agent {
|
||||||
if let Some(i) = self.streaming_index() {
|
if let Some(i) = self.streaming_index() {
|
||||||
let mut stamped = msg.clone();
|
let mut stamped = msg.clone();
|
||||||
stamped.stamp();
|
stamped.stamp();
|
||||||
let tokens = context::msg_token_count(&self.tokenizer, &stamped);
|
self.context.conversation.set(i, ContextEntry::new(
|
||||||
self.context.conversation.set(i, ContextEntry {
|
ConversationEntry::Message(stamped),
|
||||||
entry: ConversationEntry::Message(stamped),
|
Some(chrono::Utc::now()),
|
||||||
tokens,
|
));
|
||||||
timestamp: Some(chrono::Utc::now()),
|
|
||||||
});
|
|
||||||
} else {
|
} else {
|
||||||
self.push_message(msg.clone());
|
self.push_message(msg.clone());
|
||||||
}
|
}
|
||||||
|
|
@ -770,16 +755,15 @@ impl Agent {
|
||||||
|
|
||||||
for node in journal_nodes[..cutoff_idx].iter().rev() {
|
for node in journal_nodes[..cutoff_idx].iter().rev() {
|
||||||
let msg = Message::user(&node.content);
|
let msg = Message::user(&node.content);
|
||||||
let tokens = context::msg_token_count(&self.tokenizer, &msg);
|
let ce = ContextEntry::new(
|
||||||
if total_tokens + tokens > journal_budget && !journal_entries.is_empty() {
|
ConversationEntry::Message(msg),
|
||||||
|
chrono::DateTime::from_timestamp(node.created_at, 0),
|
||||||
|
);
|
||||||
|
if total_tokens + ce.tokens() > journal_budget && !journal_entries.is_empty() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
journal_entries.push(ContextEntry {
|
total_tokens += ce.tokens();
|
||||||
entry: ConversationEntry::Message(msg),
|
journal_entries.push(ce);
|
||||||
tokens,
|
|
||||||
timestamp: chrono::DateTime::from_timestamp(node.created_at, 0),
|
|
||||||
});
|
|
||||||
total_tokens += tokens;
|
|
||||||
}
|
}
|
||||||
journal_entries.reverse();
|
journal_entries.reverse();
|
||||||
dbg_log!("[journal] loaded {} entries, {} tokens", journal_entries.len(), total_tokens);
|
dbg_log!("[journal] loaded {} entries, {} tokens", journal_entries.len(), total_tokens);
|
||||||
|
|
@ -842,12 +826,10 @@ impl Agent {
|
||||||
}
|
}
|
||||||
let mut new_msg = msg.clone();
|
let mut new_msg = msg.clone();
|
||||||
new_msg.content = Some(MessageContent::Text(replacement));
|
new_msg.content = Some(MessageContent::Text(replacement));
|
||||||
let tokens = context::msg_token_count(&self.tokenizer, &new_msg);
|
self.context.conversation.set(i, ContextEntry::new(
|
||||||
self.context.conversation.set(i, ContextEntry {
|
ConversationEntry::Message(new_msg),
|
||||||
entry: ConversationEntry::Message(new_msg),
|
old.timestamp,
|
||||||
tokens,
|
));
|
||||||
timestamp: old.timestamp,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.generation += 1;
|
self.generation += 1;
|
||||||
|
|
@ -866,19 +848,12 @@ impl Agent {
|
||||||
match crate::config::reload_for_model(&self.app_config, &self.prompt_file) {
|
match crate::config::reload_for_model(&self.app_config, &self.prompt_file) {
|
||||||
Ok((system_prompt, personality)) => {
|
Ok((system_prompt, personality)) => {
|
||||||
self.context.system.clear();
|
self.context.system.clear();
|
||||||
self.context.system.push(ContextEntry {
|
self.context.system.push(ContextEntry::new(
|
||||||
entry: ConversationEntry::System(Message::system(&system_prompt)),
|
ConversationEntry::System(Message::system(&system_prompt)), None));
|
||||||
tokens: context::msg_token_count(&self.tokenizer, &Message::system(&system_prompt)),
|
|
||||||
timestamp: None,
|
|
||||||
});
|
|
||||||
self.context.identity.clear();
|
self.context.identity.clear();
|
||||||
for (_name, content) in &personality {
|
for (_name, content) in &personality {
|
||||||
let msg = Message::user(content);
|
self.context.identity.push(ContextEntry::new(
|
||||||
self.context.identity.push(ContextEntry {
|
ConversationEntry::Message(Message::user(content)), None));
|
||||||
tokens: context::msg_token_count(&self.tokenizer, &msg),
|
|
||||||
entry: ConversationEntry::Message(msg),
|
|
||||||
timestamp: None,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
|
|
@ -932,16 +907,13 @@ impl Agent {
|
||||||
let all: Vec<ContextEntry> = entries.into_iter()
|
let all: Vec<ContextEntry> = entries.into_iter()
|
||||||
.filter(|e| !e.is_log() && !e.is_thinking() && e.message().role != Role::System)
|
.filter(|e| !e.is_log() && !e.is_thinking() && e.message().role != Role::System)
|
||||||
.map(|e| {
|
.map(|e| {
|
||||||
let tokens = if e.is_log() { 0 } else {
|
let timestamp = if e.is_log() || e.is_thinking() { None } else {
|
||||||
context::msg_token_count(&self.tokenizer, e.api_message())
|
|
||||||
};
|
|
||||||
let timestamp = if e.is_log() { None } else {
|
|
||||||
e.message().timestamp.as_ref().and_then(|ts| {
|
e.message().timestamp.as_ref().and_then(|ts| {
|
||||||
chrono::DateTime::parse_from_rfc3339(ts).ok()
|
chrono::DateTime::parse_from_rfc3339(ts).ok()
|
||||||
.map(|dt| dt.with_timezone(&chrono::Utc))
|
.map(|dt| dt.with_timezone(&chrono::Utc))
|
||||||
})
|
})
|
||||||
};
|
};
|
||||||
ContextEntry { entry: e, tokens, timestamp }
|
ContextEntry::new(e, timestamp)
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
let mem_count = all.iter().filter(|e| e.entry.is_memory()).count();
|
let mem_count = all.iter().filter(|e| e.entry.is_memory()).count();
|
||||||
|
|
|
||||||
82
src/agent/tokenizer.rs
Normal file
82
src/agent/tokenizer.rs
Normal file
|
|
@ -0,0 +1,82 @@
|
||||||
|
// tokenizer.rs — Qwen tokenizer for direct token generation
|
||||||
|
//
|
||||||
|
// Loads the HuggingFace tokenizer.json for the target model and provides
|
||||||
|
// tokenization for context entries. The tokenizer is loaded once globally
|
||||||
|
// and shared across all callers.
|
||||||
|
//
|
||||||
|
// Token IDs include the chat template wrapping:
|
||||||
|
// <|im_start|>role\ncontent<|im_end|>\n
|
||||||
|
// so concatenating token_ids across entries produces a ready-to-send prompt.
|
||||||
|
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
use tokenizers::Tokenizer;
|
||||||
|
|
||||||
|
static TOKENIZER: OnceLock<Tokenizer> = OnceLock::new();
|
||||||
|
|
||||||
|
/// Special token IDs for Qwen 3.5
///
/// `IM_START` is the ID pushed at the start of every wrapped chat entry
/// (the `<|im_start|>` marker of the chat template).
// NOTE(review): the numeric values are hard-coded and are not checked against
// the loaded tokenizer.json — confirm they match the deployed model.
|
||||||
|
pub const IM_START: u32 = 248045;
|
||||||
|
/// ID pushed right after an entry's content (the `<|im_end|>` marker of the
/// chat template).
pub const IM_END: u32 = 248046;
|
||||||
|
|
||||||
|
/// Initialize the global tokenizer from a file path.
|
||||||
|
/// Call once at startup. Panics if the file can't be loaded.
|
||||||
|
pub fn init(path: &str) {
|
||||||
|
let t = Tokenizer::from_file(path)
|
||||||
|
.unwrap_or_else(|e| panic!("failed to load tokenizer from {}: {}", path, e));
|
||||||
|
TOKENIZER.set(t).ok();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the global tokenizer. Panics if not initialized.
|
||||||
|
fn get() -> &'static Tokenizer {
|
||||||
|
TOKENIZER.get().expect("tokenizer not initialized — call tokenizer::init() first")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tokenize a raw string, returning token IDs.
|
||||||
|
pub fn encode(text: &str) -> Vec<u32> {
|
||||||
|
get().encode(text, false)
|
||||||
|
.unwrap_or_else(|e| panic!("tokenization failed: {}", e))
|
||||||
|
.get_ids()
|
||||||
|
.to_vec()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tokenize a chat entry with template wrapping:
|
||||||
|
/// <|im_start|>role\ncontent<|im_end|>\n
|
||||||
|
/// Returns the complete token ID sequence for this entry.
|
||||||
|
pub fn tokenize_entry(role: &str, content: &str) -> Vec<u32> {
|
||||||
|
let mut ids = Vec::new();
|
||||||
|
ids.push(IM_START);
|
||||||
|
ids.extend(encode(role));
|
||||||
|
ids.extend(encode("\n"));
|
||||||
|
ids.extend(encode(content));
|
||||||
|
ids.push(IM_END);
|
||||||
|
ids.extend(encode("\n"));
|
||||||
|
ids
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Count tokens for a string (convenience for budget checks).
|
||||||
|
pub fn count(text: &str) -> usize {
|
||||||
|
encode(text).len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decode token IDs back to text.
|
||||||
|
pub fn decode(ids: &[u32]) -> String {
|
||||||
|
get().decode(ids, true)
|
||||||
|
.unwrap_or_else(|e| panic!("detokenization failed: {}", e))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the tokenizer is initialized.
|
||||||
|
pub fn is_initialized() -> bool {
|
||||||
|
TOKENIZER.get().is_some()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tokenize a ConversationEntry with its role and content.
|
||||||
|
pub fn tokenize_conv_entry(entry: &super::context::ConversationEntry) -> Vec<u32> {
|
||||||
|
use super::context::ConversationEntry;
|
||||||
|
match entry {
|
||||||
|
ConversationEntry::System(m) => tokenize_entry("system", m.content_text()),
|
||||||
|
ConversationEntry::Message(m) => tokenize_entry(m.role_str(), m.content_text()),
|
||||||
|
ConversationEntry::Memory { message, .. } => tokenize_entry("memory", message.content_text()),
|
||||||
|
ConversationEntry::Dmn(m) => tokenize_entry("dmn", m.content_text()),
|
||||||
|
ConversationEntry::Thinking(text) => tokenize_entry("thinking", text),
|
||||||
|
ConversationEntry::Log(_) => vec![], // logs don't consume tokens
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -950,6 +950,13 @@ fn main() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize the Qwen tokenizer for direct token generation
|
||||||
|
let tokenizer_path = dirs::home_dir().unwrap_or_default()
|
||||||
|
.join(".consciousness/tokenizer-qwen35.json");
|
||||||
|
if tokenizer_path.exists() {
|
||||||
|
crate::agent::tokenizer::init(&tokenizer_path.to_string_lossy());
|
||||||
|
}
|
||||||
|
|
||||||
let cli = Cli::parse();
|
let cli = Cli::parse();
|
||||||
|
|
||||||
if let Err(e) = cli.command.run() {
|
if let Err(e) = cli.command.run() {
|
||||||
|
|
|
||||||
|
|
@ -345,7 +345,7 @@ where
|
||||||
let mut cumulative: Vec<usize> = Vec::with_capacity(entries.len());
|
let mut cumulative: Vec<usize> = Vec::with_capacity(entries.len());
|
||||||
let mut running = 0;
|
let mut running = 0;
|
||||||
for e in entries {
|
for e in entries {
|
||||||
running += e.tokens;
|
running += e.tokens();
|
||||||
cumulative.push(running);
|
cumulative.push(running);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ impl ConsciousScreen {
|
||||||
};
|
};
|
||||||
mem_children.push(SectionView {
|
mem_children.push(SectionView {
|
||||||
name: key.clone(),
|
name: key.clone(),
|
||||||
tokens: ce.tokens,
|
tokens: ce.tokens(),
|
||||||
content: ce.entry.message().content_text().to_string(),
|
content: ce.entry.message().content_text().to_string(),
|
||||||
children: Vec::new(),
|
children: Vec::new(),
|
||||||
status,
|
status,
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ pub fn section_to_view(section: &ContextSection) -> SectionView {
|
||||||
};
|
};
|
||||||
SectionView {
|
SectionView {
|
||||||
name: ce.entry.label(),
|
name: ce.entry.label(),
|
||||||
tokens: ce.tokens,
|
tokens: ce.tokens(),
|
||||||
content,
|
content,
|
||||||
children: Vec::new(),
|
children: Vec::new(),
|
||||||
status: String::new(),
|
status: String::new(),
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue