Replace token counting with token generation via HuggingFace tokenizer

Add agent/tokenizer.rs with global Qwen 3.5 tokenizer that generates
actual token IDs including chat template wrapping. ContextEntry now
stores token_ids: Vec<u32> instead of tokens: usize — the count is
derived from the length.

ContextEntry::new() tokenizes automatically via the global tokenizer.
ContextSection::push_entry() takes a raw ConversationEntry and
tokenizes it. set_message() re-tokenizes without needing an external
tokenizer parameter.

Token IDs include the full chat template: <|im_start|>role\ncontent
<|im_end|>\n — so concatenating token_ids across entries produces a
ready-to-send prompt for vLLM's /v1/completions endpoint.

The old tiktoken CoreBPE is now unused on Agent (will be removed in
a followup). Token counts are now exact for Qwen 3.5 instead of the
~85-90% approximation from cl100k_base.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-08 11:20:03 -04:00
parent 70ee7abea5
commit 5e4067c04f
10 changed files with 540 additions and 97 deletions

View file

@ -0,0 +1 @@
{"sessionId":"463c6050-b49f-4509-9d4b-4596af79a90e","pid":11339,"acquiredAt":1775649730868}

379
Cargo.lock generated
View file

@ -13,6 +13,20 @@ dependencies = [
"version_check",
]
[[package]]
name = "ahash"
version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
"cfg-if",
"getrandom 0.3.4",
"once_cell",
"serde",
"version_check",
"zerocopy",
]
[[package]]
name = "aho-corasick"
version = "1.1.4"
@ -136,6 +150,12 @@ dependencies = [
"fs_extra",
]
[[package]]
name = "base64"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
[[package]]
name = "base64"
version = "0.22.1"
@ -402,6 +422,7 @@ dependencies = [
"itoa",
"rustversion",
"ryu",
"serde",
"static_assertions",
]
@ -476,6 +497,19 @@ dependencies = [
"tokio-util",
]
[[package]]
name = "console"
version = "0.15.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
dependencies = [
"encode_unicode",
"libc",
"once_cell",
"unicode-width",
"windows-sys 0.59.0",
]
[[package]]
name = "convert_case"
version = "0.10.0"
@ -583,14 +617,38 @@ dependencies = [
"phf",
]
[[package]]
name = "darling"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
dependencies = [
"darling_core 0.20.11",
"darling_macro 0.20.11",
]
[[package]]
name = "darling"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
dependencies = [
"darling_core",
"darling_macro",
"darling_core 0.23.0",
"darling_macro 0.23.0",
]
[[package]]
name = "darling_core"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn 2.0.117",
]
[[package]]
@ -606,17 +664,37 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "darling_macro"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core 0.20.11",
"quote",
"syn 2.0.117",
]
[[package]]
name = "darling_macro"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
dependencies = [
"darling_core",
"darling_core 0.23.0",
"quote",
"syn 2.0.117",
]
[[package]]
name = "dary_heap"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04"
dependencies = [
"serde",
]
[[package]]
name = "deltae"
version = "0.3.2"
@ -632,6 +710,37 @@ dependencies = [
"powerfmt",
]
[[package]]
name = "derive_builder"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
dependencies = [
"derive_builder_macro",
]
[[package]]
name = "derive_builder_core"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
dependencies = [
"darling 0.20.11",
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "derive_builder_macro"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [
"derive_builder_core",
"syn 2.0.117",
]
[[package]]
name = "derive_more"
version = "2.1.1"
@ -712,6 +821,12 @@ version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9eb1aa714776b75c7e67e1da744b81a129b3ff919c8712b5e1b32252c1f07cc7"
[[package]]
name = "encode_unicode"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "env_filter"
version = "1.0.1"
@ -751,6 +866,15 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "esaxx-rs"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
dependencies = [
"cc",
]
[[package]]
name = "euclid"
version = "0.22.14"
@ -1017,7 +1141,7 @@ version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash",
"ahash 0.7.8",
]
[[package]]
@ -1173,6 +1297,19 @@ dependencies = [
"serde_core",
]
[[package]]
name = "indicatif"
version = "0.17.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
dependencies = [
"console",
"number_prefix",
"portable-atomic",
"unicode-width",
"web-time",
]
[[package]]
name = "indoc"
version = "2.0.7"
@ -1194,7 +1331,7 @@ version = "0.3.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5eb2d60ef19920a3a9193c3e371f726ec1dafc045dac788d0fb3704272458971"
dependencies = [
"darling",
"darling 0.23.0",
"indoc",
"proc-macro2",
"quote",
@ -1389,6 +1526,22 @@ dependencies = [
"winapi",
]
[[package]]
name = "macro_rules_attribute"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520"
dependencies = [
"macro_rules_attribute-proc_macro",
"paste",
]
[[package]]
name = "macro_rules_attribute-proc_macro"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30"
[[package]]
name = "memchr"
version = "2.8.0"
@ -1437,6 +1590,28 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "monostate"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3341a273f6c9d5bef1908f17b7267bbab0e95c9bf69a0d4dcf8e9e1b2c76ef67"
dependencies = [
"monostate-impl",
"serde",
"serde_core",
]
[[package]]
name = "monostate-impl"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "nix"
version = "0.29.0"
@ -1495,6 +1670,12 @@ dependencies = [
"libc",
]
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "once_cell"
version = "1.21.4"
@ -1507,6 +1688,28 @@ version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
name = "onig"
version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
dependencies = [
"bitflags 2.11.0",
"libc",
"once_cell",
"onig_sys",
]
[[package]]
name = "onig_sys"
version = "69.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc"
dependencies = [
"cc",
"pkg-config",
]
[[package]]
name = "openssl-probe"
version = "0.2.1"
@ -1683,7 +1886,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [
"phf_shared",
"rand",
"rand 0.8.5",
]
[[package]]
@ -1714,12 +1917,18 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "poc-memory"
version = "0.4.0"
dependencies = [
"anyhow",
"base64",
"base64 0.22.1",
"bincode",
"bytes",
"capnp",
@ -1757,6 +1966,7 @@ dependencies = [
"serde_urlencoded",
"skillratings",
"tiktoken-rs",
"tokenizers",
"tokio",
"tokio-rustls",
"tokio-scoped",
@ -1787,6 +1997,15 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "ppv-lite86"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
"zerocopy",
]
[[package]]
name = "prettyplease"
version = "0.2.37"
@ -1910,7 +2129,27 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"rand_core",
"rand_core 0.6.4",
]
[[package]]
name = "rand"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
"rand_chacha",
"rand_core 0.9.5",
]
[[package]]
name = "rand_chacha"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
"rand_core 0.9.5",
]
[[package]]
@ -1919,6 +2158,15 @@ version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "rand_core"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
dependencies = [
"getrandom 0.3.4",
]
[[package]]
name = "ratatui"
version = "0.30.0"
@ -2014,6 +2262,17 @@ dependencies = [
"rayon-core",
]
[[package]]
name = "rayon-cond"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f"
dependencies = [
"either",
"itertools",
"rayon",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
@ -2416,6 +2675,18 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "spm_precompiled"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326"
dependencies = [
"base64 0.13.1",
"nom",
"serde",
"unicode-segmentation",
]
[[package]]
name = "static_assertions"
version = "1.1.0"
@ -2511,7 +2782,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4676b37242ccbd1aabf56edb093a4827dc49086c0ffd764a5705899e0f35f8f7"
dependencies = [
"anyhow",
"base64",
"base64 0.22.1",
"bitflags 2.11.0",
"fancy-regex 0.11.0",
"filedescriptor",
@ -2593,7 +2864,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a19830747d9034cd9da43a60eaa8e552dfda7712424aebf187b7a60126bae0d"
dependencies = [
"anyhow",
"base64",
"base64 0.22.1",
"bstr",
"fancy-regex 0.13.0",
"lazy_static",
@ -2637,6 +2908,40 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokenizers"
version = "0.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a620b996116a59e184c2fa2dfd8251ea34a36d0a514758c6f966386bd2e03476"
dependencies = [
"ahash 0.8.12",
"aho-corasick",
"compact_str",
"dary_heap",
"derive_builder",
"esaxx-rs",
"getrandom 0.3.4",
"indicatif",
"itertools",
"log",
"macro_rules_attribute",
"monostate",
"onig",
"paste",
"rand 0.9.2",
"rayon",
"rayon-cond",
"regex",
"regex-syntax",
"serde",
"serde_json",
"spm_precompiled",
"thiserror 2.0.18",
"unicode-normalization-alignments",
"unicode-segmentation",
"unicode_categories",
]
[[package]]
name = "tokio"
version = "1.51.0"
@ -2774,6 +3079,15 @@ version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "unicode-normalization-alignments"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de"
dependencies = [
"smallvec",
]
[[package]]
name = "unicode-segmentation"
version = "1.13.2"
@ -2803,6 +3117,12 @@ version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "unicode_categories"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "untrusted"
version = "0.9.0"
@ -2954,6 +3274,16 @@ dependencies = [
"semver",
]
[[package]]
name = "web-time"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "webpki-roots"
version = "1.0.6"
@ -3125,6 +3455,15 @@ dependencies = [
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.61.2"
@ -3301,6 +3640,26 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]]
name = "zerocopy"
version = "0.8.48"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.48"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "zeroize"
version = "1.8.2"

View file

@ -60,6 +60,7 @@ capnp = "0.25"
capnp-rpc = "0.25"
tiktoken-rs = "0.9.1"
tokenizers = "0.21"
skillratings = "0.28"
http = "1"

View file

@ -33,12 +33,25 @@ pub enum ConversationEntry {
#[derive(Debug, Clone)]
pub struct ContextEntry {
pub entry: ConversationEntry,
/// Cached token count (0 for Log entries).
pub tokens: usize,
/// Cached tokenization — the actual token IDs for this entry's
/// contribution to the prompt (including chat template wrapping).
/// Empty for Log entries.
pub token_ids: Vec<u32>,
/// When this entry was added to the context.
pub timestamp: Option<DateTime<Utc>>,
}
impl ContextEntry {
/// Create a new entry, tokenizing via the global tokenizer.
pub fn new(entry: ConversationEntry, timestamp: Option<DateTime<Utc>>) -> Self {
let token_ids = super::tokenizer::tokenize_conv_entry(&entry);
Self { entry, token_ids, timestamp }
}
/// Token count — derived from cached token_ids length.
pub fn tokens(&self) -> usize { self.token_ids.len() }
}
/// A named section of the context window with cached token total.
#[derive(Debug, Clone)]
pub struct ContextSection {
@ -58,32 +71,40 @@ impl ContextSection {
pub fn len(&self) -> usize { self.entries.len() }
pub fn is_empty(&self) -> bool { self.entries.is_empty() }
/// Push an entry, updating the cached token total.
/// Push a ConversationEntry, tokenizing it and updating the total.
pub fn push_entry(&mut self, entry: ConversationEntry, timestamp: Option<DateTime<Utc>>) {
let ce = ContextEntry::new(entry, timestamp);
self.tokens += ce.tokens();
self.entries.push(ce);
}
/// Push a pre-built ContextEntry (for restore, cloning, etc).
pub fn push(&mut self, entry: ContextEntry) {
self.tokens += entry.tokens;
self.tokens += entry.tokens();
self.entries.push(entry);
}
/// Replace an entry at `index`, adjusting the token total.
pub fn set(&mut self, index: usize, entry: ContextEntry) {
self.tokens -= self.entries[index].tokens;
self.tokens += entry.tokens;
self.tokens -= self.entries[index].tokens();
self.tokens += entry.tokens();
self.entries[index] = entry;
}
/// Remove an entry at `index`, adjusting the token total.
pub fn del(&mut self, index: usize) -> ContextEntry {
let removed = self.entries.remove(index);
self.tokens -= removed.tokens;
self.tokens -= removed.tokens();
removed
}
/// Replace the message inside an entry, recomputing its token count.
pub fn set_message(&mut self, index: usize, tokenizer: &CoreBPE, msg: Message) {
let old_tokens = self.entries[index].tokens;
/// Replace the message inside an entry, re-tokenizing it.
pub fn set_message(&mut self, index: usize, msg: Message) {
let old_tokens = self.entries[index].tokens();
*self.entries[index].entry.message_mut() = msg;
let new_tokens = msg_token_count(tokenizer, self.entries[index].entry.api_message());
self.entries[index].tokens = new_tokens;
self.entries[index].token_ids = super::tokenizer::tokenize_conv_entry(
&self.entries[index].entry);
let new_tokens = self.entries[index].tokens();
self.tokens = self.tokens - old_tokens + new_tokens;
}
@ -96,7 +117,7 @@ impl ContextSection {
/// Bulk replace all entries, recomputing token total.
pub fn set_entries(&mut self, entries: Vec<ContextEntry>) {
self.tokens = entries.iter().map(|e| e.tokens).sum();
self.tokens = entries.iter().map(|e| e.tokens()).sum();
self.entries = entries;
}
@ -104,7 +125,7 @@ impl ContextSection {
pub fn trim(&mut self, fixed_tokens: usize) {
let result = trim_entries(&self.entries, fixed_tokens);
self.entries = result;
self.tokens = self.entries.iter().map(|e| e.tokens).sum();
self.tokens = self.entries.iter().map(|e| e.tokens()).sum();
}
/// Clear all entries.
@ -189,9 +210,9 @@ fn trim_entries(entries: &[ContextEntry], fixed_tokens: usize) -> Vec<ContextEnt
.map(|(_, e)| e.clone())
.collect();
let entry_total = |r: &[ContextEntry]| -> usize { r.iter().map(|e| e.tokens).sum::<usize>() };
let entry_total = |r: &[ContextEntry]| -> usize { r.iter().map(|e| e.tokens()).sum::<usize>() };
let mem_total = |r: &[ContextEntry]| -> usize {
r.iter().filter(|e| e.entry.is_memory()).map(|e| e.tokens).sum()
r.iter().filter(|e| e.entry.is_memory()).map(|e| e.tokens()).sum()
};
dbglog!("[trim] max={} fixed={} total={} entries={}",

View file

@ -16,6 +16,7 @@
pub mod api;
pub mod context;
pub mod oneshot;
pub mod tokenizer;
pub mod tools;
use std::sync::Arc;
@ -196,19 +197,12 @@ impl Agent {
.expect("failed to load cl100k_base tokenizer");
let mut system = ContextSection::new("System prompt");
system.push(ContextEntry {
entry: ConversationEntry::System(Message::system(&system_prompt)),
tokens: context::msg_token_count(&tokenizer, &Message::system(&system_prompt)),
timestamp: None,
});
system.push(ContextEntry::new(
ConversationEntry::System(Message::system(&system_prompt)), None));
let mut identity = ContextSection::new("Identity");
for (_name, content) in &personality {
let msg = Message::user(content);
identity.push(ContextEntry {
tokens: context::msg_token_count(&tokenizer, &msg),
entry: ConversationEntry::Message(msg),
timestamp: None,
});
identity.push(ContextEntry::new(
ConversationEntry::Message(Message::user(content)), None));
}
let context = ContextState {
system,
@ -324,12 +318,8 @@ impl Agent {
eprintln!("warning: failed to log entry: {:#}", e);
}
}
let tokens = if entry.is_log() || entry.is_thinking() { 0 } else {
context::msg_token_count(&self.tokenizer, entry.api_message())
};
self.context.conversation.push(ContextEntry {
entry, tokens, timestamp: Some(chrono::Utc::now()),
});
self.context.conversation.push(ContextEntry::new(
entry, Some(chrono::Utc::now())));
self.changed.notify_one();
}
@ -348,22 +338,19 @@ impl Agent {
if let Some(idx) = self.streaming_index() {
let mut msg = self.context.conversation.entries()[idx].entry.message().clone();
msg.append_content(text);
self.context.conversation.set_message(idx, &self.tokenizer, msg);
self.context.conversation.set_message(idx, msg);
} else {
let msg = Message {
self.context.conversation.push(ContextEntry::new(
ConversationEntry::Message(Message {
role: Role::Assistant,
content: Some(MessageContent::Text(text.to_string())),
tool_calls: None,
tool_call_id: None,
name: None,
timestamp: None,
};
let tokens = context::msg_token_count(&self.tokenizer, &msg);
self.context.conversation.push(ContextEntry {
entry: ConversationEntry::Message(msg),
tokens,
timestamp: None,
});
}),
None,
));
}
self.changed.notify_one();
@ -375,12 +362,10 @@ impl Agent {
if let Some(i) = self.streaming_index() {
let mut stamped = msg.clone();
stamped.stamp();
let tokens = context::msg_token_count(&self.tokenizer, &stamped);
self.context.conversation.set(i, ContextEntry {
entry: ConversationEntry::Message(stamped),
tokens,
timestamp: Some(chrono::Utc::now()),
});
self.context.conversation.set(i, ContextEntry::new(
ConversationEntry::Message(stamped),
Some(chrono::Utc::now()),
));
} else {
self.push_message(msg.clone());
}
@ -770,16 +755,15 @@ impl Agent {
for node in journal_nodes[..cutoff_idx].iter().rev() {
let msg = Message::user(&node.content);
let tokens = context::msg_token_count(&self.tokenizer, &msg);
if total_tokens + tokens > journal_budget && !journal_entries.is_empty() {
let ce = ContextEntry::new(
ConversationEntry::Message(msg),
chrono::DateTime::from_timestamp(node.created_at, 0),
);
if total_tokens + ce.tokens() > journal_budget && !journal_entries.is_empty() {
break;
}
journal_entries.push(ContextEntry {
entry: ConversationEntry::Message(msg),
tokens,
timestamp: chrono::DateTime::from_timestamp(node.created_at, 0),
});
total_tokens += tokens;
total_tokens += ce.tokens();
journal_entries.push(ce);
}
journal_entries.reverse();
dbg_log!("[journal] loaded {} entries, {} tokens", journal_entries.len(), total_tokens);
@ -842,12 +826,10 @@ impl Agent {
}
let mut new_msg = msg.clone();
new_msg.content = Some(MessageContent::Text(replacement));
let tokens = context::msg_token_count(&self.tokenizer, &new_msg);
self.context.conversation.set(i, ContextEntry {
entry: ConversationEntry::Message(new_msg),
tokens,
timestamp: old.timestamp,
});
self.context.conversation.set(i, ContextEntry::new(
ConversationEntry::Message(new_msg),
old.timestamp,
));
}
}
self.generation += 1;
@ -866,19 +848,12 @@ impl Agent {
match crate::config::reload_for_model(&self.app_config, &self.prompt_file) {
Ok((system_prompt, personality)) => {
self.context.system.clear();
self.context.system.push(ContextEntry {
entry: ConversationEntry::System(Message::system(&system_prompt)),
tokens: context::msg_token_count(&self.tokenizer, &Message::system(&system_prompt)),
timestamp: None,
});
self.context.system.push(ContextEntry::new(
ConversationEntry::System(Message::system(&system_prompt)), None));
self.context.identity.clear();
for (_name, content) in &personality {
let msg = Message::user(content);
self.context.identity.push(ContextEntry {
tokens: context::msg_token_count(&self.tokenizer, &msg),
entry: ConversationEntry::Message(msg),
timestamp: None,
});
self.context.identity.push(ContextEntry::new(
ConversationEntry::Message(Message::user(content)), None));
}
}
Err(e) => {
@ -932,16 +907,13 @@ impl Agent {
let all: Vec<ContextEntry> = entries.into_iter()
.filter(|e| !e.is_log() && !e.is_thinking() && e.message().role != Role::System)
.map(|e| {
let tokens = if e.is_log() { 0 } else {
context::msg_token_count(&self.tokenizer, e.api_message())
};
let timestamp = if e.is_log() { None } else {
let timestamp = if e.is_log() || e.is_thinking() { None } else {
e.message().timestamp.as_ref().and_then(|ts| {
chrono::DateTime::parse_from_rfc3339(ts).ok()
.map(|dt| dt.with_timezone(&chrono::Utc))
})
};
ContextEntry { entry: e, tokens, timestamp }
ContextEntry::new(e, timestamp)
})
.collect();
let mem_count = all.iter().filter(|e| e.entry.is_memory()).count();

82
src/agent/tokenizer.rs Normal file
View file

@ -0,0 +1,82 @@
// tokenizer.rs — Qwen tokenizer for direct token generation
//
// Loads the HuggingFace tokenizer.json for the target model and provides
// tokenization for context entries. The tokenizer is loaded once globally
// and shared across all callers.
//
// Token IDs include the chat template wrapping:
// <|im_start|>role\ncontent<|im_end|>\n
// so concatenating token_ids across entries produces a ready-to-send prompt.
use std::sync::OnceLock;
use tokenizers::Tokenizer;
// Process-wide tokenizer handle: written once by `init()`, read by `get()`.
static TOKENIZER: OnceLock<Tokenizer> = OnceLock::new();
/// Special token IDs for Qwen 3.5
// These are the `<|im_start|>` / `<|im_end|>` chat-template delimiters,
// pushed into the output as raw IDs rather than re-encoded text.
// NOTE(review): token IDs are model-specific — verify these match the vocab
// of the tokenizer.json actually loaded at runtime (Qwen2.x, for instance,
// used 151644/151645 for the same delimiters).
pub const IM_START: u32 = 248045;
pub const IM_END: u32 = 248046;
/// Install the process-wide tokenizer from a tokenizer.json at `path`.
///
/// Intended to run exactly once during startup. A missing or malformed file
/// is fatal (panics); calling again after a successful init is a no-op
/// because the `OnceLock` already holds a value.
pub fn init(path: &str) {
    let tokenizer = match Tokenizer::from_file(path) {
        Ok(t) => t,
        Err(e) => panic!("failed to load tokenizer from {}: {}", path, e),
    };
    // Losing the race (or a repeat call) is deliberately ignored.
    let _ = TOKENIZER.set(tokenizer);
}
/// Borrow the global tokenizer; panics when `init()` has not run yet.
fn get() -> &'static Tokenizer {
    match TOKENIZER.get() {
        Some(t) => t,
        None => panic!("tokenizer not initialized — call tokenizer::init() first"),
    }
}
/// Encode `text` into token IDs (no automatic special-token insertion).
/// Panics on tokenizer error, which only occurs on internal tokenizer bugs.
pub fn encode(text: &str) -> Vec<u32> {
    match get().encode(text, false) {
        Ok(encoding) => encoding.get_ids().to_vec(),
        Err(e) => panic!("tokenization failed: {}", e),
    }
}
/// Tokenize one chat turn with the Qwen template wrapping:
/// `<|im_start|>role\ncontent<|im_end|>\n`
///
/// The delimiters go in as raw special-token IDs. The text between them is
/// encoded as a single `role\ncontent` string: BPE is not concatenative, so
/// encoding role, "\n" and content as three separate calls can merge tokens
/// differently at the boundaries than the model's real chat template (which
/// tokenizes everything between special tokens as one segment) and produce
/// sequences the model never saw in training. The trailing "\n" after
/// `<|im_end|>` is its own segment in the real template too (it directly
/// follows a special token), so encoding it separately is faithful.
pub fn tokenize_entry(role: &str, content: &str) -> Vec<u32> {
    let mut ids = Vec::new();
    ids.push(IM_START);
    // One encode for the whole inter-delimiter span.
    ids.extend(encode(&format!("{}\n{}", role, content)));
    ids.push(IM_END);
    ids.extend(encode("\n"));
    ids
}
/// Number of tokens `text` encodes to — convenience for budget arithmetic.
pub fn count(text: &str) -> usize {
    let ids = encode(text);
    ids.len()
}
/// Turn token IDs back into text, skipping special tokens in the output.
/// Panics on tokenizer error.
pub fn decode(ids: &[u32]) -> String {
    match get().decode(ids, true) {
        Ok(text) => text,
        Err(e) => panic!("detokenization failed: {}", e),
    }
}
/// Whether `init()` has already installed a global tokenizer.
pub fn is_initialized() -> bool {
    matches!(TOKENIZER.get(), Some(_))
}
/// Tokenize a ConversationEntry with its role and content.
///
/// Maps each variant onto one chat-template turn via `tokenize_entry`,
/// picking the role string from the variant; Log entries contribute no
/// tokens at all (empty Vec), matching their zero cost elsewhere.
///
/// NOTE(review): "memory", "dmn" and "thinking" are not standard chat roles —
/// confirm the target model's template tolerates them rather than degrading
/// generation quality.
pub fn tokenize_conv_entry(entry: &super::context::ConversationEntry) -> Vec<u32> {
    use super::context::ConversationEntry;
    match entry {
        ConversationEntry::System(m) => tokenize_entry("system", m.content_text()),
        // Regular turns carry their own role (user/assistant/...).
        ConversationEntry::Message(m) => tokenize_entry(m.role_str(), m.content_text()),
        ConversationEntry::Memory { message, .. } => tokenize_entry("memory", message.content_text()),
        ConversationEntry::Dmn(m) => tokenize_entry("dmn", m.content_text()),
        // NOTE(review): thinking text is tokenized here, yet other code paths
        // have treated thinking entries as zero-token — verify which behavior
        // is intended before trusting the resulting counts.
        ConversationEntry::Thinking(text) => tokenize_entry("thinking", text),
        ConversationEntry::Log(_) => vec![], // logs don't consume tokens
    }
}

View file

@ -950,6 +950,13 @@ fn main() {
return;
}
// Initialize the Qwen tokenizer for direct token generation
let tokenizer_path = dirs::home_dir().unwrap_or_default()
.join(".consciousness/tokenizer-qwen35.json");
if tokenizer_path.exists() {
crate::agent::tokenizer::init(&tokenizer_path.to_string_lossy());
}
let cli = Cli::parse();
if let Err(e) = cli.command.run() {

View file

@ -345,7 +345,7 @@ where
let mut cumulative: Vec<usize> = Vec::with_capacity(entries.len());
let mut running = 0;
for e in entries {
running += e.tokens;
running += e.tokens();
cumulative.push(running);
}

View file

@ -50,7 +50,7 @@ impl ConsciousScreen {
};
mem_children.push(SectionView {
name: key.clone(),
tokens: ce.tokens,
tokens: ce.tokens(),
content: ce.entry.message().content_text().to_string(),
children: Vec::new(),
status,

View file

@ -33,7 +33,7 @@ pub fn section_to_view(section: &ContextSection) -> SectionView {
};
SectionView {
name: ce.entry.label(),
tokens: ce.tokens,
tokens: ce.tokens(),
content,
children: Vec::new(),
status: String::new(),