Kill tiktoken — all token counting now uses Qwen 3.5 tokenizer
Remove the tiktoken-rs dependency, the CoreBPE field on Agent, and the msg_token_count() function. All tokenization now goes through the global HuggingFace tokenizer in agent/tokenizer.rs.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
5e4067c04f
commit
67e3228c32
4 changed files with 1 addition and 78 deletions
46
Cargo.lock
generated
46
Cargo.lock
generated
|
|
@ -219,17 +219,6 @@ dependencies = [
|
||||||
"generic-array",
|
"generic-array",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "bstr"
|
|
||||||
version = "1.12.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab"
|
|
||||||
dependencies = [
|
|
||||||
"memchr",
|
|
||||||
"regex-automata",
|
|
||||||
"serde",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bumpalo"
|
name = "bumpalo"
|
||||||
version = "3.20.2"
|
version = "3.20.2"
|
||||||
|
|
@ -894,17 +883,6 @@ dependencies = [
|
||||||
"regex",
|
"regex",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "fancy-regex"
|
|
||||||
version = "0.13.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
|
|
||||||
dependencies = [
|
|
||||||
"bit-set",
|
|
||||||
"regex-automata",
|
|
||||||
"regex-syntax",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "figment"
|
name = "figment"
|
||||||
version = "0.10.19"
|
version = "0.10.19"
|
||||||
|
|
@ -1965,7 +1943,6 @@ dependencies = [
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"serde_urlencoded",
|
"serde_urlencoded",
|
||||||
"skillratings",
|
"skillratings",
|
||||||
"tiktoken-rs",
|
|
||||||
"tokenizers",
|
"tokenizers",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-rustls",
|
"tokio-rustls",
|
||||||
|
|
@ -2393,12 +2370,6 @@ dependencies = [
|
||||||
"syn 1.0.109",
|
"syn 1.0.109",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "rustc-hash"
|
|
||||||
version = "1.1.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc_version"
|
name = "rustc_version"
|
||||||
version = "0.4.1"
|
version = "0.4.1"
|
||||||
|
|
@ -2784,7 +2755,7 @@ dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"base64 0.22.1",
|
"base64 0.22.1",
|
||||||
"bitflags 2.11.0",
|
"bitflags 2.11.0",
|
||||||
"fancy-regex 0.11.0",
|
"fancy-regex",
|
||||||
"filedescriptor",
|
"filedescriptor",
|
||||||
"finl_unicode",
|
"finl_unicode",
|
||||||
"fixedbitset",
|
"fixedbitset",
|
||||||
|
|
@ -2857,21 +2828,6 @@ dependencies = [
|
||||||
"syn 2.0.117",
|
"syn 2.0.117",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "tiktoken-rs"
|
|
||||||
version = "0.9.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3a19830747d9034cd9da43a60eaa8e552dfda7712424aebf187b7a60126bae0d"
|
|
||||||
dependencies = [
|
|
||||||
"anyhow",
|
|
||||||
"base64 0.22.1",
|
|
||||||
"bstr",
|
|
||||||
"fancy-regex 0.13.0",
|
|
||||||
"lazy_static",
|
|
||||||
"regex",
|
|
||||||
"rustc-hash",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "time"
|
name = "time"
|
||||||
version = "0.3.47"
|
version = "0.3.47"
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,6 @@ futures = "0.3"
|
||||||
capnp = "0.25"
|
capnp = "0.25"
|
||||||
capnp-rpc = "0.25"
|
capnp-rpc = "0.25"
|
||||||
|
|
||||||
tiktoken-rs = "0.9.1"
|
|
||||||
tokenizers = "0.21"
|
tokenizers = "0.21"
|
||||||
skillratings = "0.28"
|
skillratings = "0.28"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,6 @@
|
||||||
use crate::agent::api::*;
|
use crate::agent::api::*;
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use tiktoken_rs::CoreBPE;
|
|
||||||
use crate::agent::tools::working_stack;
|
use crate::agent::tools::working_stack;
|
||||||
|
|
||||||
// --- Context state types ---
|
// --- Context state types ---
|
||||||
|
|
@ -254,26 +253,6 @@ fn lowest_scored_memory(entries: &[ContextEntry]) -> Option<usize> {
|
||||||
.map(|(i, _)| i)
|
.map(|(i, _)| i)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Count the token footprint of a message using BPE tokenization.
|
|
||||||
pub fn msg_token_count(tokenizer: &CoreBPE, msg: &Message) -> usize {
|
|
||||||
let count = |s: &str| tokenizer.encode_with_special_tokens(s).len();
|
|
||||||
let content = msg.content.as_ref().map_or(0, |c| match c {
|
|
||||||
MessageContent::Text(s) => count(s),
|
|
||||||
MessageContent::Parts(parts) => parts.iter()
|
|
||||||
.map(|p| match p {
|
|
||||||
ContentPart::Text { text } => count(text),
|
|
||||||
ContentPart::ImageUrl { .. } => 85,
|
|
||||||
})
|
|
||||||
.sum(),
|
|
||||||
});
|
|
||||||
let tools = msg.tool_calls.as_ref().map_or(0, |calls| {
|
|
||||||
calls.iter()
|
|
||||||
.map(|c| count(&c.function.arguments) + count(&c.function.name))
|
|
||||||
.sum()
|
|
||||||
});
|
|
||||||
content + tools
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Detect context window overflow errors from the API.
|
/// Detect context window overflow errors from the API.
|
||||||
pub fn is_context_overflow(err: &anyhow::Error) -> bool {
|
pub fn is_context_overflow(err: &anyhow::Error) -> bool {
|
||||||
let msg = err.to_string().to_lowercase();
|
let msg = err.to_string().to_lowercase();
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,6 @@ pub mod tools;
|
||||||
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use tiktoken_rs::CoreBPE;
|
|
||||||
|
|
||||||
use api::{ApiClient, ToolCall};
|
use api::{ApiClient, ToolCall};
|
||||||
use api::{ContentPart, Message, MessageContent, Role};
|
use api::{ContentPart, Message, MessageContent, Role};
|
||||||
|
|
@ -163,9 +162,6 @@ pub struct Agent {
|
||||||
pub provenance: String,
|
pub provenance: String,
|
||||||
/// Persistent conversation log — append-only record of all messages.
|
/// Persistent conversation log — append-only record of all messages.
|
||||||
pub conversation_log: Option<ConversationLog>,
|
pub conversation_log: Option<ConversationLog>,
|
||||||
/// BPE tokenizer for token counting (cl100k_base — close enough
|
|
||||||
/// for Claude and Qwen budget allocation, ~85-90% count accuracy).
|
|
||||||
tokenizer: CoreBPE,
|
|
||||||
/// Mutable context state — personality, working stack, etc.
|
/// Mutable context state — personality, working stack, etc.
|
||||||
pub context: ContextState,
|
pub context: ContextState,
|
||||||
/// App config — used to reload identity on compaction and model switching.
|
/// App config — used to reload identity on compaction and model switching.
|
||||||
|
|
@ -193,9 +189,6 @@ impl Agent {
|
||||||
conversation_log: Option<ConversationLog>,
|
conversation_log: Option<ConversationLog>,
|
||||||
active_tools: tools::SharedActiveTools,
|
active_tools: tools::SharedActiveTools,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let tokenizer = tiktoken_rs::cl100k_base()
|
|
||||||
.expect("failed to load cl100k_base tokenizer");
|
|
||||||
|
|
||||||
let mut system = ContextSection::new("System prompt");
|
let mut system = ContextSection::new("System prompt");
|
||||||
system.push(ContextEntry::new(
|
system.push(ContextEntry::new(
|
||||||
ConversationEntry::System(Message::system(&system_prompt)), None));
|
ConversationEntry::System(Message::system(&system_prompt)), None));
|
||||||
|
|
@ -227,7 +220,6 @@ impl Agent {
|
||||||
pending_dmn_pause: false,
|
pending_dmn_pause: false,
|
||||||
provenance: "manual".to_string(),
|
provenance: "manual".to_string(),
|
||||||
conversation_log,
|
conversation_log,
|
||||||
tokenizer,
|
|
||||||
context,
|
context,
|
||||||
app_config,
|
app_config,
|
||||||
prompt_file,
|
prompt_file,
|
||||||
|
|
@ -249,8 +241,6 @@ impl Agent {
|
||||||
/// personality, journal, entries) for KV cache sharing. The caller
|
/// personality, journal, entries) for KV cache sharing. The caller
|
||||||
/// appends the subconscious prompt as a user message and runs the turn.
|
/// appends the subconscious prompt as a user message and runs the turn.
|
||||||
pub fn fork(&self, tools: Vec<tools::Tool>) -> Self {
|
pub fn fork(&self, tools: Vec<tools::Tool>) -> Self {
|
||||||
let tokenizer = tiktoken_rs::cl100k_base()
|
|
||||||
.expect("failed to load cl100k_base tokenizer");
|
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
client: self.client.clone(),
|
client: self.client.clone(),
|
||||||
|
|
@ -267,7 +257,6 @@ impl Agent {
|
||||||
pending_dmn_pause: false,
|
pending_dmn_pause: false,
|
||||||
provenance: self.provenance.clone(),
|
provenance: self.provenance.clone(),
|
||||||
conversation_log: None,
|
conversation_log: None,
|
||||||
tokenizer,
|
|
||||||
context: self.context.clone(),
|
context: self.context.clone(),
|
||||||
app_config: self.app_config.clone(),
|
app_config: self.app_config.clone(),
|
||||||
prompt_file: self.prompt_file.clone(),
|
prompt_file: self.prompt_file.clone(),
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue