consciousness/Cargo.toml
Kent Overstreet 5e4067c04f Replace token counting with token generation via HuggingFace tokenizer
Add agent/tokenizer.rs with global Qwen 3.5 tokenizer that generates
actual token IDs including chat template wrapping. ContextEntry now
stores token_ids: Vec<u32> instead of tokens: usize — the count is
derived from the length.

ContextEntry::new() tokenizes automatically via the global tokenizer.
ContextSection::push_entry() takes a raw ConversationEntry and
tokenizes it. set_message() re-tokenizes without needing an external
tokenizer parameter.

Token IDs include the full chat template: <|im_start|>role\ncontent
<|im_end|>\n — so concatenating token_ids across entries produces a
ready-to-send prompt for vLLM's /v1/completions endpoint.

The old tiktoken CoreBPE is now unused on Agent (will be removed in
a followup). Token counts are now exact for Qwen 3.5 instead of the
~85-90% approximation from cl100k_base.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-08 11:20:03 -04:00

119 lines
2.4 KiB
TOML

# Workspace definition. This manifest also declares a root package (see
# [package]), so the members listed here are additional crates, one per
# channel directory.
[workspace]
members = ["channels/irc", "channels/telegram", "channels/tmux", "channels/socat"]
# Explicitly pin Cargo's dependency resolver to version 2.
resolver = "2"
# Shared package metadata. A crate opts in per key with `<key>.workspace = true`,
# as the root [package] in this file does for both `version` and `edition`.
[workspace.package]
version = "0.4.0"
edition = "2024"
# Release profile tuning.
[profile.release]
# One step below Cargo's release default of 3.
opt-level = 2
# Level 1 = "limited" debug info (no type- or variable-level information).
debug = 1
# The "*" package spec matches every dependency that is not a workspace
# member, so debug info is dropped for third-party crates while the
# `debug = 1` setting above still applies to the workspace's own crates.
[profile.release.package."*"]
debug = false
# Root package of the workspace. Only the name is set locally; version and
# edition are inherited from [workspace.package].
[package]
name = "poc-memory"
version.workspace = true
edition.workspace = true
# Runtime dependencies of the root package. Entries are kept in functional
# groups rather than alphabetical order; the group labels below describe what
# each crate is, not how this project uses it.
[dependencies]
# Error handling, CLI parsing, configuration, platform directories, logging.
anyhow = "1"
clap = { version = "4", features = ["derive"] }
figment = { version = "0.10", features = ["env"] }
dirs = "6"
env_logger = "0.11"
log = "0.4"
# Serialization framework and JSON / JSON5 formats.
serde = { version = "1", features = ["derive"] }
serde_json = "1"
json5 = "1.3"
# Terminal UI.
crossterm = { version = "0.29", features = ["event-stream"] }
ratatui = { version = "0.30", features = ["unstable-rendered-line-info"] }
# Pulled from a git fork rather than crates.io, with default features off.
# NOTE(review): `subdirectory` does not appear to be a documented Cargo key for
# git dependencies (Cargo locates the crate by name inside the repository) —
# confirm it is not merely producing an "unused manifest key" warning.
tui-markdown = { git = "https://github.com/koverstreet/tui-markdown", subdirectory = "tui-markdown", default-features = false }
# Renamed dependency: referenced as `tui-textarea`, resolved from the
# `tui-textarea-2` package.
tui-textarea = { version = "0.10.2", package = "tui-textarea-2" }
# General utilities: UUIDs, binary encoding, regex/glob matching, date-time,
# libc bindings, byte search, memory-mapped files, PEG parser generator,
# identifier-pasting macros.
uuid = { version = "1", features = ["v4"] }
bincode = "1"
regex = "1"
glob = "0.3"
chrono = "0.4"
libc = "0.2"
memchr = "2"
memmap2 = "0.9"
peg = "0.8"
paste = "1"
# Embedded key-value store, zero-copy (de)serialization, data parallelism.
redb = "4"
rkyv = { version = "0.7", features = ["validation", "std"] }
rayon = "1"
# jobkit is fetched from a git repository, not crates.io.
jobkit = { git = "https://evilpiepirate.org/git/jobkit.git", features = ["daemon"] }
# Async runtime and helpers.
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["compat"] }
tokio-scoped = "0.2.0"
futures = "0.3"
# Cap'n Proto runtime and RPC; same 0.25 series as `capnpc` in
# [build-dependencies] — bump the three together.
capnp = "0.25"
capnp-rpc = "0.25"
# Tokenizers.
# NOTE(review): the commit that added `tokenizers` describes tiktoken's CoreBPE
# as no longer used on Agent and due for removal in a follow-up — confirm no
# remaining users before dropping tiktoken-rs.
tiktoken-rs = "0.9.1"
tokenizers = "0.21"
# Skill-rating algorithms.
skillratings = "0.28"
# HTTP client stack (hyper 1.x with only the client + HTTP/1 features), body
# and byte-buffer helpers, base64 and URL-encoded form encoding, and TLS via
# rustls with the platform's native root certificates.
http = "1"
hyper = { version = "1", features = ["client", "http1"] }
hyper-util = { version = "0.1", features = ["tokio"], default-features = false }
http-body-util = "0.1"
bytes = "1"
base64 = "0.22"
rustls = "0.23"
tokio-rustls = "0.26"
rustls-native-certs = "0.8"
serde_urlencoded = "0.7"
# Dependencies available to the build script only. capnpc is the Cap'n Proto
# code generator; it is on the same 0.25 series as capnp / capnp-rpc in
# [dependencies].
[build-dependencies]
capnpc = "0.25"
# Library target: crate name `poc_memory` (underscore form of the package
# name), rooted at src/lib.rs.
[lib]
name = "poc_memory"
path = "src/lib.rs"
# Binary targets with sources under src/ and src/bin/. Every binary is listed
# with an explicit path; note that `poc-memory` is built from src/main.rs while
# the others live in src/bin/.
[[bin]]
name = "consciousness"
path = "src/bin/consciousness.rs"
[[bin]]
name = "poc-memory"
path = "src/main.rs"
[[bin]]
name = "merge-logs"
path = "src/bin/merge-logs.rs"
[[bin]]
name = "diag-key"
path = "src/bin/diag-key.rs"
[[bin]]
name = "find-deleted"
path = "src/bin/find-deleted.rs"
# Binary targets whose sources live under src/claude/.
[[bin]]
name = "poc-hook"
path = "src/claude/poc-hook.rs"
[[bin]]
name = "poc-daemon"
path = "src/claude/poc-daemon.rs"
[[bin]]
name = "memory-search"
path = "src/claude/memory-search.rs"
# Binary name differs from its source file name (mcp-server.rs).
[[bin]]
name = "consciousness-mcp"
path = "src/claude/mcp-server.rs"