ContextState + private AstNode fields: enforce token_ids invariant

AstNode fields are now private with read-only accessors. All mutation
goes through ContextState methods (push, set_message, set_score, del)
which guarantee token_ids stays in sync with text on every leaf.

Also fix ResponseParser to use the AstNode::tool_call() constructor, and
widen the parsing module's visibility to pub(crate).

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-08 12:58:59 -04:00
parent 29dc339f54
commit 6730d136d4
3 changed files with 124 additions and 42 deletions

View file

@ -7,7 +7,7 @@
// Set POC_DEBUG=1 for verbose per-turn logging.
pub mod http;
mod parsing;
pub(crate) mod parsing;
mod types;
mod openai;

View file

@ -17,7 +17,7 @@ use super::types::{ToolCall, FunctionCall};
/// Looks for `<tool_call>...</tool_call>` blocks and tries both
/// XML and JSON formats for the body.
/// Parse a single tool call body (content between `<tool_call>` and `</tool_call>`).
pub(super) fn parse_tool_call_body(body: &str) -> Option<ToolCall> {
pub(crate) fn parse_tool_call_body(body: &str) -> Option<ToolCall> {
let normalized = normalize_xml_tags(body);
let body = normalized.trim();
let mut counter = 0u32;

View file

@ -59,16 +59,14 @@ pub enum NodeBody {
#[derive(Debug, Clone)]
pub struct AstNode {
pub role: Role,
pub body: NodeBody,
pub timestamp: Option<DateTime<Utc>>,
// Optional metadata
pub memory_key: Option<String>,
pub memory_score: Option<f64>,
pub tool_name: Option<String>,
pub tool_args: Option<String>,
pub tool_call_id: Option<String>,
role: Role,
body: NodeBody,
timestamp: Option<DateTime<Utc>>,
memory_key: Option<String>,
memory_score: Option<f64>,
tool_name: Option<String>,
tool_args: Option<String>,
tool_call_id: Option<String>,
}
impl Role {
@ -195,26 +193,13 @@ impl AstNode {
}
}
/// Get mutable children.
pub fn children_mut(&mut self) -> Option<&mut Vec<AstNode>> {
match &mut self.body {
NodeBody::Branch(c) => Some(c),
NodeBody::Leaf { .. } => None,
}
}
/// Push a child node. Only valid on Branch nodes.
pub fn push_child(&mut self, child: AstNode) {
match &mut self.body {
NodeBody::Branch(children) => children.push(child),
NodeBody::Leaf { .. } => panic!("push_child on leaf node"),
}
}
/// Set score on a Memory node.
pub fn set_score(&mut self, score: Option<f64>) {
self.memory_score = score;
}
/// Role of this node (returned by value).
pub fn role(&self) -> Role { self.role }
/// Timestamp stored on this node, if any.
pub fn timestamp(&self) -> Option<DateTime<Utc>> { self.timestamp }
/// Memory key stored on this node, borrowed as `&str`, if any.
pub fn memory_key(&self) -> Option<&str> { self.memory_key.as_deref() }
/// Memory score stored on this node, if any.
pub fn memory_score(&self) -> Option<f64> { self.memory_score }
/// Tool name stored on this node, borrowed as `&str`, if any.
pub fn tool_name(&self) -> Option<&str> { self.tool_name.as_deref() }
/// Tool arguments stored on this node, borrowed as `&str`, if any.
pub fn tool_args(&self) -> Option<&str> { self.tool_args.as_deref() }
/// Tool call id stored on this node, borrowed as `&str`, if any.
pub fn tool_call_id(&self) -> Option<&str> { self.tool_call_id.as_deref() }
/// Short label for the UI.
pub fn label(&self) -> String {
@ -392,16 +377,11 @@ impl ResponseParser {
self.buf = self.buf[end + 12..].to_string();
self.in_tool_call = false;
if let Some(call) = super::api::parsing::parse_tool_call_body(&self.tool_call_buf) {
let node = AstNode {
role: Role::ToolCall,
body: NodeBody::Leaf(self.tool_call_buf.clone()),
token_ids: vec![], // tokenized when attached to parent
timestamp: None,
memory_key: None, memory_score: None,
tool_name: Some(call.function.name),
tool_args: Some(call.function.arguments),
tool_call_id: Some(call.id),
};
let node = AstNode::tool_call(
call.id.clone(),
call.function.name.clone(),
call.function.arguments.clone(),
);
new_calls.push(node.clone());
self.flush_content();
self.children.push(node);
@ -489,6 +469,108 @@ impl ResponseParser {
}
}
// ---------------------------------------------------------------------------
// ContextState — the full context window
// ---------------------------------------------------------------------------
/// The context window: four sections, each a branch AstNode.
/// All mutation goes through ContextState methods to maintain the invariant
/// that token_ids on every leaf matches its rendered text.
///
/// The fields are private; read access is through the same-named accessor
/// methods, which expose each section's children as a slice.
pub struct ContextState {
/// System section; built by `new()` as a branch with `Role::SystemSection`.
system: AstNode,
/// Identity section; built by `new()` as a branch with `Role::IdentitySection`.
identity: AstNode,
/// Journal section; built by `new()` as a branch with `Role::JournalSection`.
journal: AstNode,
/// Conversation section; built by `new()` as a branch with `Role::ConversationSection`.
conversation: AstNode,
}
impl ContextState {
    /// Create a context window whose four sections are empty branch nodes.
    pub fn new() -> Self {
        Self {
            system: AstNode::branch(Role::SystemSection, vec![]),
            identity: AstNode::branch(Role::IdentitySection, vec![]),
            journal: AstNode::branch(Role::JournalSection, vec![]),
            conversation: AstNode::branch(Role::ConversationSection, vec![]),
        }
    }

    // -- Read access ----------------------------------------------------------

    /// Children of the system section.
    pub fn system(&self) -> &[AstNode] { self.system.children() }

    /// Children of the identity section.
    pub fn identity(&self) -> &[AstNode] { self.identity.children() }

    /// Children of the journal section.
    pub fn journal(&self) -> &[AstNode] { self.journal.children() }

    /// Children of the conversation section.
    pub fn conversation(&self) -> &[AstNode] { self.conversation.children() }

    /// Sum of the token counts reported by the four sections.
    pub fn tokens(&self) -> usize {
        self.system.tokens()
            + self.identity.tokens()
            + self.journal.tokens()
            + self.conversation.tokens()
    }

    /// Token ids of every section, concatenated in the order
    /// system, identity, journal, conversation.
    pub fn token_ids(&self) -> Vec<u32> {
        let mut ids = self.system.token_ids();
        ids.extend(self.identity.token_ids());
        ids.extend(self.journal.token_ids());
        ids.extend(self.conversation.token_ids());
        ids
    }

    /// Rendered text of every section, concatenated in the order
    /// system, identity, journal, conversation.
    pub fn render(&self) -> String {
        let mut s = self.system.render();
        s.push_str(&self.identity.render());
        s.push_str(&self.journal.render());
        s.push_str(&self.conversation.render());
        s
    }

    // -- Mutation --------------------------------------------------------------

    /// Map a section role to the section node it names.
    ///
    /// # Panics
    ///
    /// Panics if `role` is not one of the four section roles.
    fn section_mut(&mut self, role: Role) -> &mut AstNode {
        match role {
            Role::SystemSection => &mut self.system,
            Role::IdentitySection => &mut self.identity,
            Role::JournalSection => &mut self.journal,
            Role::ConversationSection => &mut self.conversation,
            _ => panic!("not a section role: {:?}", role),
        }
    }

    /// Mutable access to the child list of a section node.
    ///
    /// Sections are constructed as branches in `new()`, so the non-branch
    /// arm is treated as unreachable.
    fn children_mut(section: &mut AstNode) -> &mut Vec<AstNode> {
        match &mut section.body {
            NodeBody::Branch(c) => c,
            _ => unreachable!("section is always a branch"),
        }
    }

    /// Push a node onto the end of a section.
    ///
    /// The node is stored as given; nothing is re-tokenized here.
    ///
    /// # Panics
    ///
    /// Panics if `section` is not a section role.
    pub fn push(&mut self, section: Role, node: AstNode) {
        let s = self.section_mut(section);
        Self::children_mut(s).push(node);
    }

    /// Replace the text content of a leaf at `index` in `section`.
    /// Re-tokenizes the leaf to maintain the invariant.
    ///
    /// The node's body is overwritten with a fresh `NodeBody::Leaf` whatever
    /// it held before; the token ids come from `tokenize_leaf`, called with
    /// the node's existing role and the new text.
    ///
    /// # Panics
    ///
    /// Panics if `section` is not a section role or `index` is out of bounds.
    pub fn set_message(&mut self, section: Role, index: usize, text: impl Into<String>) {
        let s = self.section_mut(section);
        let node = &mut Self::children_mut(s)[index];
        let text = text.into();
        let token_ids = tokenize_leaf(node.role, &text);
        node.body = NodeBody::Leaf { text, token_ids };
    }

    /// Set the memory score on a node at `index` in `section`.
    ///
    /// # Panics
    ///
    /// Panics if `section` is not a section role or `index` is out of bounds.
    pub fn set_score(&mut self, section: Role, index: usize, score: Option<f64>) {
        let s = self.section_mut(section);
        Self::children_mut(s)[index].memory_score = score;
    }

    /// Remove a node at `index` from `section` and return it.
    ///
    /// # Panics
    ///
    /// Panics if `section` is not a section role or `index` is out of bounds.
    pub fn del(&mut self, section: Role, index: usize) -> AstNode {
        let s = self.section_mut(section);
        Self::children_mut(s).remove(index)
    }
}

/// `ContextState::default()` is the same empty context as `ContextState::new()`.
impl Default for ContextState {
    fn default() -> Self {
        Self::new()
    }
}
// ---------------------------------------------------------------------------
// Context window size
// ---------------------------------------------------------------------------