ContextState + private AstNode fields: enforce token_ids invariant
AstNode fields are now private with read-only accessors. All mutation goes through ContextState methods (push, set_message, set_score, del), which guarantee that token_ids stays in sync with text on every leaf. Also fix ResponseParser to use the AstNode::tool_call() constructor, and widen the parsing module's visibility to pub(crate). Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
29dc339f54
commit
6730d136d4
3 changed files with 124 additions and 42 deletions
|
|
@ -7,7 +7,7 @@
|
|||
// Set POC_DEBUG=1 for verbose per-turn logging.
|
||||
|
||||
pub mod http;
|
||||
mod parsing;
|
||||
pub(crate) mod parsing;
|
||||
mod types;
|
||||
mod openai;
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ use super::types::{ToolCall, FunctionCall};
|
|||
/// Looks for `<tool_call>...</tool_call>` blocks and tries both
|
||||
/// XML and JSON formats for the body.
|
||||
/// Parse a single tool call body (content between `<tool_call>` and `</tool_call>`).
|
||||
pub(super) fn parse_tool_call_body(body: &str) -> Option<ToolCall> {
|
||||
pub(crate) fn parse_tool_call_body(body: &str) -> Option<ToolCall> {
|
||||
let normalized = normalize_xml_tags(body);
|
||||
let body = normalized.trim();
|
||||
let mut counter = 0u32;
|
||||
|
|
|
|||
|
|
@ -59,16 +59,14 @@ pub enum NodeBody {
|
|||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AstNode {
|
||||
pub role: Role,
|
||||
pub body: NodeBody,
|
||||
pub timestamp: Option<DateTime<Utc>>,
|
||||
|
||||
// Optional metadata
|
||||
pub memory_key: Option<String>,
|
||||
pub memory_score: Option<f64>,
|
||||
pub tool_name: Option<String>,
|
||||
pub tool_args: Option<String>,
|
||||
pub tool_call_id: Option<String>,
|
||||
role: Role,
|
||||
body: NodeBody,
|
||||
timestamp: Option<DateTime<Utc>>,
|
||||
memory_key: Option<String>,
|
||||
memory_score: Option<f64>,
|
||||
tool_name: Option<String>,
|
||||
tool_args: Option<String>,
|
||||
tool_call_id: Option<String>,
|
||||
}
|
||||
|
||||
impl Role {
|
||||
|
|
@ -195,26 +193,13 @@ impl AstNode {
|
|||
}
|
||||
}
|
||||
|
||||
/// Get mutable children.
|
||||
pub fn children_mut(&mut self) -> Option<&mut Vec<AstNode>> {
|
||||
match &mut self.body {
|
||||
NodeBody::Branch(c) => Some(c),
|
||||
NodeBody::Leaf { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Push a child node. Only valid on Branch nodes.
|
||||
pub fn push_child(&mut self, child: AstNode) {
|
||||
match &mut self.body {
|
||||
NodeBody::Branch(children) => children.push(child),
|
||||
NodeBody::Leaf { .. } => panic!("push_child on leaf node"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set score on a Memory node.
|
||||
pub fn set_score(&mut self, score: Option<f64>) {
|
||||
self.memory_score = score;
|
||||
}
|
||||
/// Role of this node.
pub fn role(&self) -> Role { self.role }
|
||||
/// Timestamp stored on this node, or `None` when unset.
pub fn timestamp(&self) -> Option<DateTime<Utc>> { self.timestamp }
|
||||
/// Memory key stored on this node, borrowed as `&str`; `None` when unset.
pub fn memory_key(&self) -> Option<&str> { self.memory_key.as_deref() }
|
||||
/// Memory score stored on this node, or `None` when unset.
pub fn memory_score(&self) -> Option<f64> { self.memory_score }
|
||||
/// Tool name stored on this node, borrowed as `&str`; `None` when unset.
pub fn tool_name(&self) -> Option<&str> { self.tool_name.as_deref() }
|
||||
/// Tool arguments stored on this node, borrowed as `&str`; `None` when unset.
pub fn tool_args(&self) -> Option<&str> { self.tool_args.as_deref() }
|
||||
/// Tool-call id stored on this node, borrowed as `&str`; `None` when unset.
pub fn tool_call_id(&self) -> Option<&str> { self.tool_call_id.as_deref() }
|
||||
|
||||
/// Short label for the UI.
|
||||
pub fn label(&self) -> String {
|
||||
|
|
@ -392,16 +377,11 @@ impl ResponseParser {
|
|||
self.buf = self.buf[end + 12..].to_string();
|
||||
self.in_tool_call = false;
|
||||
if let Some(call) = super::api::parsing::parse_tool_call_body(&self.tool_call_buf) {
|
||||
let node = AstNode {
|
||||
role: Role::ToolCall,
|
||||
body: NodeBody::Leaf(self.tool_call_buf.clone()),
|
||||
token_ids: vec![], // tokenized when attached to parent
|
||||
timestamp: None,
|
||||
memory_key: None, memory_score: None,
|
||||
tool_name: Some(call.function.name),
|
||||
tool_args: Some(call.function.arguments),
|
||||
tool_call_id: Some(call.id),
|
||||
};
|
||||
let node = AstNode::tool_call(
|
||||
call.id.clone(),
|
||||
call.function.name.clone(),
|
||||
call.function.arguments.clone(),
|
||||
);
|
||||
new_calls.push(node.clone());
|
||||
self.flush_content();
|
||||
self.children.push(node);
|
||||
|
|
@ -489,6 +469,108 @@ impl ResponseParser {
|
|||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ContextState — the full context window
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// The context window: four sections, each a branch AstNode.
|
||||
/// All mutation goes through ContextState methods to maintain the invariant
|
||||
/// that token_ids on every leaf matches its rendered text.
|
||||
pub struct ContextState {
|
||||
system: AstNode,
|
||||
identity: AstNode,
|
||||
journal: AstNode,
|
||||
conversation: AstNode,
|
||||
}
|
||||
|
||||
impl ContextState {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
system: AstNode::branch(Role::SystemSection, vec![]),
|
||||
identity: AstNode::branch(Role::IdentitySection, vec![]),
|
||||
journal: AstNode::branch(Role::JournalSection, vec![]),
|
||||
conversation: AstNode::branch(Role::ConversationSection, vec![]),
|
||||
}
|
||||
}
|
||||
|
||||
// -- Read access ----------------------------------------------------------
|
||||
|
||||
pub fn system(&self) -> &[AstNode] { self.system.children() }
|
||||
pub fn identity(&self) -> &[AstNode] { self.identity.children() }
|
||||
pub fn journal(&self) -> &[AstNode] { self.journal.children() }
|
||||
pub fn conversation(&self) -> &[AstNode] { self.conversation.children() }
|
||||
|
||||
pub fn tokens(&self) -> usize {
|
||||
self.system.tokens()
|
||||
+ self.identity.tokens()
|
||||
+ self.journal.tokens()
|
||||
+ self.conversation.tokens()
|
||||
}
|
||||
|
||||
pub fn token_ids(&self) -> Vec<u32> {
|
||||
let mut ids = self.system.token_ids();
|
||||
ids.extend(self.identity.token_ids());
|
||||
ids.extend(self.journal.token_ids());
|
||||
ids.extend(self.conversation.token_ids());
|
||||
ids
|
||||
}
|
||||
|
||||
pub fn render(&self) -> String {
|
||||
let mut s = self.system.render();
|
||||
s.push_str(&self.identity.render());
|
||||
s.push_str(&self.journal.render());
|
||||
s.push_str(&self.conversation.render());
|
||||
s
|
||||
}
|
||||
|
||||
// -- Mutation --------------------------------------------------------------
|
||||
|
||||
fn section_mut(&mut self, role: Role) -> &mut AstNode {
|
||||
match role {
|
||||
Role::SystemSection => &mut self.system,
|
||||
Role::IdentitySection => &mut self.identity,
|
||||
Role::JournalSection => &mut self.journal,
|
||||
Role::ConversationSection => &mut self.conversation,
|
||||
_ => panic!("not a section role: {:?}", role),
|
||||
}
|
||||
}
|
||||
|
||||
fn children_mut(section: &mut AstNode) -> &mut Vec<AstNode> {
|
||||
match &mut section.body {
|
||||
NodeBody::Branch(c) => c,
|
||||
_ => unreachable!("section is always a branch"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Push a node into a section.
|
||||
pub fn push(&mut self, section: Role, node: AstNode) {
|
||||
let s = self.section_mut(section);
|
||||
Self::children_mut(s).push(node);
|
||||
}
|
||||
|
||||
/// Replace the text content of a leaf at `index` in `section`.
|
||||
/// Re-tokenizes the leaf to maintain the invariant.
|
||||
pub fn set_message(&mut self, section: Role, index: usize, text: impl Into<String>) {
|
||||
let s = self.section_mut(section);
|
||||
let node = &mut Self::children_mut(s)[index];
|
||||
let text = text.into();
|
||||
let token_ids = tokenize_leaf(node.role, &text);
|
||||
node.body = NodeBody::Leaf { text, token_ids };
|
||||
}
|
||||
|
||||
/// Set the memory score on a node at `index` in `section`.
|
||||
pub fn set_score(&mut self, section: Role, index: usize, score: Option<f64>) {
|
||||
let s = self.section_mut(section);
|
||||
Self::children_mut(s)[index].memory_score = score;
|
||||
}
|
||||
|
||||
/// Remove a node at `index` from `section`.
|
||||
pub fn del(&mut self, section: Role, index: usize) -> AstNode {
|
||||
let s = self.section_mut(section);
|
||||
Self::children_mut(s).remove(index)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Context window size
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue