// context.rs — Context window as an AST // // The context window is a tree of AstNodes. Each node is either a leaf // (typed content with cached token IDs) or a branch (role + children). // The full prompt is a depth-first traversal of the sections in ContextState. // Streaming responses are parsed into new nodes by the ResponseParser. // // Grammar (EBNF): // // context = section* ; // section = (message | leaf)* ; // message = IM_START role "\n" element* IM_END "\n" ; // role = "system" | "user" | "assistant" ; // element = thinking | tool_call | content ; // thinking = "" TEXT "" ; // tool_call = "\n" tool_xml "\n" ; // tool_xml = "\n" param* "" ; // param = "\n" VALUE "\n\n" ; // content = TEXT ; // // Self-wrapping leaves (not inside a message branch): // dmn = IM_START "dmn\n" TEXT IM_END "\n" ; // memory = IM_START "memory\n" TEXT IM_END "\n" ; // tool_result = IM_START "tool\n" TEXT IM_END "\n" ; // // Non-visible leaves (not in prompt): // log = TEXT ; // // Role is only for branch (interior) nodes. Leaf type is determined by // the NodeBody variant. Grammar constraints enforced by construction. use chrono::{DateTime, Utc}; use serde::{Serialize, Deserialize}; use super::tokenizer; // --------------------------------------------------------------------------- // Types // --------------------------------------------------------------------------- /// Branch roles — maps directly to the grammar's message roles. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub enum Role { System, User, Assistant, } /// Leaf content — each variant knows how to render itself. #[derive(Debug, Clone, Serialize, Deserialize)] pub enum NodeBody { // Children of message branches — rendered without im_start/im_end Content(String), Thinking(String), ToolCall { name: String, arguments: String }, // Self-wrapping leaves — render their own im_start/im_end ToolResult(String), Memory { key: String, text: String, score: Option }, Dmn(String), // Non-visible (0 tokens in prompt) Log(String), } /// A leaf node: typed content with cached token IDs. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NodeLeaf { body: NodeBody, #[serde(skip)] token_ids: Vec, timestamp: Option>, } /// A node in the context AST. #[derive(Debug, Clone, Serialize, Deserialize)] pub enum AstNode { Leaf(NodeLeaf), Branch { role: Role, children: Vec }, } /// The context window: four sections as Vec. /// All mutation goes through ContextState methods to maintain the invariant /// that token_ids on every leaf matches its rendered text. #[derive(Clone)] pub struct ContextState { system: Vec, identity: Vec, journal: Vec, conversation: Vec, } /// Identifies a section for mutation methods. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Section { System, Identity, Journal, Conversation, } /// Ephemeral handle for dispatching a tool call. Not persisted in the AST. #[derive(Debug, Clone)] pub struct PendingToolCall { pub name: String, pub arguments: String, pub id: String, } pub trait Ast { fn render(&self) -> String; fn token_ids(&self) -> Vec; fn tokens(&self) -> usize; } pub struct ResponseParser { branch_idx: usize, call_counter: u32, buf: String, content_parts: Vec, in_think: bool, think_buf: String, in_tool_call: bool, tool_call_buf: String, } impl Role { pub fn as_str(&self) -> &'static str { match self { Self::System => "system", Self::User => "user", Self::Assistant => "assistant", } } } impl NodeBody { /// Render this leaf body to text for the prompt. fn render_into(&self, out: &mut String) { match self { Self::Content(text) => out.push_str(text), Self::Thinking(_) => {}, Self::Log(_) => {}, Self::ToolCall { name, arguments } => { out.push_str("\n"); out.push_str(&format_tool_call_xml(name, arguments)); out.push_str("\n\n"); } Self::ToolResult(text) => { out.push_str("<|im_start|>tool\n"); out.push_str(text); out.push_str("<|im_end|>\n"); } Self::Memory { text, .. } => { out.push_str("<|im_start|>memory\n"); out.push_str(text); out.push_str("<|im_end|>\n"); } Self::Dmn(text) => { out.push_str("<|im_start|>dmn\n"); out.push_str(text); out.push_str("<|im_end|>\n"); } } } /// Whether this leaf contributes tokens to the prompt. fn render(&self) -> String { let mut s = String::new(); self.render_into(&mut s); s } fn is_prompt_visible(&self) -> bool { !matches!(self, Self::Thinking(_) | Self::Log(_)) } /// The text content of this leaf (for display, not rendering). pub fn text(&self) -> &str { match self { Self::Content(t) | Self::Thinking(t) | Self::Log(t) | Self::ToolResult(t) | Self::Dmn(t) => t, Self::ToolCall { name, .. } => name, Self::Memory { text, .. } => text, } } } impl NodeLeaf { fn new(body: NodeBody) -> Self { let token_ids = if body.is_prompt_visible() { tokenizer::encode(&body.render()) } else { vec![] }; Self { body, token_ids, timestamp: None } } pub fn with_timestamp(mut self, ts: DateTime) -> Self { self.timestamp = Some(ts); self } pub fn body(&self) -> &NodeBody { &self.body } pub fn token_ids(&self) -> &[u32] { &self.token_ids } pub fn tokens(&self) -> usize { self.token_ids.len() } pub fn timestamp(&self) -> Option> { self.timestamp } } impl AstNode { // -- Leaf constructors ---------------------------------------------------- pub fn content(text: impl Into) -> Self { Self::Leaf(NodeLeaf::new(NodeBody::Content(text.into()))) } pub fn thinking(text: impl Into) -> Self { Self::Leaf(NodeLeaf::new(NodeBody::Thinking(text.into()))) } pub fn tool_call(name: impl Into, arguments: impl Into) -> Self { Self::Leaf(NodeLeaf::new(NodeBody::ToolCall { name: name.into(), arguments: arguments.into(), })) } pub fn tool_result(text: impl Into) -> Self { Self::Leaf(NodeLeaf::new(NodeBody::ToolResult(text.into()))) } pub fn memory(key: impl Into, text: impl Into) -> Self { Self::Leaf(NodeLeaf::new(NodeBody::Memory { key: key.into(), text: text.into(), score: None, })) } pub fn dmn(text: impl Into) -> Self { Self::Leaf(NodeLeaf::new(NodeBody::Dmn(text.into()))) } pub fn log(text: impl Into) -> Self { Self::Leaf(NodeLeaf::new(NodeBody::Log(text.into()))) } // -- Branch constructors -------------------------------------------------- pub fn branch(role: Role, children: Vec) -> Self { Self::Branch { role, children } } pub fn system_msg(text: impl Into) -> Self { Self::Branch { role: Role::System, children: vec![Self::content(text)], } } pub fn user_msg(text: impl Into) -> Self { Self::Branch { role: Role::User, children: vec![Self::content(text)], } } // -- Builder -------------------------------------------------------------- pub fn with_timestamp(mut self, ts: DateTime) -> Self { match &mut self { Self::Leaf(leaf) => leaf.timestamp = Some(ts), Self::Branch { .. } => {} } self } pub fn children(&self) -> &[AstNode] { match self { Self::Branch { children, .. } => children, Self::Leaf(_) => &[], } } pub fn leaf(&self) -> Option<&NodeLeaf> { match self { Self::Leaf(l) => Some(l), _ => None, } } /// Short label for the UI. pub fn label(&self) -> String { let cfg = crate::config::get(); match self { Self::Branch { role, children } => { let preview = children.first() .and_then(|c| c.leaf()) .map(|l| truncate_preview(l.body.text(), 60)) .unwrap_or_default(); match role { Role::System => "system".into(), Role::User => format!("{}: {}", cfg.user_name, preview), Role::Assistant => format!("{}: {}", cfg.assistant_name, preview), } } Self::Leaf(leaf) => match &leaf.body { NodeBody::Content(t) => truncate_preview(t, 60), NodeBody::Thinking(t) => format!("thinking: {}", truncate_preview(t, 60)), NodeBody::ToolCall { name, .. } => format!("tool_call: {}", name), NodeBody::ToolResult(_) => "tool_result".into(), NodeBody::Memory { key, score, .. } => match score { Some(s) => format!("mem: {} score:{:.1}", key, s), None => format!("mem: {}", key), }, NodeBody::Dmn(_) => "dmn".into(), NodeBody::Log(t) => format!("log: {}", truncate_preview(t, 60)), }, } } } impl AstNode { fn render_into(&self, out: &mut String) { match self { Self::Leaf(leaf) => leaf.body.render_into(out), Self::Branch { role, children } => { out.push_str(&format!("<|im_start|>{}\n", role.as_str())); for child in children { child.render_into(out); } out.push_str("<|im_end|>\n"); } } } fn token_ids_into(&self, out: &mut Vec) { match self { Self::Leaf(leaf) => out.extend_from_slice(&leaf.token_ids), Self::Branch { role, children } => { out.push(tokenizer::IM_START); out.extend(tokenizer::encode(&format!("{}\n", role.as_str()))); for child in children { child.token_ids_into(out); } out.push(tokenizer::IM_END); out.extend(tokenizer::encode("\n")); } } } } impl Ast for AstNode { fn render(&self) -> String { let mut s = String::new(); self.render_into(&mut s); s } fn token_ids(&self) -> Vec { let mut ids = Vec::new(); self.token_ids_into(&mut ids); ids } fn tokens(&self) -> usize { match self { Self::Leaf(leaf) => leaf.tokens(), Self::Branch { role, children } => { 1 + tokenizer::encode(&format!("{}\n", role.as_str())).len() + children.iter().map(|c| c.tokens()).sum::() + 1 + tokenizer::encode("\n").len() } } } } fn truncate_preview(s: &str, max: usize) -> String { let preview: String = s.chars().take(max).collect(); let preview = preview.replace('\n', " "); if s.len() > max { format!("{}...", preview) } else { preview } } fn format_tool_call_xml(name: &str, args_json: &str) -> String { let args: serde_json::Value = serde_json::from_str(args_json) .unwrap_or(serde_json::Value::Object(Default::default())); let mut xml = format!("\n", name); if let Some(obj) = args.as_object() { for (key, value) in obj { let val_str = match value { serde_json::Value::String(s) => s.clone(), other => other.to_string(), }; xml.push_str(&format!("\n{}\n\n", key, val_str)); } } xml.push_str(""); xml } fn normalize_xml_tags(text: &str) -> String { let mut result = String::with_capacity(text.len()); let mut chars = text.chars().peekable(); while let Some(ch) = chars.next() { if ch == '<' { let mut tag = String::from('<'); for inner in chars.by_ref() { if inner == '>' { tag.push('>'); break; } else if inner.is_whitespace() { // Skip whitespace inside tags } else { tag.push(inner); } } result.push_str(&tag); } else { result.push(ch); } } result } fn parse_qwen_tag<'a>(s: &'a str, tag: &str) -> Option<(&'a str, &'a str, &'a str)> { let open = format!("<{}=", tag); let close = format!("", tag); let start = s.find(&open)? + open.len(); let name_end = start + s[start..].find('>')?; let body_start = name_end + 1; let body_end = body_start + s[body_start..].find(&close)?; Some(( s[start..name_end].trim(), s[body_start..body_end].trim(), &s[body_end + close.len()..], )) } fn parse_tool_call_body(body: &str) -> Option<(String, String)> { let normalized = normalize_xml_tags(body); let body = normalized.trim(); parse_xml_tool_call(body) .or_else(|| parse_json_tool_call(body)) } fn parse_xml_tool_call(body: &str) -> Option<(String, String)> { let (func_name, func_body, _) = parse_qwen_tag(body, "function")?; let mut args = serde_json::Map::new(); let mut rest = func_body; while let Some((key, val, remainder)) = parse_qwen_tag(rest, "parameter") { args.insert(key.to_string(), serde_json::Value::String(val.to_string())); rest = remainder; } Some((func_name.to_string(), serde_json::to_string(&args).unwrap_or_default())) } fn parse_json_tool_call(body: &str) -> Option<(String, String)> { let v: serde_json::Value = serde_json::from_str(body).ok()?; let name = v["name"].as_str()?; let arguments = &v["arguments"]; Some((name.to_string(), serde_json::to_string(arguments).unwrap_or_default())) } impl ResponseParser { pub fn new(branch_idx: usize) -> Self { Self { branch_idx, call_counter: 0, buf: String::new(), content_parts: Vec::new(), in_think: false, think_buf: String::new(), in_tool_call: false, tool_call_buf: String::new(), } } /// Consume a token stream, parse into the AST, yield tool calls. /// Spawns a background task. Returns a tool call receiver and a /// join handle that resolves to Ok(()) or the stream error. pub fn run( self, mut stream: tokio::sync::mpsc::UnboundedReceiver, agent: std::sync::Arc, ) -> ( tokio::sync::mpsc::UnboundedReceiver, tokio::task::JoinHandle>, ) { let (tx, rx) = tokio::sync::mpsc::unbounded_channel(); let handle = tokio::spawn(async move { let mut parser = self; let agent_name = agent.state.lock().await.provenance.clone(); let log_path = format!("/tmp/poc-{}.log", agent_name); let mut log_file = std::fs::OpenOptions::new() .create(true).append(true).open(&log_path).ok(); let mut full_text = String::new(); while let Some(event) = stream.recv().await { match event { super::api::StreamToken::Token { text, id } => { full_text.push_str(&text); let mut ctx = agent.context.lock().await; let calls = parser.feed_token(&text, id, &mut ctx); if !calls.is_empty() { if let Some(ref mut f) = log_file { use std::io::Write; for c in &calls { let _ = writeln!(f, "tool_call: {} args={}", c.name, &c.arguments[..c.arguments.len().min(200)]); } } } for call in calls { let _ = tx.send(call); } } super::api::StreamToken::Done { usage } => { if let Some(ref mut f) = log_file { use std::io::Write; let tc_count = full_text.matches("").count(); let ctx_tokens = agent.context.lock().await.tokens(); let _ = writeln!(f, "done: {} chars, {} tags, ctx: {} tokens", full_text.len(), tc_count, ctx_tokens); if tc_count == 0 && full_text.len() > 0 { let _ = writeln!(f, "full text:\n{}", &full_text[..full_text.len().min(2000)]); } for (i, part) in full_text.split("").enumerate() { if i > 0 { let _ = writeln!(f, "tool_call body: {}...", &part[..part.len().min(200)]); } } } if let Some(u) = usage { agent.state.lock().await.last_prompt_tokens = u.prompt_tokens; } let mut ctx = agent.context.lock().await; parser.finish(&mut ctx); return Ok(()); } super::api::StreamToken::Error(e) => { return Err(anyhow::anyhow!("{}", e)); } } } Ok(()) }); (rx, handle) } pub fn feed_token(&mut self, text: &str, _token_id: u32, ctx: &mut ContextState) -> Vec { let mut pending = Vec::new(); self.buf.push_str(text); loop { if self.in_think { match self.buf.find("") { Some(end) => { self.think_buf.push_str(&self.buf[..end]); self.buf = self.buf[end + 8..].to_string(); self.in_think = false; let text = std::mem::take(&mut self.think_buf).trim().to_string(); if !text.is_empty() { self.push_child(ctx, AstNode::thinking(text)); } continue; } None => { let safe = self.buf.len().saturating_sub(8); if safe > 0 { let safe = self.buf.floor_char_boundary(safe); self.think_buf.push_str(&self.buf[..safe]); self.buf = self.buf[safe..].to_string(); } break; } } } if self.in_tool_call { match self.buf.find("") { Some(end) => { self.tool_call_buf.push_str(&self.buf[..end]); self.buf = self.buf[end + 12..].to_string(); self.in_tool_call = false; if let Some((name, args)) = parse_tool_call_body(&self.tool_call_buf) { self.flush_content(ctx); self.push_child(ctx, AstNode::tool_call(&name, &args)); self.call_counter += 1; pending.push(PendingToolCall { name, arguments: args, id: format!("call_{}", self.call_counter), }); } self.tool_call_buf.clear(); continue; } None => { let safe = self.buf.len().saturating_sub(12); if safe > 0 { let safe = self.buf.floor_char_boundary(safe); self.tool_call_buf.push_str(&self.buf[..safe]); self.buf = self.buf[safe..].to_string(); } break; } } } let think_pos = self.buf.find(""); let tool_pos = self.buf.find(""); let next_tag = match (think_pos, tool_pos) { (Some(a), Some(b)) => Some(a.min(b)), (Some(a), None) => Some(a), (None, Some(b)) => Some(b), (None, None) => None, }; match next_tag { Some(pos) => { if pos > 0 { self.content_parts.push(self.buf[..pos].to_string()); } if self.buf[pos..].starts_with("") { self.buf = self.buf[pos + 7..].to_string(); self.flush_content(ctx); self.in_think = true; } else { self.buf = self.buf[pos + 11..].to_string(); self.flush_content(ctx); self.in_tool_call = true; } continue; } None => { let safe = self.buf.len().saturating_sub(11); if safe > 0 { let safe = self.buf.floor_char_boundary(safe); self.content_parts.push(self.buf[..safe].to_string()); self.buf = self.buf[safe..].to_string(); } break; } } } pending } fn push_child(&self, ctx: &mut ContextState, child: AstNode) { ctx.push_child(Section::Conversation, self.branch_idx, child); } fn flush_content(&mut self, ctx: &mut ContextState) { if !self.content_parts.is_empty() { let text: String = self.content_parts.drain(..).collect(); let text = text.trim().to_string(); if !text.is_empty() { self.push_child(ctx, AstNode::content(text)); } } } pub fn finish(mut self, ctx: &mut ContextState) { if !self.buf.is_empty() { self.content_parts.push(std::mem::take(&mut self.buf)); } self.flush_content(ctx); } } impl ContextState { pub fn new() -> Self { Self { system: Vec::new(), identity: Vec::new(), journal: Vec::new(), conversation: Vec::new(), } } // -- Read access ---------------------------------------------------------- pub fn system(&self) -> &[AstNode] { &self.system } pub fn identity(&self) -> &[AstNode] { &self.identity } pub fn journal(&self) -> &[AstNode] { &self.journal } pub fn conversation(&self) -> &[AstNode] { &self.conversation } fn sections(&self) -> [&Vec; 4] { [&self.system, &self.identity, &self.journal, &self.conversation] } } impl Ast for ContextState { fn render(&self) -> String { let mut s = String::new(); for section in self.sections() { for node in section { s.push_str(&node.render()); } } s } fn token_ids(&self) -> Vec { let mut ids = Vec::new(); for section in self.sections() { for node in section { ids.extend(node.token_ids()); } } ids } fn tokens(&self) -> usize { self.sections().iter() .flat_map(|s| s.iter()) .map(|n| n.tokens()) .sum() } } impl ContextState { fn section_mut(&mut self, section: Section) -> &mut Vec { match section { Section::System => &mut self.system, Section::Identity => &mut self.identity, Section::Journal => &mut self.journal, Section::Conversation => &mut self.conversation, } } pub fn push(&mut self, section: Section, node: AstNode) { self.section_mut(section).push(node); } /// Replace the body of a leaf at `index` in `section`. /// Re-tokenizes to maintain the invariant. pub fn set_message(&mut self, section: Section, index: usize, body: NodeBody) { let nodes = self.section_mut(section); let node = &mut nodes[index]; match node { AstNode::Leaf(leaf) => { let token_ids = if body.is_prompt_visible() { tokenizer::encode(&body.render()) } else { vec![] }; leaf.body = body; leaf.token_ids = token_ids; } AstNode::Branch { .. } => panic!("set_message on branch node"), } } /// Set the memory score on a Memory leaf at `index` in `section`. pub fn set_score(&mut self, section: Section, index: usize, score: Option) { let node = &mut self.section_mut(section)[index]; match node { AstNode::Leaf(leaf) => match &mut leaf.body { NodeBody::Memory { score: s, .. } => *s = score, _ => panic!("set_score on non-memory node"), }, _ => panic!("set_score on branch node"), } } pub fn del(&mut self, section: Section, index: usize) -> AstNode { self.section_mut(section).remove(index) } pub fn clear(&mut self, section: Section) { self.section_mut(section).clear(); } /// Push a child node into a branch at `index` in `section`. pub fn push_child(&mut self, section: Section, index: usize, child: AstNode) { let node = &mut self.section_mut(section)[index]; match node { AstNode::Branch { children, .. } => children.push(child), AstNode::Leaf(_) => panic!("push_child on leaf node"), } } /// Number of nodes in a section. pub fn len(&self, section: Section) -> usize { match section { Section::System => self.system.len(), Section::Identity => self.identity.len(), Section::Journal => self.journal.len(), Section::Conversation => self.conversation.len(), } } } pub fn context_window() -> usize { crate::config::get().api_context_window } pub fn context_budget_tokens() -> usize { context_window() * 80 / 100 } pub fn is_context_overflow(err: &anyhow::Error) -> bool { let msg = err.to_string().to_lowercase(); msg.contains("context length") || msg.contains("token limit") || msg.contains("too many tokens") || msg.contains("maximum context") || msg.contains("prompt is too long") || msg.contains("request too large") || msg.contains("input validation error") || msg.contains("content length limit") || (msg.contains("400") && msg.contains("tokens")) } pub fn is_stream_error(err: &anyhow::Error) -> bool { err.to_string().contains("model stream error") } #[cfg(test)] mod tests { use super::*; // -- Helpers for inspecting parse results ---------------------------------- fn bodies(nodes: &[AstNode]) -> Vec<&NodeBody> { nodes.iter().filter_map(|c| c.leaf()).map(|l| l.body()).collect() } fn assert_content(body: &NodeBody, expected: &str) { match body { NodeBody::Content(t) => assert_eq!(t, expected), other => panic!("expected Content, got {:?}", other), } } fn assert_thinking(body: &NodeBody, expected: &str) { match body { NodeBody::Thinking(t) => assert_eq!(t, expected), other => panic!("expected Thinking, got {:?}", other), } } fn assert_tool_call<'a>(body: &'a NodeBody, expected_name: &str) -> &'a str { match body { NodeBody::ToolCall { name, arguments } => { assert_eq!(name, expected_name); arguments } other => panic!("expected ToolCall, got {:?}", other), } } // -- XML parsing tests ---------------------------------------------------- #[test] fn test_tool_call_xml_parse_clean() { let body = "\npoc-memory used core-personality\n"; let (name, args) = parse_tool_call_body(body).unwrap(); assert_eq!(name, "bash"); let args: serde_json::Value = serde_json::from_str(&args).unwrap(); assert_eq!(args["command"], "poc-memory used core-personality"); } #[test] fn test_tool_call_xml_parse_streamed_whitespace() { let body = "<\nfunction\n=\nbash\n>\n<\nparameter\n=\ncommand\n>pwd\n"; let (name, args) = parse_tool_call_body(body).unwrap(); assert_eq!(name, "bash"); let args: serde_json::Value = serde_json::from_str(&args).unwrap(); assert_eq!(args["command"], "pwd"); } #[test] fn test_tool_call_json_parse() { let body = r#"{"name": "bash", "arguments": {"command": "ls"}}"#; let (name, args) = parse_tool_call_body(body).unwrap(); assert_eq!(name, "bash"); let args: serde_json::Value = serde_json::from_str(&args).unwrap(); assert_eq!(args["command"], "ls"); } #[test] fn test_normalize_preserves_content() { let text = "\necho hello world\n"; let normalized = normalize_xml_tags(text); assert_eq!(normalized, text); } #[test] fn test_normalize_strips_tag_internal_whitespace() { assert_eq!(normalize_xml_tags("<\nfunction\n=\nbash\n>"), ""); } // -- ResponseParser tests ------------------------------------------------- /// Set up a ContextState with an assistant branch, run the parser, /// return the children that were pushed into the branch. fn parse_into_ctx(chunks: &[&str]) -> (ContextState, Vec) { let mut ctx = ContextState::new(); ctx.push(Section::Conversation, AstNode::branch(Role::Assistant, vec![])); let mut p = ResponseParser::new(0); let mut calls = Vec::new(); for chunk in chunks { // Feed each chunk as a single token (id=0 for tests) calls.extend(p.feed_token(chunk, 0, &mut ctx)); } p.finish(&mut ctx); (ctx, calls) } fn assistant_children(ctx: &ContextState) -> &[AstNode] { ctx.conversation()[0].children() } #[test] fn test_parser_plain_text() { let (ctx, _) = parse_into_ctx(&["hello world"]); let b = bodies(assistant_children(&ctx)); assert_eq!(b.len(), 1); assert_content(b[0], "hello world"); } #[test] fn test_parser_thinking_then_content() { let (ctx, _) = parse_into_ctx(&["reasoninganswer"]); let b = bodies(assistant_children(&ctx)); assert_eq!(b.len(), 2); assert_thinking(b[0], "reasoning"); assert_content(b[1], "answer"); } #[test] fn test_parser_tool_call() { let (ctx, calls) = parse_into_ctx(&[ "\n\nls\n\n" ]); assert_eq!(calls.len(), 1); assert_eq!(calls[0].name, "bash"); let b = bodies(assistant_children(&ctx)); assert_eq!(b.len(), 1); let args = assert_tool_call(b[0], "bash"); let args: serde_json::Value = serde_json::from_str(args).unwrap(); assert_eq!(args["command"], "ls"); } #[test] fn test_parser_content_then_tool_call_then_content() { let (ctx, _) = parse_into_ctx(&[ "before", "\n\npwd\n\n", "after", ]); let b = bodies(assistant_children(&ctx)); assert_eq!(b.len(), 3); assert_content(b[0], "before"); assert_tool_call(b[1], "bash"); assert_content(b[2], "after"); } #[test] fn test_parser_incremental_feed() { let text = "thoughtresponse"; let mut ctx = ContextState::new(); ctx.push(Section::Conversation, AstNode::branch(Role::Assistant, vec![])); let mut p = ResponseParser::new(0); for ch in text.chars() { p.feed_token(&ch.to_string(), 0, &mut ctx); } p.finish(&mut ctx); let b = bodies(assistant_children(&ctx)); assert_eq!(b.len(), 2); assert_thinking(b[0], "thought"); assert_content(b[1], "response"); } #[test] fn test_parser_incremental_tool_call() { let text = "text\n\nls\n\nmore"; let mut ctx = ContextState::new(); ctx.push(Section::Conversation, AstNode::branch(Role::Assistant, vec![])); let mut p = ResponseParser::new(0); let mut tool_calls = 0; for ch in text.chars() { tool_calls += p.feed_token(&ch.to_string(), 0, &mut ctx).len(); } p.finish(&mut ctx); assert_eq!(tool_calls, 1); let b = bodies(assistant_children(&ctx)); assert_eq!(b.len(), 3); assert_content(b[0], "text"); assert_tool_call(b[1], "bash"); assert_content(b[2], "more"); } #[test] fn test_parser_thinking_tool_call_content() { let (ctx, _) = parse_into_ctx(&[ "let me think", "\n\n/etc/hosts\n\n", "here's what I found", ]); let b = bodies(assistant_children(&ctx)); assert_eq!(b.len(), 3); assert_thinking(b[0], "let me think"); assert_tool_call(b[1], "read"); assert_content(b[2], "here's what I found"); } // -- Round-trip rendering tests ------------------------------------------- #[test] fn test_render_system_msg() { let node = AstNode::system_msg("you are helpful"); assert_eq!(node.render(), "<|im_start|>system\nyou are helpful<|im_end|>\n"); } #[test] fn test_render_user_msg() { let node = AstNode::user_msg("hello"); assert_eq!(node.render(), "<|im_start|>user\nhello<|im_end|>\n"); } #[test] fn test_render_assistant_with_thinking_and_content() { let node = AstNode::branch(Role::Assistant, vec![ AstNode::thinking("hmm"), AstNode::content("answer"), ]); // Thinking renders as empty, content renders as-is assert_eq!(node.render(), "<|im_start|>assistant\nanswer<|im_end|>\n"); } #[test] fn test_render_tool_result() { let node = AstNode::tool_result("output here"); assert_eq!(node.render(), "<|im_start|>tool\noutput here<|im_end|>\n"); } #[test] fn test_render_memory() { let node = AstNode::memory("identity", "I am Proof of Concept"); assert_eq!(node.render(), "<|im_start|>memory\nI am Proof of Concept<|im_end|>\n"); } #[test] fn test_render_dmn() { let node = AstNode::dmn("subconscious prompt"); assert_eq!(node.render(), "<|im_start|>dmn\nsubconscious prompt<|im_end|>\n"); } #[test] fn test_render_tool_call() { let node = AstNode::tool_call("bash", r#"{"command":"ls"}"#); let rendered = node.render(); assert!(rendered.contains("")); assert!(rendered.contains("")); assert!(rendered.contains("")); assert!(rendered.contains("ls")); assert!(rendered.contains("")); } // -- Tokenizer round-trip tests ------------------------------------------- // These require the tokenizer file; skipped if not present. fn init_tokenizer() -> bool { let path = format!("{}/.consciousness/tokenizer-qwen35.json", std::env::var("HOME").unwrap_or_default()); if std::path::Path::new(&path).exists() { tokenizer::init(&path); true } else { false } } fn assert_token_invariants(node: &AstNode) { assert_eq!(node.tokens(), node.token_ids().len(), "tokens() != token_ids().len()"); } #[test] fn test_tokenize_roundtrip_leaf_types() { if !init_tokenizer() { return; } assert_token_invariants(&AstNode::system_msg("you are a helpful assistant")); assert_token_invariants(&AstNode::user_msg("what is 2+2?")); assert_token_invariants(&AstNode::tool_result("4")); assert_token_invariants(&AstNode::memory("identity", "I am Proof of Concept")); assert_token_invariants(&AstNode::dmn("check the memory store")); assert_token_invariants(&AstNode::tool_call("bash", r#"{"command":"ls -la"}"#)); } #[test] fn test_tokenize_roundtrip_assistant_branch() { if !init_tokenizer() { return; } let node = AstNode::branch(Role::Assistant, vec![ AstNode::content("here's what I found:\n"), AstNode::tool_call("bash", r#"{"command":"pwd"}"#), AstNode::content("\nthat's the current directory"), ]); assert_token_invariants(&node); } #[test] fn test_tokenize_invisible_nodes_are_zero() { if !init_tokenizer() { return; } assert_eq!(AstNode::thinking("deep thoughts").tokens(), 0); assert_eq!(AstNode::log("debug info").tokens(), 0); } #[test] fn test_tokenize_decode_roundtrip() { if !init_tokenizer() { return; } // Content without special tokens round-trips through decode let text = "hello world, this is a test"; let ids = tokenizer::encode(text); let decoded = tokenizer::decode(&ids); assert_eq!(decoded, text); } #[test] fn test_tokenize_context_state_matches_concatenation() { if !init_tokenizer() { return; } let mut ctx = ContextState::new(); ctx.push(Section::System, AstNode::system_msg("you are helpful")); ctx.push(Section::Identity, AstNode::memory("name", "Proof of Concept")); ctx.push(Section::Conversation, AstNode::user_msg("hi")); assert_eq!(ctx.tokens(), ctx.token_ids().len()); } #[test] fn test_parser_roundtrip_through_tokenizer() { if !init_tokenizer() { return; } let (ctx, _) = parse_into_ctx(&[ "I'll check that for you", "\n\nls\n\n", ]); let node = &ctx.conversation()[0]; assert_token_invariants(node); assert!(node.tokens() > 0); } }