Compare commits
7 commits
592a3e2e52
...
0d1044c2e8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0d1044c2e8 | ||
|
|
b8485ed6c1 | ||
|
|
e59f6a59e2 | ||
|
|
6f20e68865 | ||
|
|
204ba5570a | ||
|
|
91106deaa1 | ||
|
|
0bf71b9110 |
12 changed files with 439 additions and 116 deletions
7
Cargo.lock
generated
7
Cargo.lock
generated
|
|
@ -492,6 +492,7 @@ dependencies = [
|
||||||
"http-body-util",
|
"http-body-util",
|
||||||
"hyper",
|
"hyper",
|
||||||
"hyper-util",
|
"hyper-util",
|
||||||
|
"imagesize",
|
||||||
"json-five",
|
"json-five",
|
||||||
"libc",
|
"libc",
|
||||||
"log",
|
"log",
|
||||||
|
|
@ -1423,6 +1424,12 @@ dependencies = [
|
||||||
"winapi-util",
|
"winapi-util",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "imagesize"
|
||||||
|
version = "0.14.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09e54e57b4c48b40f7aec75635392b12b3421fa26fe8b4332e63138ed278459c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indexmap"
|
name = "indexmap"
|
||||||
version = "2.14.0"
|
version = "2.14.0"
|
||||||
|
|
|
||||||
|
|
@ -68,6 +68,7 @@ hyper-util = { version = "0.1", features = ["tokio"], default-features = false }
|
||||||
http-body-util = "0.1"
|
http-body-util = "0.1"
|
||||||
bytes = "1"
|
bytes = "1"
|
||||||
base64 = "0.22"
|
base64 = "0.22"
|
||||||
|
imagesize = "0.14"
|
||||||
|
|
||||||
rustls = "0.23"
|
rustls = "0.23"
|
||||||
tokio-rustls = "0.26"
|
tokio-rustls = "0.26"
|
||||||
|
|
|
||||||
|
|
@ -78,18 +78,31 @@ impl ApiClient {
|
||||||
prompt_tokens: &[u32],
|
prompt_tokens: &[u32],
|
||||||
sampling: SamplingParams,
|
sampling: SamplingParams,
|
||||||
priority: Option<i32>,
|
priority: Option<i32>,
|
||||||
|
) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) {
|
||||||
|
self.stream_completion_mm(prompt_tokens, &[], sampling, priority)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn stream_completion_mm(
|
||||||
|
&self,
|
||||||
|
prompt_tokens: &[u32],
|
||||||
|
images: &[super::context::WireImage],
|
||||||
|
sampling: SamplingParams,
|
||||||
|
priority: Option<i32>,
|
||||||
) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) {
|
) -> (mpsc::UnboundedReceiver<StreamToken>, AbortOnDrop) {
|
||||||
let (tx, rx) = mpsc::unbounded_channel();
|
let (tx, rx) = mpsc::unbounded_channel();
|
||||||
let client = self.client.clone();
|
let client = self.client.clone();
|
||||||
let api_key = self.api_key.clone();
|
let api_key = self.api_key.clone();
|
||||||
let model = self.model.clone();
|
let model = self.model.clone();
|
||||||
let prompt_tokens = prompt_tokens.to_vec();
|
let prompt_tokens = prompt_tokens.to_vec();
|
||||||
|
let images: Vec<(Vec<u8>, String)> = images.iter()
|
||||||
|
.map(|i| (i.bytes.clone(), i.mime.clone()))
|
||||||
|
.collect();
|
||||||
let base_url = self.base_url.clone();
|
let base_url = self.base_url.clone();
|
||||||
|
|
||||||
let handle = tokio::spawn(async move {
|
let handle = tokio::spawn(async move {
|
||||||
let result = stream_completions(
|
let result = stream_completions(
|
||||||
&client, &base_url, &api_key, &model,
|
&client, &base_url, &api_key, &model,
|
||||||
&prompt_tokens, &tx, sampling, priority,
|
&prompt_tokens, &images, &tx, sampling, priority,
|
||||||
).await;
|
).await;
|
||||||
if let Err(e) = result {
|
if let Err(e) = result {
|
||||||
let _ = tx.send(StreamToken::Error(e.to_string()));
|
let _ = tx.send(StreamToken::Error(e.to_string()));
|
||||||
|
|
@ -110,6 +123,7 @@ async fn stream_completions(
|
||||||
api_key: &str,
|
api_key: &str,
|
||||||
model: &str,
|
model: &str,
|
||||||
prompt_tokens: &[u32],
|
prompt_tokens: &[u32],
|
||||||
|
images: &[(Vec<u8>, String)],
|
||||||
tx: &mpsc::UnboundedSender<StreamToken>,
|
tx: &mpsc::UnboundedSender<StreamToken>,
|
||||||
sampling: SamplingParams,
|
sampling: SamplingParams,
|
||||||
priority: Option<i32>,
|
priority: Option<i32>,
|
||||||
|
|
@ -126,6 +140,14 @@ async fn stream_completions(
|
||||||
"skip_special_tokens": false,
|
"skip_special_tokens": false,
|
||||||
"stop_token_ids": [super::tokenizer::IM_END],
|
"stop_token_ids": [super::tokenizer::IM_END],
|
||||||
});
|
});
|
||||||
|
if !images.is_empty() {
|
||||||
|
use base64::Engine;
|
||||||
|
let b64 = base64::engine::general_purpose::STANDARD;
|
||||||
|
let uris: Vec<String> = images.iter()
|
||||||
|
.map(|(bytes, mime)| format!("data:{};base64,{}", mime, b64.encode(bytes)))
|
||||||
|
.collect();
|
||||||
|
request["multi_modal_data"] = serde_json::json!({ "image": uris });
|
||||||
|
}
|
||||||
if let Some(p) = priority {
|
if let Some(p) = priority {
|
||||||
request["priority"] = serde_json::json!(p);
|
request["priority"] = serde_json::json!(p);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -81,10 +81,33 @@ pub enum NodeBody {
|
||||||
Memory { key: String, text: String, score: Option<f64> },
|
Memory { key: String, text: String, score: Option<f64> },
|
||||||
Dmn(String),
|
Dmn(String),
|
||||||
|
|
||||||
|
// Vision input — rendered as <|vision_start|> <|image_pad|>×N <|vision_end|>.
|
||||||
|
// `token_count` is N, the count vLLM will compute for this image's grid.
|
||||||
|
Image {
|
||||||
|
#[serde(with = "b64_bytes")]
|
||||||
|
bytes: Vec<u8>,
|
||||||
|
mime: String,
|
||||||
|
orig_height: u32,
|
||||||
|
orig_width: u32,
|
||||||
|
token_count: u32,
|
||||||
|
},
|
||||||
|
|
||||||
// Non-visible (0 tokens in prompt)
|
// Non-visible (0 tokens in prompt)
|
||||||
Log(String),
|
Log(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mod b64_bytes {
|
||||||
|
use base64::{Engine, engine::general_purpose::STANDARD};
|
||||||
|
use serde::{Serializer, Deserializer, Deserialize};
|
||||||
|
pub fn serialize<S: Serializer>(bytes: &[u8], s: S) -> Result<S::Ok, S::Error> {
|
||||||
|
s.serialize_str(&STANDARD.encode(bytes))
|
||||||
|
}
|
||||||
|
pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<u8>, D::Error> {
|
||||||
|
let s = String::deserialize(d)?;
|
||||||
|
STANDARD.decode(s).map_err(serde::de::Error::custom)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A leaf node: typed content with cached token IDs.
|
/// A leaf node: typed content with cached token IDs.
|
||||||
/// Token IDs are not serialized — they're recomputed on deserialization.
|
/// Token IDs are not serialized — they're recomputed on deserialization.
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
|
|
@ -103,11 +126,7 @@ impl<'de> Deserialize<'de> for NodeLeaf {
|
||||||
timestamp: DateTime<Utc>,
|
timestamp: DateTime<Utc>,
|
||||||
}
|
}
|
||||||
let raw = Raw::deserialize(deserializer)?;
|
let raw = Raw::deserialize(deserializer)?;
|
||||||
let token_ids = if raw.body.is_prompt_visible() {
|
let token_ids = raw.body.compute_token_ids();
|
||||||
tokenizer::encode(&raw.body.render())
|
|
||||||
} else {
|
|
||||||
vec![]
|
|
||||||
};
|
|
||||||
Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp })
|
Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -221,6 +240,13 @@ impl NodeBody {
|
||||||
out.push_str(text);
|
out.push_str(text);
|
||||||
out.push_str("<|im_end|>\n");
|
out.push_str("<|im_end|>\n");
|
||||||
}
|
}
|
||||||
|
Self::Image { token_count, .. } => {
|
||||||
|
out.push_str("<|vision_start|>");
|
||||||
|
for _ in 0..*token_count {
|
||||||
|
out.push_str("<|image_pad|>");
|
||||||
|
}
|
||||||
|
out.push_str("<|vision_end|>");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -235,6 +261,26 @@ impl NodeBody {
|
||||||
!matches!(self, Self::Thinking(_) | Self::Log(_))
|
!matches!(self, Self::Thinking(_) | Self::Log(_))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Hand-assemble token IDs for body types where running the tokenizer
|
||||||
|
/// on the rendered text would be needlessly expensive (Image). Falls
|
||||||
|
/// back to encoding the rendered text for everything else.
|
||||||
|
fn compute_token_ids(&self) -> Vec<u32> {
|
||||||
|
if !self.is_prompt_visible() {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
match self {
|
||||||
|
Self::Image { token_count, .. } => {
|
||||||
|
let mut ids = Vec::with_capacity(*token_count as usize + 2);
|
||||||
|
ids.push(tokenizer::VISION_START);
|
||||||
|
ids.extend(std::iter::repeat(tokenizer::IMAGE_PAD)
|
||||||
|
.take(*token_count as usize));
|
||||||
|
ids.push(tokenizer::VISION_END);
|
||||||
|
ids
|
||||||
|
}
|
||||||
|
_ => tokenizer::encode(&self.render()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// The text content of this leaf (for display, not rendering).
|
/// The text content of this leaf (for display, not rendering).
|
||||||
pub fn text(&self) -> &str {
|
pub fn text(&self) -> &str {
|
||||||
match self {
|
match self {
|
||||||
|
|
@ -242,17 +288,14 @@ impl NodeBody {
|
||||||
| Self::ToolResult(t) | Self::Dmn(t) => t,
|
| Self::ToolResult(t) | Self::Dmn(t) => t,
|
||||||
Self::ToolCall { name, .. } => name,
|
Self::ToolCall { name, .. } => name,
|
||||||
Self::Memory { text, .. } => text,
|
Self::Memory { text, .. } => text,
|
||||||
|
Self::Image { mime, .. } => mime,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NodeLeaf {
|
impl NodeLeaf {
|
||||||
fn new(body: NodeBody) -> Self {
|
fn new(body: NodeBody) -> Self {
|
||||||
let token_ids = if body.is_prompt_visible() {
|
let token_ids = body.compute_token_ids();
|
||||||
tokenizer::encode(&body.render())
|
|
||||||
} else {
|
|
||||||
vec![]
|
|
||||||
};
|
|
||||||
Self { body, token_ids, timestamp: Utc::now() }
|
Self { body, token_ids, timestamp: Utc::now() }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -305,6 +348,24 @@ impl AstNode {
|
||||||
Self::Leaf(NodeLeaf::new(NodeBody::Log(text.into())))
|
Self::Leaf(NodeLeaf::new(NodeBody::Log(text.into())))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Build an Image leaf. `token_count` is computed from the image
|
||||||
|
/// dimensions using Qwen3-VL's resizing rules.
|
||||||
|
pub fn image(
|
||||||
|
bytes: Vec<u8>,
|
||||||
|
mime: impl Into<String>,
|
||||||
|
orig_height: u32,
|
||||||
|
orig_width: u32,
|
||||||
|
) -> Self {
|
||||||
|
let token_count = qwen3_image_token_count(orig_height, orig_width);
|
||||||
|
Self::Leaf(NodeLeaf::new(NodeBody::Image {
|
||||||
|
bytes,
|
||||||
|
mime: mime.into(),
|
||||||
|
orig_height,
|
||||||
|
orig_width,
|
||||||
|
token_count,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
// -- Branch constructors --------------------------------------------------
|
// -- Branch constructors --------------------------------------------------
|
||||||
|
|
||||||
pub fn branch(role: Role, children: Vec<AstNode>) -> Self {
|
pub fn branch(role: Role, children: Vec<AstNode>) -> Self {
|
||||||
|
|
@ -334,11 +395,7 @@ impl AstNode {
|
||||||
pub fn retokenize(self) -> Self {
|
pub fn retokenize(self) -> Self {
|
||||||
match self {
|
match self {
|
||||||
Self::Leaf(leaf) => {
|
Self::Leaf(leaf) => {
|
||||||
let token_ids = if leaf.body.is_prompt_visible() {
|
let token_ids = leaf.body.compute_token_ids();
|
||||||
tokenizer::encode(&leaf.body.render())
|
|
||||||
} else {
|
|
||||||
vec![]
|
|
||||||
};
|
|
||||||
Self::Leaf(NodeLeaf { token_ids, ..leaf })
|
Self::Leaf(NodeLeaf { token_ids, ..leaf })
|
||||||
}
|
}
|
||||||
Self::Branch { role, children, timestamp, memory_scores } => Self::Branch {
|
Self::Branch { role, children, timestamp, memory_scores } => Self::Branch {
|
||||||
|
|
@ -397,6 +454,8 @@ impl AstNode {
|
||||||
None => format!("mem: {}", key),
|
None => format!("mem: {}", key),
|
||||||
},
|
},
|
||||||
NodeBody::Dmn(_) => "dmn".into(),
|
NodeBody::Dmn(_) => "dmn".into(),
|
||||||
|
NodeBody::Image { orig_height, orig_width, token_count, .. } =>
|
||||||
|
format!("image: {}x{} ({} tokens)", orig_width, orig_height, token_count),
|
||||||
NodeBody::Log(t) => format!("log: {}", truncate_preview(t, 60)),
|
NodeBody::Log(t) => format!("log: {}", truncate_preview(t, 60)),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
@ -825,6 +884,58 @@ impl Ast for ContextState {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An image collected from the AST for a request body. The AST stores
|
||||||
|
/// the pre-expanded token form (N image_pads) for accurate budget
|
||||||
|
/// accounting; the wire form collapses each Image to a single
|
||||||
|
/// `<|image_pad|>` between vision bookends and ships the bytes
|
||||||
|
/// separately as multi_modal_data.
|
||||||
|
pub struct WireImage {
|
||||||
|
pub bytes: Vec<u8>,
|
||||||
|
pub mime: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn wire_into(node: &AstNode, tokens: &mut Vec<u32>, images: &mut Vec<WireImage>) {
|
||||||
|
match node {
|
||||||
|
AstNode::Leaf(leaf) => match leaf.body() {
|
||||||
|
NodeBody::Image { bytes, mime, .. } => {
|
||||||
|
tokens.push(tokenizer::VISION_START);
|
||||||
|
tokens.push(tokenizer::IMAGE_PAD);
|
||||||
|
tokens.push(tokenizer::VISION_END);
|
||||||
|
images.push(WireImage {
|
||||||
|
bytes: bytes.clone(),
|
||||||
|
mime: mime.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
_ => tokens.extend_from_slice(leaf.token_ids()),
|
||||||
|
},
|
||||||
|
AstNode::Branch { role, children, .. } => {
|
||||||
|
tokens.push(tokenizer::IM_START);
|
||||||
|
tokens.extend(tokenizer::encode(&format!("{}\n", role.as_str())));
|
||||||
|
for c in children {
|
||||||
|
wire_into(c, tokens, images);
|
||||||
|
}
|
||||||
|
tokens.push(tokenizer::IM_END);
|
||||||
|
tokens.extend(tokenizer::encode("\n"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ContextState {
|
||||||
|
/// Assemble the prompt in wire form: token stream with a single
|
||||||
|
/// `<|image_pad|>` per image (vLLM expands back to N), plus the list
|
||||||
|
/// of images to send as multi_modal_data.
|
||||||
|
pub fn wire_prompt(&self) -> (Vec<u32>, Vec<WireImage>) {
|
||||||
|
let mut tokens = Vec::new();
|
||||||
|
let mut images = Vec::new();
|
||||||
|
for section in self.sections() {
|
||||||
|
for node in section {
|
||||||
|
wire_into(node, &mut tokens, &mut images);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(tokens, images)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl ContextState {
|
impl ContextState {
|
||||||
fn section_mut(&mut self, section: Section) -> &mut Vec<AstNode> {
|
fn section_mut(&mut self, section: Section) -> &mut Vec<AstNode> {
|
||||||
match section {
|
match section {
|
||||||
|
|
@ -857,11 +968,7 @@ impl ContextState {
|
||||||
let node = &mut nodes[index];
|
let node = &mut nodes[index];
|
||||||
match node {
|
match node {
|
||||||
AstNode::Leaf(leaf) => {
|
AstNode::Leaf(leaf) => {
|
||||||
let token_ids = if body.is_prompt_visible() {
|
let token_ids = body.compute_token_ids();
|
||||||
tokenizer::encode(&body.render())
|
|
||||||
} else {
|
|
||||||
vec![]
|
|
||||||
};
|
|
||||||
leaf.body = body;
|
leaf.body = body;
|
||||||
leaf.token_ids = token_ids;
|
leaf.token_ids = token_ids;
|
||||||
}
|
}
|
||||||
|
|
@ -991,6 +1098,58 @@ impl ContextState {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Qwen3-VL image token count
|
||||||
|
//
|
||||||
|
// Port of Qwen2VLImageProcessor.smart_resize + image_token_count. We need the
|
||||||
|
// exact same answer that vLLM's Qwen3VL processor will produce, because the
|
||||||
|
// token stream in our context must match what vLLM expands `<|image_pad|>`
|
||||||
|
// to at request time. Constants come from Qwen3.5-27B's preprocessor_config.
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
const QWEN3_PATCH_SIZE: u32 = 16;
|
||||||
|
const QWEN3_MERGE_SIZE: u32 = 2;
|
||||||
|
const QWEN3_MIN_PIXELS: u64 = 65_536;
|
||||||
|
const QWEN3_MAX_PIXELS: u64 = 16_777_216;
|
||||||
|
|
||||||
|
fn smart_resize(h: u32, w: u32, factor: u32, min_pixels: u64, max_pixels: u64) -> (u32, u32) {
|
||||||
|
let max_s = h.max(w) as f64;
|
||||||
|
let min_s = h.min(w) as f64;
|
||||||
|
assert!(max_s / min_s <= 200.0, "aspect ratio too extreme: {}x{}", h, w);
|
||||||
|
|
||||||
|
let fh = h as f64;
|
||||||
|
let fw = w as f64;
|
||||||
|
let ff = factor as f64;
|
||||||
|
|
||||||
|
let h_bar = ((fh / ff).round() as u32) * factor;
|
||||||
|
let w_bar = ((fw / ff).round() as u32) * factor;
|
||||||
|
let total = (h_bar as u64) * (w_bar as u64);
|
||||||
|
|
||||||
|
if total > max_pixels {
|
||||||
|
let beta = ((fh * fw) / max_pixels as f64).sqrt();
|
||||||
|
let hf = ((fh / beta / ff).floor() as u32) * factor;
|
||||||
|
let wf = ((fw / beta / ff).floor() as u32) * factor;
|
||||||
|
(hf.max(factor), wf.max(factor))
|
||||||
|
} else if total < min_pixels {
|
||||||
|
let beta = (min_pixels as f64 / (fh * fw)).sqrt();
|
||||||
|
let hc = ((fh * beta / ff).ceil() as u32) * factor;
|
||||||
|
let wc = ((fw * beta / ff).ceil() as u32) * factor;
|
||||||
|
(hc, wc)
|
||||||
|
} else {
|
||||||
|
(h_bar, w_bar)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute how many `<|image_pad|>` tokens vLLM will emit for an image of
|
||||||
|
/// the given dimensions. Matches Qwen3VL's feature-size calculation exactly:
|
||||||
|
/// (grid_h * grid_w) / merge_size^2
|
||||||
|
/// where (grid_h, grid_w) = resized dims / patch_size.
|
||||||
|
fn qwen3_image_token_count(orig_h: u32, orig_w: u32) -> u32 {
|
||||||
|
let factor = QWEN3_PATCH_SIZE * QWEN3_MERGE_SIZE;
|
||||||
|
let (rh, rw) = smart_resize(orig_h, orig_w, factor, QWEN3_MIN_PIXELS, QWEN3_MAX_PIXELS);
|
||||||
|
(rh / QWEN3_PATCH_SIZE) * (rw / QWEN3_PATCH_SIZE) / (QWEN3_MERGE_SIZE * QWEN3_MERGE_SIZE)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn context_window() -> usize {
|
pub fn context_window() -> usize {
|
||||||
let app = crate::config::app();
|
let app = crate::config::app();
|
||||||
app.backends.get(&app.default_backend)
|
app.backends.get(&app.default_backend)
|
||||||
|
|
@ -1370,6 +1529,110 @@ mod tests {
|
||||||
assert!(serde_json::from_str::<AstNode>(json).is_err());
|
assert!(serde_json::from_str::<AstNode>(json).is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -- Image leaf tests ---------------------------------------------------------
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_smart_resize_within_bounds() {
|
||||||
|
// Typical case: 1024x768 → rounded to multiples of 32, under max.
|
||||||
|
let (h, w) = smart_resize(768, 1024, 32, 65_536, 16_777_216);
|
||||||
|
assert_eq!(h, 768);
|
||||||
|
assert_eq!(w, 1024);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_smart_resize_upscales_tiny() {
|
||||||
|
// 32x32 = 1024 pixels, below min_pixels=65536. Should scale up.
|
||||||
|
let (h, w) = smart_resize(32, 32, 32, 65_536, 16_777_216);
|
||||||
|
assert!((h as u64) * (w as u64) >= 65_536,
|
||||||
|
"resized {}x{} is under min_pixels", h, w);
|
||||||
|
assert_eq!(h % 32, 0);
|
||||||
|
assert_eq!(w % 32, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_smart_resize_downscales_huge() {
|
||||||
|
// 8000x6000 = 48M pixels, above max_pixels=16M. Should scale down.
|
||||||
|
let (h, w) = smart_resize(8000, 6000, 32, 65_536, 16_777_216);
|
||||||
|
assert!((h as u64) * (w as u64) <= 16_777_216,
|
||||||
|
"resized {}x{} exceeds max_pixels", h, w);
|
||||||
|
assert_eq!(h % 32, 0);
|
||||||
|
assert_eq!(w % 32, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_qwen3_token_count_matches_formula() {
|
||||||
|
// 512x512 → resized to 512x512 (already multiple of 32, within bounds).
|
||||||
|
// grid = 32x32, tokens = 32*32/4 = 256.
|
||||||
|
assert_eq!(qwen3_image_token_count(512, 512), 256);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_image_render_and_token_ids() {
|
||||||
|
let node = AstNode::image(vec![0u8, 1, 2, 3], "image/png", 512, 512);
|
||||||
|
let leaf = node.leaf().unwrap();
|
||||||
|
// 3 tokens of bookend + 256 image_pad tokens
|
||||||
|
assert_eq!(leaf.token_ids().len(), 258);
|
||||||
|
assert_eq!(leaf.token_ids()[0], tokenizer::VISION_START);
|
||||||
|
assert_eq!(leaf.token_ids()[257], tokenizer::VISION_END);
|
||||||
|
for pad in &leaf.token_ids()[1..257] {
|
||||||
|
assert_eq!(*pad, tokenizer::IMAGE_PAD);
|
||||||
|
}
|
||||||
|
// Rendered text has the expected bookends.
|
||||||
|
let rendered = leaf.body().render();
|
||||||
|
assert!(rendered.starts_with("<|vision_start|>"));
|
||||||
|
assert!(rendered.ends_with("<|vision_end|>"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_wire_prompt_collapses_image_pads() {
|
||||||
|
let mut ctx = ContextState::new();
|
||||||
|
ctx.push_no_log(Section::Conversation, AstNode::branch(Role::User, vec![
|
||||||
|
AstNode::content("look:"),
|
||||||
|
AstNode::image(vec![0xDE, 0xAD], "image/png", 512, 512),
|
||||||
|
]));
|
||||||
|
|
||||||
|
// AST side: N image_pads + bookends, full budget accounting.
|
||||||
|
let full = ctx.token_ids();
|
||||||
|
let n_image_pads_full = full.iter()
|
||||||
|
.filter(|&&t| t == tokenizer::IMAGE_PAD).count();
|
||||||
|
assert_eq!(n_image_pads_full, qwen3_image_token_count(512, 512) as usize);
|
||||||
|
|
||||||
|
// Wire side: single image_pad, bytes moved to images list.
|
||||||
|
let (wire, images) = ctx.wire_prompt();
|
||||||
|
let n_image_pads_wire = wire.iter()
|
||||||
|
.filter(|&&t| t == tokenizer::IMAGE_PAD).count();
|
||||||
|
assert_eq!(n_image_pads_wire, 1);
|
||||||
|
assert_eq!(images.len(), 1);
|
||||||
|
assert_eq!(images[0].bytes, vec![0xDE, 0xAD]);
|
||||||
|
assert_eq!(images[0].mime, "image/png");
|
||||||
|
|
||||||
|
// vision_start/vision_end bookends are preserved in wire form.
|
||||||
|
assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_START).count(), 1);
|
||||||
|
assert_eq!(wire.iter().filter(|&&t| t == tokenizer::VISION_END).count(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_image_serde_roundtrip() {
|
||||||
|
let node = AstNode::image(vec![0xDE, 0xAD, 0xBE, 0xEF], "image/png", 64, 64);
|
||||||
|
let json = serde_json::to_string(&node).unwrap();
|
||||||
|
// bytes must be base64-encoded in the JSON form
|
||||||
|
assert!(json.contains("3q2+7w=="));
|
||||||
|
let back: AstNode = serde_json::from_str(&json).unwrap();
|
||||||
|
let leaf = back.leaf().unwrap();
|
||||||
|
match leaf.body() {
|
||||||
|
NodeBody::Image { bytes, mime, orig_height, orig_width, token_count } => {
|
||||||
|
assert_eq!(bytes, &[0xDE, 0xAD, 0xBE, 0xEF]);
|
||||||
|
assert_eq!(mime, "image/png");
|
||||||
|
assert_eq!(*orig_height, 64);
|
||||||
|
assert_eq!(*orig_width, 64);
|
||||||
|
assert_eq!(*token_count, qwen3_image_token_count(64, 64));
|
||||||
|
}
|
||||||
|
other => panic!("expected Image, got {:?}", other),
|
||||||
|
}
|
||||||
|
// token_ids are recomputed on deserialization
|
||||||
|
assert_eq!(leaf.token_ids().len(), leaf.tokens());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_timestamp_present_accepted() {
|
fn test_timestamp_present_accepted() {
|
||||||
let json = r#"{"Leaf":{"body":{"Content":"hi"},"timestamp":"2026-04-16T12:00:00Z"}}"#;
|
let json = r#"{"Leaf":{"body":{"Content":"hi"},"timestamp":"2026-04-16T12:00:00Z"}}"#;
|
||||||
|
|
|
||||||
|
|
@ -285,16 +285,23 @@ impl Agent {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn assemble_prompt_tokens(&self) -> Vec<u32> {
|
pub async fn assemble_prompt_tokens(&self) -> Vec<u32> {
|
||||||
|
self.assemble_prompt().await.0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Assemble a ready-to-send prompt: token stream in wire form (each
|
||||||
|
/// image collapsed to a single `<|image_pad|>`) paired with the
|
||||||
|
/// images to attach as multi_modal_data.
|
||||||
|
pub async fn assemble_prompt(&self) -> (Vec<u32>, Vec<context::WireImage>) {
|
||||||
let ctx = self.context.lock().await;
|
let ctx = self.context.lock().await;
|
||||||
let st = self.state.lock().await;
|
let st = self.state.lock().await;
|
||||||
let mut tokens = ctx.token_ids();
|
let (mut tokens, images) = ctx.wire_prompt();
|
||||||
tokens.push(tokenizer::IM_START);
|
tokens.push(tokenizer::IM_START);
|
||||||
if st.think_native {
|
if st.think_native {
|
||||||
tokens.extend(tokenizer::encode("assistant\n<think>\n"));
|
tokens.extend(tokenizer::encode("assistant\n<think>\n"));
|
||||||
} else {
|
} else {
|
||||||
tokens.extend(tokenizer::encode("assistant\n"));
|
tokens.extend(tokenizer::encode("assistant\n"));
|
||||||
}
|
}
|
||||||
tokens
|
(tokens, images)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Rebuild the tools section of the system prompt from the current tools list.
|
/// Rebuild the tools section of the system prompt from the current tools list.
|
||||||
|
|
@ -354,10 +361,11 @@ impl Agent {
|
||||||
let _thinking = start_activity(&agent, "thinking...").await;
|
let _thinking = start_activity(&agent, "thinking...").await;
|
||||||
|
|
||||||
let (rx, _stream_guard) = {
|
let (rx, _stream_guard) = {
|
||||||
let prompt_tokens = agent.assemble_prompt_tokens().await;
|
let (prompt_tokens, images) = agent.assemble_prompt().await;
|
||||||
let st = agent.state.lock().await;
|
let st = agent.state.lock().await;
|
||||||
agent.client.stream_completion(
|
agent.client.stream_completion_mm(
|
||||||
&prompt_tokens,
|
&prompt_tokens,
|
||||||
|
&images,
|
||||||
api::SamplingParams {
|
api::SamplingParams {
|
||||||
temperature: st.temperature,
|
temperature: st.temperature,
|
||||||
top_p: st.top_p,
|
top_p: st.top_p,
|
||||||
|
|
@ -575,20 +583,9 @@ impl Agent {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn compact(&self) {
|
pub async fn compact(&self) {
|
||||||
match crate::config::reload_context().await {
|
// Identity section is left in place — mid-session rebuilds discard
|
||||||
Ok(personality) => {
|
// memory scores. Content edits to personality nodes get picked up at
|
||||||
let mut ctx = self.context.lock().await;
|
// the next restart via new() + restore_from_log().
|
||||||
// System section (prompt + tools) set by new(), don't touch it
|
|
||||||
ctx.clear(Section::Identity);
|
|
||||||
for (name, content) in &personality {
|
|
||||||
ctx.push_no_log(Section::Identity, AstNode::memory(name, content));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
dbglog!("warning: failed to reload identity: {:#}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.load_startup_journal().await;
|
self.load_startup_journal().await;
|
||||||
|
|
||||||
self.context.lock().await.trim_conversation();
|
self.context.lock().await.trim_conversation();
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,9 @@ static TOKENIZER: OnceLock<Tokenizer> = OnceLock::new();
|
||||||
/// Special token IDs for Qwen 3.5
|
/// Special token IDs for Qwen 3.5
|
||||||
pub const IM_START: u32 = 248045;
|
pub const IM_START: u32 = 248045;
|
||||||
pub const IM_END: u32 = 248046;
|
pub const IM_END: u32 = 248046;
|
||||||
|
pub const VISION_START: u32 = 248053;
|
||||||
|
pub const VISION_END: u32 = 248054;
|
||||||
|
pub const IMAGE_PAD: u32 = 248056;
|
||||||
|
|
||||||
/// Initialize the global tokenizer from a file path.
|
/// Initialize the global tokenizer from a file path.
|
||||||
/// Call once at startup. Panics if the file can't be loaded.
|
/// Call once at startup. Panics if the file can't be loaded.
|
||||||
|
|
|
||||||
|
|
@ -242,13 +242,7 @@ pub fn summarize_args(tool_name: &str, args: &serde_json::Value) -> String {
|
||||||
.as_str()
|
.as_str()
|
||||||
.unwrap_or("")
|
.unwrap_or("")
|
||||||
.to_string(),
|
.to_string(),
|
||||||
"view_image" => {
|
"view_image" => args["file_path"].as_str().unwrap_or("").to_string(),
|
||||||
if let Some(pane) = args["pane_id"].as_str() {
|
|
||||||
format!("pane {}", pane)
|
|
||||||
} else {
|
|
||||||
args["file_path"].as_str().unwrap_or("").to_string()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"journal" => {
|
"journal" => {
|
||||||
let entry = args["entry"].as_str().unwrap_or("");
|
let entry = args["entry"].as_str().unwrap_or("");
|
||||||
if entry.len() > 60 {
|
if entry.len() > 60 {
|
||||||
|
|
|
||||||
|
|
@ -1,96 +1,71 @@
|
||||||
use std::sync::Arc;
|
|
||||||
// tools/vision.rs — Image viewing tool
|
// tools/vision.rs — Image viewing tool
|
||||||
//
|
//
|
||||||
// Reads image files from disk and returns them as base64 data URIs
|
// Reads an image file from disk, decodes its dimensions, and injects it
|
||||||
// for multimodal models. Also supports capturing tmux pane contents
|
// into the context as a user-role message containing a NodeBody::Image
|
||||||
// as screenshots.
|
// leaf. The leaf carries raw bytes; the API layer extracts them into
|
||||||
|
// multi_modal_data when building vLLM requests.
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use base64::Engine;
|
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
||||||
|
use crate::agent::context::{AstNode, Role, Section};
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
struct Args {
|
struct Args {
|
||||||
file_path: Option<String>,
|
file_path: String,
|
||||||
pane_id: Option<String>,
|
|
||||||
#[serde(default = "default_lines")]
|
|
||||||
lines: usize,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_lines() -> usize { 50 }
|
|
||||||
|
|
||||||
pub fn tool() -> super::Tool {
|
pub fn tool() -> super::Tool {
|
||||||
super::Tool {
|
super::Tool {
|
||||||
name: "view_image",
|
name: "view_image",
|
||||||
description: "View an image file or capture a tmux pane screenshot. Supports PNG, JPEG, GIF, WebP. Use pane_id to capture a tmux pane instead.",
|
description: "View an image file. Supports PNG, JPEG, GIF, WebP, BMP. The image is inserted into the conversation and can be analyzed by the vision model.",
|
||||||
parameters_json: r#"{"type":"object","properties":{"file_path":{"type":"string","description":"Path to an image file"},"pane_id":{"type":"string","description":"Tmux pane ID to capture (e.g. '0:1.0')"},"lines":{"type":"integer","description":"Lines to capture from tmux pane (default 50)"}}}"#,
|
parameters_json: r#"{"type":"object","properties":{"file_path":{"type":"string","description":"Path to the image file"}},"required":["file_path"]}"#,
|
||||||
handler: Arc::new(|_a, v| Box::pin(async move { view_image_text(&v) })),
|
handler: Arc::new(|agent, v| Box::pin(async move {
|
||||||
|
view_image(agent, v).await
|
||||||
|
})),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn view_image_text(args: &serde_json::Value) -> anyhow::Result<String> {
|
const MAX_SIZE: usize = 20 * 1024 * 1024;
|
||||||
let a: Args = serde_json::from_value(args.clone())
|
|
||||||
|
async fn view_image(
|
||||||
|
agent: Option<Arc<crate::agent::Agent>>,
|
||||||
|
args: serde_json::Value,
|
||||||
|
) -> Result<String> {
|
||||||
|
let a: Args = serde_json::from_value(args)
|
||||||
.context("invalid view_image arguments")?;
|
.context("invalid view_image arguments")?;
|
||||||
|
|
||||||
if let Some(ref pane_id) = a.pane_id {
|
let path = std::path::Path::new(&a.file_path);
|
||||||
return capture_tmux_pane(pane_id, a.lines);
|
|
||||||
}
|
|
||||||
|
|
||||||
let file_path = a.file_path
|
|
||||||
.as_deref()
|
|
||||||
.context("view_image requires either file_path or pane_id")?;
|
|
||||||
|
|
||||||
let path = std::path::Path::new(file_path);
|
|
||||||
if !path.exists() {
|
if !path.exists() {
|
||||||
anyhow::bail!("File not found: {}", file_path);
|
anyhow::bail!("file not found: {}", a.file_path);
|
||||||
}
|
}
|
||||||
|
|
||||||
let data = std::fs::read(path).with_context(|| format!("Failed to read {}", file_path))?;
|
let bytes = std::fs::read(path)
|
||||||
|
.with_context(|| format!("reading {}", a.file_path))?;
|
||||||
|
|
||||||
// Sanity check file size (don't send huge images)
|
if bytes.len() > MAX_SIZE {
|
||||||
const MAX_SIZE: usize = 20 * 1024 * 1024; // 20 MB
|
|
||||||
if data.len() > MAX_SIZE {
|
|
||||||
anyhow::bail!(
|
anyhow::bail!(
|
||||||
"Image too large: {} bytes (max {} MB)",
|
"image too large: {} bytes (max {} MB)",
|
||||||
data.len(),
|
bytes.len(), MAX_SIZE / (1024 * 1024),
|
||||||
MAX_SIZE / (1024 * 1024)
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let dim = imagesize::blob_size(&bytes)
|
||||||
|
.with_context(|| format!("decoding dimensions of {}", a.file_path))?;
|
||||||
|
let (w, h) = (dim.width as u32, dim.height as u32);
|
||||||
let mime = mime_from_extension(path);
|
let mime = mime_from_extension(path);
|
||||||
let b64 = base64::engine::general_purpose::STANDARD.encode(&data);
|
|
||||||
let data_uri = format!("data:{};base64,{}", mime, b64);
|
|
||||||
|
|
||||||
Ok(format!("Image loaded: {} ({}, {} bytes)\n{}", file_path, mime, data.len(), data_uri))
|
let image_leaf = AstNode::image(bytes.clone(), mime, h, w);
|
||||||
}
|
let token_count = image_leaf.leaf().unwrap().tokens().saturating_sub(2);
|
||||||
|
|
||||||
/// Capture a tmux pane's text content.
|
let agent = agent.context("view_image requires agent context")?;
|
||||||
fn capture_tmux_pane(pane_id: &str, lines: usize) -> Result<String> {
|
let branch = AstNode::branch(Role::User, vec![image_leaf]);
|
||||||
|
agent.context.lock().await.push_log(Section::Conversation, branch);
|
||||||
|
|
||||||
// Use tmux capture-pane to get text content, then render to image
|
Ok(format!("loaded {} ({}, {}x{}, {} tokens)",
|
||||||
// via a simple approach: capture text and return it (the model can
|
a.file_path, mime, w, h, token_count))
|
||||||
// read text directly, which is often more useful than a screenshot).
|
|
||||||
//
|
|
||||||
// For actual pixel-level screenshots we'd need a terminal renderer,
|
|
||||||
// but text capture covers 95% of use cases.
|
|
||||||
let output = std::process::Command::new("tmux")
|
|
||||||
.args(["capture-pane", "-t", pane_id, "-p", "-S", &format!("-{}", lines)])
|
|
||||||
.output()
|
|
||||||
.context("Failed to run tmux capture-pane")?;
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
||||||
anyhow::bail!("tmux capture-pane failed: {}", stderr.trim());
|
|
||||||
}
|
|
||||||
|
|
||||||
let text = String::from_utf8_lossy(&output.stdout).to_string();
|
|
||||||
|
|
||||||
// Return as text — the model can read terminal output directly.
|
|
||||||
// This is actually more useful than a screenshot for most tasks.
|
|
||||||
Ok(format!(
|
|
||||||
"Tmux pane {} (last {} lines):\n```\n{}\n```",
|
|
||||||
pane_id, lines, text.trim_end()
|
|
||||||
))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn mime_from_extension(path: &std::path::Path) -> &'static str {
|
fn mime_from_extension(path: &std::path::Path) -> &'static str {
|
||||||
|
|
@ -104,8 +79,7 @@ fn mime_from_extension(path: &std::path::Path) -> &'static str {
|
||||||
Some("jpg" | "jpeg") => "image/jpeg",
|
Some("jpg" | "jpeg") => "image/jpeg",
|
||||||
Some("gif") => "image/gif",
|
Some("gif") => "image/gif",
|
||||||
Some("webp") => "image/webp",
|
Some("webp") => "image/webp",
|
||||||
Some("svg") => "image/svg+xml",
|
|
||||||
Some("bmp") => "image/bmp",
|
Some("bmp") => "image/bmp",
|
||||||
_ => "image/png", // default assumption
|
_ => "application/octet-stream",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,9 @@ static CONFIG: OnceLock<RwLock<Arc<Config>>> = OnceLock::new();
|
||||||
fn default_stream_timeout() -> u64 { 60 }
|
fn default_stream_timeout() -> u64 { 60 }
|
||||||
fn default_scoring_interval_secs() -> u64 { 3600 } // 1 hour
|
fn default_scoring_interval_secs() -> u64 { 3600 } // 1 hour
|
||||||
fn default_scoring_response_window() -> usize { 100 }
|
fn default_scoring_response_window() -> usize { 100 }
|
||||||
|
fn default_surface_hooks() -> Vec<String> {
|
||||||
|
vec!["UserPromptSubmit".into(), "PostToolUse".into(), "Stop".into()]
|
||||||
|
}
|
||||||
fn default_node_weight() -> f64 { 0.7 }
|
fn default_node_weight() -> f64 { 0.7 }
|
||||||
fn default_edge_decay() -> f64 { 0.3 }
|
fn default_edge_decay() -> f64 { 0.3 }
|
||||||
fn default_max_hops() -> u32 { 3 }
|
fn default_max_hops() -> u32 { 3 }
|
||||||
|
|
@ -73,6 +76,10 @@ pub struct Config {
|
||||||
/// Max conversation bytes to include in surface agent context.
|
/// Max conversation bytes to include in surface agent context.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub surface_conversation_bytes: Option<usize>,
|
pub surface_conversation_bytes: Option<usize>,
|
||||||
|
/// Claude Code hook events that trigger agent cycles (surface-observe,
|
||||||
|
/// reflect, journal). Read by consciousness-claude/src/hook.rs.
|
||||||
|
#[serde(default = "default_surface_hooks")]
|
||||||
|
pub surface_hooks: Vec<String>,
|
||||||
|
|
||||||
// Spreading activation parameters
|
// Spreading activation parameters
|
||||||
#[serde(default = "default_node_weight")]
|
#[serde(default = "default_node_weight")]
|
||||||
|
|
@ -104,6 +111,7 @@ impl Default for Config {
|
||||||
"separator".into(), "split".into(),
|
"separator".into(), "split".into(),
|
||||||
],
|
],
|
||||||
surface_conversation_bytes: None,
|
surface_conversation_bytes: None,
|
||||||
|
surface_hooks: default_surface_hooks(),
|
||||||
mcp_servers: vec![],
|
mcp_servers: vec![],
|
||||||
lsp_servers: vec![],
|
lsp_servers: vec![],
|
||||||
default_node_weight: default_node_weight(),
|
default_node_weight: default_node_weight(),
|
||||||
|
|
|
||||||
|
|
@ -482,6 +482,14 @@ async fn main() {
|
||||||
|
|
||||||
let cli = Cli::parse();
|
let cli = Cli::parse();
|
||||||
|
|
||||||
|
// Some subcommands (e.g. admin load-context) read from the global
|
||||||
|
// AppConfig. poc-memory has no config CLI flags of its own, so load
|
||||||
|
// with defaults — figment still pulls from ~/.consciousness/config.json5
|
||||||
|
// and env the same way.
|
||||||
|
if let Err(e) = crate::config::load_app(&crate::user::CliArgs::default()) {
|
||||||
|
eprintln!("warning: failed to load config: {:#}", e);
|
||||||
|
}
|
||||||
|
|
||||||
if let Err(e) = cli.command.run().await {
|
if let Err(e) = cli.command.run().await {
|
||||||
eprintln!("Error: {}", e);
|
eprintln!("Error: {}", e);
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
|
|
|
||||||
|
|
@ -103,9 +103,13 @@ fn collect_memory_scores(ctx: &ContextState) -> std::collections::BTreeMap<Strin
|
||||||
|
|
||||||
/// Save memory scores to disk.
|
/// Save memory scores to disk.
|
||||||
fn save_memory_scores(scores: &std::collections::BTreeMap<String, f64>, path: &std::path::Path) {
|
fn save_memory_scores(scores: &std::collections::BTreeMap<String, f64>, path: &std::path::Path) {
|
||||||
if let Ok(json) = serde_json::to_string_pretty(scores) {
|
match serde_json::to_string_pretty(scores) {
|
||||||
let _ = std::fs::write(path, json);
|
Ok(json) => match std::fs::write(path, &json) {
|
||||||
dbglog!("[scoring] saved {} scores to {}", scores.len(), path.display());
|
Ok(()) => dbglog!("[scoring] saved {} scores to {} ({} bytes)",
|
||||||
|
scores.len(), path.display(), json.len()),
|
||||||
|
Err(e) => dbglog!("[scoring] save FAILED ({}): {}", path.display(), e),
|
||||||
|
},
|
||||||
|
Err(e) => dbglog!("[scoring] serialize FAILED: {}", e),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -506,6 +510,17 @@ impl Mind {
|
||||||
// Load persistent subconscious state
|
// Load persistent subconscious state
|
||||||
let state_path = self.config.session_dir.join("subconscious-state.json");
|
let state_path = self.config.session_dir.join("subconscious-state.json");
|
||||||
self.subconscious.lock().await.set_state_path(state_path);
|
self.subconscious.lock().await.set_state_path(state_path);
|
||||||
|
|
||||||
|
// Kick off an incremental scoring pass on startup so memories due
|
||||||
|
// for re-scoring get evaluated without requiring a user message.
|
||||||
|
{
|
||||||
|
let mut s = self.shared.lock().unwrap();
|
||||||
|
if !s.scoring_in_flight {
|
||||||
|
s.scoring_in_flight = true;
|
||||||
|
drop(s);
|
||||||
|
self.start_memory_scoring();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn turn_watch(&self) -> tokio::sync::watch::Receiver<bool> {
|
pub fn turn_watch(&self) -> tokio::sync::watch::Receiver<bool> {
|
||||||
|
|
@ -619,14 +634,40 @@ impl Mind {
|
||||||
let mut ctx = agent.context.lock().await;
|
let mut ctx = agent.context.lock().await;
|
||||||
// Find memory by key in identity or conversation
|
// Find memory by key in identity or conversation
|
||||||
let found = find_memory_by_key(&ctx, &key);
|
let found = find_memory_by_key(&ctx, &key);
|
||||||
if let Some((section, i)) = found {
|
match found {
|
||||||
ctx.set_score(section, i, Some(score));
|
Some((section, i)) => {
|
||||||
|
ctx.set_score(section, i, Some(score));
|
||||||
|
let nodes: &[crate::agent::context::AstNode] = match section {
|
||||||
|
Section::Identity => ctx.identity(),
|
||||||
|
Section::Conversation => ctx.conversation(),
|
||||||
|
_ => &[],
|
||||||
|
};
|
||||||
|
let read_back = match nodes.get(i) {
|
||||||
|
Some(crate::agent::context::AstNode::Leaf(l)) => match l.body() {
|
||||||
|
crate::agent::context::NodeBody::Memory { score, .. } => format!("{:?}", score),
|
||||||
|
_ => "not-memory".to_string(),
|
||||||
|
},
|
||||||
|
_ => "out-of-bounds".to_string(),
|
||||||
|
};
|
||||||
|
dbglog!("[scoring] persisted {} → {:.3} ({:?}[{}]) read_back={}",
|
||||||
|
key, score, section, i, read_back);
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
dbglog!(
|
||||||
|
"[scoring] DROP {}: find_memory_by_key None (id={}, cv={})",
|
||||||
|
key, ctx.identity().len(), ctx.conversation().len()
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
let snapshot = collect_memory_scores(&ctx);
|
let snapshot = collect_memory_scores(&ctx);
|
||||||
|
let in_snapshot = snapshot.contains_key(&key);
|
||||||
|
dbglog!("[scoring] snapshot size={} contains({})={}",
|
||||||
|
snapshot.len(), key, in_snapshot);
|
||||||
drop(ctx);
|
drop(ctx);
|
||||||
agent.state.lock().await.changed.notify_one();
|
agent.state.lock().await.changed.notify_one();
|
||||||
snapshot
|
snapshot
|
||||||
};
|
};
|
||||||
|
dbglog!("[scoring] about to save {} entries", scores_snapshot.len());
|
||||||
save_memory_scores(&scores_snapshot, &path);
|
save_memory_scores(&scores_snapshot, &path);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -486,6 +486,11 @@ impl InteractScreen {
|
||||||
if t.is_empty() { vec![] }
|
if t.is_empty() { vec![] }
|
||||||
else { vec![(PaneTarget::ToolResult, text, Marker::None)] }
|
else { vec![(PaneTarget::ToolResult, text, Marker::None)] }
|
||||||
}
|
}
|
||||||
|
NodeBody::Image { orig_height, orig_width, .. } => {
|
||||||
|
vec![(PaneTarget::Conversation,
|
||||||
|
format!("[image {}x{}]", orig_width, orig_height),
|
||||||
|
Marker::None)]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
AstNode::Branch { role, children, .. } => {
|
AstNode::Branch { role, children, .. } => {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue