Custom Deserialize for NodeLeaf: recompute tokens on deserialization
`token_ids` is not serialized (`#[serde(skip)]`), so deserialized nodes previously came back with zero tokens. The custom `Deserialize` impl recomputes the tokens from the body text, restoring the invariant at the reconstruction boundary, so no separate recompute step is needed.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
a09567849f
commit
5c9590ada7
1 changed files with 19 additions and 1 deletions
|
|
@ -63,7 +63,8 @@ pub enum NodeBody {
|
|||
}
|
||||
|
||||
/// A leaf node: typed content with cached token IDs.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
/// Token IDs are not serialized — they're recomputed on deserialization.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct NodeLeaf {
|
||||
body: NodeBody,
|
||||
#[serde(skip)]
|
||||
|
|
@ -71,6 +72,23 @@ pub struct NodeLeaf {
|
|||
timestamp: Option<DateTime<Utc>>,
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for NodeLeaf {
|
||||
fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
|
||||
#[derive(Deserialize)]
|
||||
struct Raw {
|
||||
body: NodeBody,
|
||||
timestamp: Option<DateTime<Utc>>,
|
||||
}
|
||||
let raw = Raw::deserialize(deserializer)?;
|
||||
let token_ids = if raw.body.is_prompt_visible() {
|
||||
tokenizer::encode(&raw.body.render())
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
Ok(NodeLeaf { body: raw.body, token_ids, timestamp: raw.timestamp })
|
||||
}
|
||||
}
|
||||
|
||||
/// A node in the context AST.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum AstNode {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue