context: tighten timestamp schema; every AstNode has one
Previously NodeLeaf.timestamp and AstNode::Branch.timestamp accepted null or missing via a deserialize_timestamp_or_epoch fallback — legacy entries in conversation.jsonl from before Branch timestamps existed (and from before chrono serialization was wired up) would load with UNIX_EPOCH as a sentinel.

Downstream, node_timestamp_ns() returned Option<i64> and callers had to handle None as "old entry, skip." That second filter was silently dropping every candidate in score_finetune_candidates when scoring an older session — the F6 screen showed "0 above threshold" even when max_divergence was orders of magnitude above the threshold, because every entry was failing the None check, not the divergence check.

The fix, in three parts:

1. src/bin/fix-timestamps.rs — one-off migration tool that walks a conversation.jsonl, linearly interpolates timestamps for entries stuck at UNIX_EPOCH (using surrounding real timestamps as anchors), propagates to child leaves with per-sibling ns offsets, and bumps any collisions by 1 ns for uniqueness. Ran against the current session's log: 11887 entries, 72289 ns bumps, all unique.

2. context.rs — drop default_timestamp and deserialize_timestamp_or_epoch. NodeLeaf and Branch now require a present non-null timestamp on deserialize. Tests flip from "missing/null → UNIX_EPOCH" to "missing/null → Err."

3. subconscious/learn.rs — node_timestamp_ns now returns i64, not Option<i64>. The matching caller in score_finetune_candidates collapses from a Some/None match to a single trained-set check. mind/log.rs's oldest_timestamp no longer filters UNIX_EPOCH.

Every line currently on disk has already been migrated. Going forward, new AstNodes always carry real timestamps (Utc::now() at construction time), so the strict schema is the invariant, not an aspiration.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
77822992c8
commit
080b4f9084
4 changed files with 210 additions and 71 deletions
|
|
@ -85,19 +85,6 @@ pub enum NodeBody {
|
|||
Log(String),
|
||||
}
|
||||
|
||||
fn default_timestamp() -> DateTime<Utc> {
|
||||
DateTime::UNIX_EPOCH
|
||||
}
|
||||
|
||||
/// Deserialize timestamp, treating both missing and null as UNIX_EPOCH.
|
||||
fn deserialize_timestamp_or_epoch<'de, D>(deserializer: D) -> Result<DateTime<Utc>, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
let opt: Option<DateTime<Utc>> = Option::deserialize(deserializer)?;
|
||||
Ok(opt.unwrap_or(DateTime::UNIX_EPOCH))
|
||||
}
|
||||
|
||||
/// A leaf node: typed content with cached token IDs.
|
||||
/// Token IDs are not serialized — they're recomputed on deserialization.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
|
|
@ -113,7 +100,6 @@ impl<'de> Deserialize<'de> for NodeLeaf {
|
|||
#[derive(Deserialize)]
|
||||
struct Raw {
|
||||
body: NodeBody,
|
||||
#[serde(default = "default_timestamp", deserialize_with = "deserialize_timestamp_or_epoch")]
|
||||
timestamp: DateTime<Utc>,
|
||||
}
|
||||
let raw = Raw::deserialize(deserializer)?;
|
||||
|
|
@ -133,7 +119,6 @@ pub enum AstNode {
|
|||
Branch {
|
||||
role: Role,
|
||||
children: Vec<AstNode>,
|
||||
#[serde(default = "default_timestamp", deserialize_with = "deserialize_timestamp_or_epoch")]
|
||||
timestamp: DateTime<Utc>,
|
||||
/// Per-response memory attribution from full scoring matrix.
|
||||
/// Maps memory key → divergence score for this response.
|
||||
|
|
@ -1363,45 +1348,31 @@ mod tests {
|
|||
// -- Timestamp deserialization tests ------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn test_timestamp_null_becomes_epoch() {
|
||||
// Old conversation.jsonl entries have "timestamp":null
|
||||
// serde(default) only handles missing fields, not explicit nulls.
|
||||
// We need to verify our deserialize handles this correctly.
|
||||
fn test_timestamp_null_rejected() {
|
||||
// Missing/null timestamps used to be accepted via a lenient
|
||||
// deserialize fallback. Post-migration the schema is strict.
|
||||
let json = r#"{"Leaf":{"body":{"Content":"hello"},"timestamp":null}}"#;
|
||||
let node: AstNode = serde_json::from_str(json).unwrap();
|
||||
let leaf = node.leaf().unwrap();
|
||||
assert_eq!(leaf.timestamp(), DateTime::<Utc>::UNIX_EPOCH);
|
||||
assert!(serde_json::from_str::<AstNode>(json).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_timestamp_missing_becomes_epoch() {
|
||||
fn test_timestamp_missing_rejected() {
|
||||
let json = r#"{"Leaf":{"body":{"Content":"hello"}}}"#;
|
||||
assert!(serde_json::from_str::<AstNode>(json).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_branch_timestamp_missing_rejected() {
|
||||
let json = r#"{"Branch":{"role":"User","children":[]}}"#;
|
||||
assert!(serde_json::from_str::<AstNode>(json).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_timestamp_present_accepted() {
|
||||
let json = r#"{"Leaf":{"body":{"Content":"hi"},"timestamp":"2026-04-16T12:00:00Z"}}"#;
|
||||
let node: AstNode = serde_json::from_str(json).unwrap();
|
||||
let leaf = node.leaf().unwrap();
|
||||
assert_eq!(leaf.timestamp(), DateTime::<Utc>::UNIX_EPOCH);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_branch_timestamp_null_becomes_epoch() {
|
||||
let json = r#"{"Branch":{"role":"User","children":[{"Leaf":{"body":{"Content":"hi"}}}],"timestamp":null}}"#;
|
||||
let node: AstNode = serde_json::from_str(json).unwrap();
|
||||
match node {
|
||||
AstNode::Branch { timestamp, .. } => {
|
||||
assert_eq!(timestamp, DateTime::<Utc>::UNIX_EPOCH);
|
||||
}
|
||||
_ => panic!("expected Branch"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_branch_timestamp_missing_becomes_epoch() {
|
||||
let json = r#"{"Branch":{"role":"User","children":[{"Leaf":{"body":{"Content":"hi"}}}]}}"#;
|
||||
let node: AstNode = serde_json::from_str(json).unwrap();
|
||||
match node {
|
||||
AstNode::Branch { timestamp, .. } => {
|
||||
assert_eq!(timestamp, DateTime::<Utc>::UNIX_EPOCH);
|
||||
}
|
||||
_ => panic!("expected Branch"),
|
||||
}
|
||||
assert_eq!(leaf.timestamp().to_rfc3339(),
|
||||
"2026-04-16T12:00:00+00:00");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue