context: tighten timestamp schema; every AstNode has one

Previously NodeLeaf.timestamp and AstNode::Branch.timestamp accepted
null or missing via a deserialize_timestamp_or_epoch fallback — legacy
entries in conversation.jsonl from before Branch timestamps existed
(and from before chrono serialization was wired up) would load with
UNIX_EPOCH as a sentinel. Downstream, node_timestamp_ns() returned
Option<i64> and callers had to handle None as "old entry, skip."

That None handling acted as a second filter, and it was silently
dropping every candidate in score_finetune_candidates when scoring an
older session — the F6 screen showed "0 above threshold" even when
max_divergence was orders of magnitude above the threshold, because
every entry was failing the None check, not the divergence check.

The fix, in three parts:

1. src/bin/fix-timestamps.rs — one-off migration tool that walks a
   conversation.jsonl, linearly interpolates timestamps for entries
   stuck at UNIX_EPOCH (using surrounding real timestamps as anchors),
   propagates to child leaves with per-sibling ns offsets, and bumps
   any collisions by 1 ns for uniqueness. Ran against the current
   session's log: 11887 entries, 72289 ns bumps, all unique.

2. context.rs — drop default_timestamp and
   deserialize_timestamp_or_epoch. NodeLeaf and Branch now require a
   present non-null timestamp on deserialize. Tests flip from
   "missing/null → UNIX_EPOCH" to "missing/null → Err."

3. subconscious/learn.rs — node_timestamp_ns now returns i64, not
   Option<i64>. The matching caller in score_finetune_candidates
   collapses from a Some/None match to a single trained-set check.
   mind/log.rs's oldest_timestamp no longer filters UNIX_EPOCH.

Every line currently on disk has already been migrated. Going
forward, new AstNodes always carry real timestamps (Utc::now() at
construction time), so the strict schema is the invariant, not an
aspiration.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-04-16 12:35:16 -04:00
parent 77822992c8
commit 080b4f9084
4 changed files with 210 additions and 71 deletions

View file

@ -85,19 +85,6 @@ pub enum NodeBody {
Log(String),
}
/// Serde `default` hook for `timestamp` fields: supplies the Unix epoch
/// when the field is absent from the input.
fn default_timestamp() -> DateTime<Utc> {
DateTime::UNIX_EPOCH
}
/// Deserialize timestamp, treating both missing and null as UNIX_EPOCH.
///
/// The value is read as `Option<DateTime<Utc>>`, so an explicit `null`
/// yields `None` and is replaced with `DateTime::UNIX_EPOCH`. The
/// missing-field half of the contract comes from the
/// `default = "default_timestamp"` attribute this function is paired with
/// at its use sites, not from this function itself.
///
/// # Errors
///
/// Propagates whatever error the deserializer reports for a value that is
/// neither null nor a valid `DateTime<Utc>`.
fn deserialize_timestamp_or_epoch<'de, D>(deserializer: D) -> Result<DateTime<Utc>, D::Error>
where
D: serde::Deserializer<'de>,
{
// Going through Option is what lets an explicit null through as None.
let opt: Option<DateTime<Utc>> = Option::deserialize(deserializer)?;
Ok(opt.unwrap_or(DateTime::UNIX_EPOCH))
}
/// A leaf node: typed content with cached token IDs.
/// Token IDs are not serialized — they're recomputed on deserialization.
#[derive(Debug, Clone, Serialize)]
@ -113,7 +100,6 @@ impl<'de> Deserialize<'de> for NodeLeaf {
#[derive(Deserialize)]
struct Raw {
body: NodeBody,
#[serde(default = "default_timestamp", deserialize_with = "deserialize_timestamp_or_epoch")]
timestamp: DateTime<Utc>,
}
let raw = Raw::deserialize(deserializer)?;
@ -133,7 +119,6 @@ pub enum AstNode {
Branch {
role: Role,
children: Vec<AstNode>,
#[serde(default = "default_timestamp", deserialize_with = "deserialize_timestamp_or_epoch")]
timestamp: DateTime<Utc>,
/// Per-response memory attribution from full scoring matrix.
/// Maps memory key → divergence score for this response.
@ -1363,45 +1348,31 @@ mod tests {
// -- Timestamp deserialization tests ------------------------------------------
#[test]
fn test_timestamp_null_becomes_epoch() {
// Old conversation.jsonl entries have "timestamp":null
// serde(default) only handles missing fields, not explicit nulls.
// We need to verify our deserialize handles this correctly.
fn test_timestamp_null_rejected() {
// Missing/null timestamps used to be accepted via a lenient
// deserialize fallback. Post-migration the schema is strict.
let json = r#"{"Leaf":{"body":{"Content":"hello"},"timestamp":null}}"#;
let node: AstNode = serde_json::from_str(json).unwrap();
let leaf = node.leaf().unwrap();
assert_eq!(leaf.timestamp(), DateTime::<Utc>::UNIX_EPOCH);
assert!(serde_json::from_str::<AstNode>(json).is_err());
}
#[test]
fn test_timestamp_missing_becomes_epoch() {
fn test_timestamp_missing_rejected() {
let json = r#"{"Leaf":{"body":{"Content":"hello"}}}"#;
assert!(serde_json::from_str::<AstNode>(json).is_err());
}
#[test]
fn test_branch_timestamp_missing_rejected() {
let json = r#"{"Branch":{"role":"User","children":[]}}"#;
assert!(serde_json::from_str::<AstNode>(json).is_err());
}
#[test]
fn test_timestamp_present_accepted() {
let json = r#"{"Leaf":{"body":{"Content":"hi"},"timestamp":"2026-04-16T12:00:00Z"}}"#;
let node: AstNode = serde_json::from_str(json).unwrap();
let leaf = node.leaf().unwrap();
assert_eq!(leaf.timestamp(), DateTime::<Utc>::UNIX_EPOCH);
}
#[test]
fn test_branch_timestamp_null_becomes_epoch() {
let json = r#"{"Branch":{"role":"User","children":[{"Leaf":{"body":{"Content":"hi"}}}],"timestamp":null}}"#;
let node: AstNode = serde_json::from_str(json).unwrap();
match node {
AstNode::Branch { timestamp, .. } => {
assert_eq!(timestamp, DateTime::<Utc>::UNIX_EPOCH);
}
_ => panic!("expected Branch"),
}
}
#[test]
fn test_branch_timestamp_missing_becomes_epoch() {
let json = r#"{"Branch":{"role":"User","children":[{"Leaf":{"body":{"Content":"hi"}}}]}}"#;
let node: AstNode = serde_json::from_str(json).unwrap();
match node {
AstNode::Branch { timestamp, .. } => {
assert_eq!(timestamp, DateTime::<Utc>::UNIX_EPOCH);
}
_ => panic!("expected Branch"),
}
assert_eq!(leaf.timestamp().to_rfc3339(),
"2026-04-16T12:00:00+00:00");
}
}