graph: add implicit temporal edges between episodic nodes

Compute parent/child (session→daily→weekly→monthly) and prev/next
(chronological ordering within each level) edges at graph build time
from node metadata. Parse dates from keys for digest nodes (whose
timestamps reflect creation, not covered date) and prefer key-parsed
dates over timestamp-derived dates for sessions (timezone fix).

Result: ~9185 implicit edges, communities halved, gini improved.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-03-14 02:40:00 -04:00
parent 958cf9d041
commit 510f448f10
2 changed files with 209 additions and 0 deletions

View file

@ -426,9 +426,192 @@ fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashS
});
});
add_implicit_temporal_edges(store, &keys, &mut adj);
(adj, keys)
}
/// Add implicit edges for the temporal/digest hierarchy.
///
/// These edges are derived from node types and dates — they don't
/// need to be stored. Two kinds:
/// - parent/child: session→daily→weekly→monthly (by date containment)
/// - prev/next: chronological ordering within each level
///
/// Sessions take their date from the key when it embeds one (local time) and
/// only fall back to the UTC date of their timestamp otherwise. Digest nodes
/// (daily/weekly/monthly) always extract the period they *cover* from the key
/// name, since their timestamp is when the digest was created, not what
/// period it covers. Nodes whose period cannot be determined are skipped.
///
/// Only nodes present in `keys` are considered. Each implicit edge is pushed
/// in both directions with strength 1.0 and `RelationType::Auto`; a pair is
/// left untouched when `adj` already holds an edge from the first node to
/// the second.
///
/// Several digests may cover the same period (e.g. `daily-2026-03-06-identity`
/// and `daily-2026-03-06-technical`, or suffixed weekly/monthly keys); children
/// are linked to *all* matching parents. Ties within a level are broken by key
/// so the resulting edges do not depend on the store's iteration order.
fn add_implicit_temporal_edges(
    store: &impl StoreView,
    keys: &HashSet<String>,
    adj: &mut HashMap<String, Vec<Edge>>,
) {
    use crate::store::NodeType::*;
    use chrono::{Datelike, DateTime, NaiveDate};

    // Extract the covered date from a key name.
    // Patterns: "daily-2026-03-06", "daily-2026-03-06-identity",
    //           "journal#j-2026-03-13t...", "journal#2026-03-13-..."
    fn date_from_key(key: &str) -> Option<NaiveDate> {
        // "journal#j-" must be tried before its shorter sibling "journal#".
        for prefix in ["daily-", "journal#j-", "journal#"] {
            let parsed = key
                .strip_prefix(prefix)
                // `get` instead of `[..10]`: a key with multi-byte characters
                // may not have a char boundary at byte 10, and slicing there
                // would panic. Such keys simply carry no date.
                .and_then(|rest| rest.get(..10))
                .and_then(|head| NaiveDate::parse_from_str(head, "%Y-%m-%d").ok());
            if parsed.is_some() {
                return parsed;
            }
        }
        None
    }

    // "weekly-2026-W09" → (2026, 9); a trailing "-suffix" is ignored.
    fn week_from_key(key: &str) -> Option<(i32, u32)> {
        let rest = key.strip_prefix("weekly-")?;
        let (year_str, w_str) = rest.split_once("-W")?;
        let year: i32 = year_str.parse().ok()?;
        let week: u32 = w_str.split('-').next()?.parse().ok()?;
        Some((year, week))
    }

    // "monthly-2026-02" → (2026, 2); a trailing "-suffix" is ignored.
    fn month_from_key(key: &str) -> Option<(i32, u32)> {
        let rest = key.strip_prefix("monthly-")?;
        let (year_str, month_str) = rest.split_once('-')?;
        let year: i32 = year_str.parse().ok()?;
        let month: u32 = month_str.split('-').next()?.parse().ok()?;
        Some((year, month))
    }

    // Link `a` and `b` in both directions unless `a` already points at `b`.
    fn add_edge(adj: &mut HashMap<String, Vec<Edge>>, a: &str, b: &str) {
        let already_linked = adj
            .get(a)
            .map_or(false, |edges| edges.iter().any(|e| e.target == b));
        if already_linked {
            return;
        }
        adj.entry(a.to_owned()).or_default().push(Edge {
            target: b.to_owned(),
            strength: 1.0,
            rel_type: RelationType::Auto,
        });
        adj.entry(b.to_owned()).or_default().push(Edge {
            target: a.to_owned(),
            strength: 1.0,
            rel_type: RelationType::Auto,
        });
    }

    // Collect episodic nodes by type.
    struct Dated {
        key: String,
        ts: i64,
        date: NaiveDate,
    }
    let mut sessions: Vec<Dated> = Vec::new();
    let mut dailies: Vec<(String, NaiveDate)> = Vec::new();
    let mut weeklies: Vec<(String, (i32, u32))> = Vec::new();
    let mut monthlies: Vec<(String, (i32, u32))> = Vec::new();

    store.for_each_node_meta(|key, node_type, ts| {
        if !keys.contains(key) {
            return;
        }
        match node_type {
            EpisodicSession => {
                // Prefer date from key (local time) over timestamp (UTC)
                // to avoid timezone mismatches.
                let date = date_from_key(key)
                    .or_else(|| DateTime::from_timestamp(ts, 0).map(|dt| dt.date_naive()));
                if let Some(date) = date {
                    sessions.push(Dated { key: key.to_owned(), ts, date });
                }
            }
            EpisodicDaily => {
                if let Some(date) = date_from_key(key) {
                    dailies.push((key.to_owned(), date));
                }
            }
            EpisodicWeekly => {
                if let Some(yw) = week_from_key(key) {
                    weeklies.push((key.to_owned(), yw));
                }
            }
            EpisodicMonthly => {
                if let Some(ym) = month_from_key(key) {
                    monthlies.push((key.to_owned(), ym));
                }
            }
            _ => {}
        }
    });

    // Chronological order, with the key as tie-breaker: nodes sharing a
    // timestamp/period would otherwise keep whatever order the store happened
    // to iterate them in, making the prev/next chain non-reproducible.
    sessions.sort_by(|a, b| a.ts.cmp(&b.ts).then_with(|| a.key.cmp(&b.key)));
    dailies.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(&b.0)));
    weeklies.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(&b.0)));
    monthlies.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(&b.0)));

    // Build indexes: date→dailies, (year,week)→weeklies, (year,month)→monthlies.
    // Every index is multi-valued so that suffixed digests covering the same
    // period are all kept instead of silently overwriting one another.
    let mut date_to_dailies: HashMap<NaiveDate, Vec<&str>> = HashMap::new();
    for (key, date) in &dailies {
        date_to_dailies.entry(*date).or_default().push(key.as_str());
    }
    let mut yw_to_weeklies: HashMap<(i32, u32), Vec<&str>> = HashMap::new();
    for (key, yw) in &weeklies {
        yw_to_weeklies.entry(*yw).or_default().push(key.as_str());
    }
    let mut ym_to_monthlies: HashMap<(i32, u32), Vec<&str>> = HashMap::new();
    for (key, ym) in &monthlies {
        ym_to_monthlies.entry(*ym).or_default().push(key.as_str());
    }

    // Session → Daily (parent): each session links to all dailies for its date.
    for sess in &sessions {
        if let Some(daily_keys) = date_to_dailies.get(&sess.date) {
            for daily in daily_keys {
                add_edge(adj, &sess.key, daily);
            }
        }
    }

    // Daily → Weekly (parent), keyed by the ISO week containing the date.
    for (key, date) in &dailies {
        let iso = date.iso_week();
        if let Some(weekly_keys) = yw_to_weeklies.get(&(iso.year(), iso.week())) {
            for weekly in weekly_keys {
                add_edge(adj, key, weekly);
            }
        }
    }

    // Weekly → Monthly (parent).
    for (key, (iso_year, iso_week)) in &weeklies {
        // A week can span two months; use the Thursday date (ISO week
        // convention). Out-of-range week numbers yield `None` and are skipped.
        let thursday = NaiveDate::from_isoywd_opt(*iso_year, *iso_week, chrono::Weekday::Thu);
        if let Some(d) = thursday {
            if let Some(monthly_keys) = ym_to_monthlies.get(&(d.year(), d.month())) {
                for monthly in monthly_keys {
                    add_edge(adj, key, monthly);
                }
            }
        }
    }

    // Prev/next within each level.
    for pair in sessions.windows(2) {
        add_edge(adj, &pair[0].key, &pair[1].key);
    }
    for pair in dailies.windows(2) {
        add_edge(adj, &pair[0].0, &pair[1].0);
    }
    for pair in weeklies.windows(2) {
        add_edge(adj, &pair[0].0, &pair[1].0);
    }
    for pair in monthlies.windows(2) {
        add_edge(adj, &pair[0].0, &pair[1].0);
    }
}
/// Label propagation community detection.
///
/// Each node starts with its own label. Each iteration: adopt the most

View file

@ -19,6 +19,9 @@ pub trait StoreView {
/// Iterate all nodes. Callback receives (key, content, weight).
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F);
/// Iterate all nodes with metadata only (no content). Callback receives
/// (key, node_type, timestamp).
///
/// The graph builder feeds `timestamp` to `DateTime::from_timestamp(ts, 0)`,
/// i.e. it treats the value as whole seconds since the Unix epoch.
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, f: F);
/// Iterate all relations. Callback receives (source_key, target_key, strength, rel_type).
///
/// NOTE(review): the owned `Store` implementation skips relations flagged
/// `deleted`; confirm the other views apply the same filter.
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F);
@ -39,6 +42,12 @@ impl StoreView for Store {
}
}
/// Hand each stored node's key, type, and timestamp to the callback.
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, mut f: F) {
    for entry in &self.nodes {
        let (node_key, stored) = entry;
        let kind = stored.node_type;
        let stamp = stored.timestamp;
        f(node_key, kind, stamp);
    }
}
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
for rel in &self.relations {
if rel.deleted { continue; }
@ -110,6 +119,20 @@ impl StoreView for MmapView {
}
}
/// Walk every node in this view's snapshot, translating each archived node
/// type into its owned `NodeType` counterpart before invoking the callback
/// with (key, node_type, timestamp).
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, mut f: F) {
    let snapshot = self.snapshot();
    snapshot.nodes.iter().for_each(|(node_key, archived)| {
        // One-to-one mapping from the archived enum to the owned enum.
        let owned_type = match archived.node_type {
            ArchivedNodeType::EpisodicSession => NodeType::EpisodicSession,
            ArchivedNodeType::EpisodicDaily => NodeType::EpisodicDaily,
            ArchivedNodeType::EpisodicWeekly => NodeType::EpisodicWeekly,
            ArchivedNodeType::EpisodicMonthly => NodeType::EpisodicMonthly,
            ArchivedNodeType::Semantic => NodeType::Semantic,
        };
        f(node_key, owned_type, archived.timestamp);
    });
}
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
let snap = self.snapshot();
for rel in snap.relations.iter() {
@ -176,6 +199,9 @@ impl StoreView for AnyView {
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F) {
match self { AnyView::Mmap(v) => v.for_each_node(f), AnyView::Owned(s) => s.for_each_node(f) }
}
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, f: F) {
match self { AnyView::Mmap(v) => v.for_each_node_meta(f), AnyView::Owned(s) => s.for_each_node_meta(f) }
}
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F) {
match self { AnyView::Mmap(v) => v.for_each_relation(f), AnyView::Owned(s) => s.for_each_relation(f) }
}