graph: add implicit temporal edges between episodic nodes
Compute parent/child (session→daily→weekly→monthly) and prev/next (chronological ordering within each level) edges at graph build time from node metadata. Parse dates from keys for digest nodes (whose timestamps reflect creation time, not the covered date), and prefer key-parsed dates over timestamp-derived dates for sessions (timezone fix). Result: ~9185 implicit edges added, community count roughly halved, and the degree-distribution Gini coefficient improved. Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
958cf9d041
commit
510f448f10
2 changed files with 209 additions and 0 deletions
|
|
@ -426,9 +426,192 @@ fn build_adjacency(store: &impl StoreView) -> (HashMap<String, Vec<Edge>>, HashS
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
add_implicit_temporal_edges(store, &keys, &mut adj);
|
||||||
|
|
||||||
(adj, keys)
|
(adj, keys)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add implicit edges for the temporal/digest hierarchy.
|
||||||
|
///
|
||||||
|
/// These edges are derived from node types and dates — they don't
|
||||||
|
/// need to be stored. Two kinds:
|
||||||
|
/// - parent/child: session→daily→weekly→monthly (by date containment)
|
||||||
|
/// - prev/next: chronological ordering within each level
|
||||||
|
///
|
||||||
|
/// Sessions use their timestamp for date. Digest nodes (daily/weekly/monthly)
|
||||||
|
/// extract the date they *cover* from the key name, since their timestamp
|
||||||
|
/// is when the digest was created, not what period it covers.
|
||||||
|
fn add_implicit_temporal_edges(
|
||||||
|
store: &impl StoreView,
|
||||||
|
keys: &HashSet<String>,
|
||||||
|
adj: &mut HashMap<String, Vec<Edge>>,
|
||||||
|
) {
|
||||||
|
use crate::store::NodeType::*;
|
||||||
|
use chrono::{Datelike, DateTime, NaiveDate};
|
||||||
|
|
||||||
|
// Extract the covered date from a key name.
|
||||||
|
// Patterns: "daily-2026-03-06", "daily-2026-03-06-identity",
|
||||||
|
// "weekly-2026-W09", "monthly-2026-02"
|
||||||
|
// "journal#j-2026-03-13t...", "journal#2026-03-13-..."
|
||||||
|
fn date_from_key(key: &str) -> Option<NaiveDate> {
|
||||||
|
// Try extracting YYYY-MM-DD after known prefixes
|
||||||
|
for prefix in ["daily-", "journal#j-", "journal#"] {
|
||||||
|
if let Some(rest) = key.strip_prefix(prefix) {
|
||||||
|
if rest.len() >= 10 {
|
||||||
|
if let Ok(d) = NaiveDate::parse_from_str(&rest[..10], "%Y-%m-%d") {
|
||||||
|
return Some(d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
fn week_from_key(key: &str) -> Option<(i32, u32)> {
|
||||||
|
// "weekly-2026-W09" → (2026, 9)
|
||||||
|
let rest = key.strip_prefix("weekly-")?;
|
||||||
|
let (year_str, w_str) = rest.split_once("-W")?;
|
||||||
|
let year: i32 = year_str.parse().ok()?;
|
||||||
|
// Week string might have a suffix like "-foo"
|
||||||
|
let week_str = w_str.split('-').next()?;
|
||||||
|
let week: u32 = week_str.parse().ok()?;
|
||||||
|
Some((year, week))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn month_from_key(key: &str) -> Option<(i32, u32)> {
|
||||||
|
// "monthly-2026-02" → (2026, 2)
|
||||||
|
let rest = key.strip_prefix("monthly-")?;
|
||||||
|
let (year_str, month_str) = rest.split_once('-')?;
|
||||||
|
let year: i32 = year_str.parse().ok()?;
|
||||||
|
let month_str = month_str.split('-').next()?;
|
||||||
|
let month: u32 = month_str.parse().ok()?;
|
||||||
|
Some((year, month))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect episodic nodes by type
|
||||||
|
struct Dated { key: String, ts: i64, date: NaiveDate }
|
||||||
|
|
||||||
|
let mut sessions: Vec<Dated> = Vec::new();
|
||||||
|
let mut dailies: Vec<(String, NaiveDate)> = Vec::new();
|
||||||
|
let mut weeklies: Vec<(String, (i32, u32))> = Vec::new();
|
||||||
|
let mut monthlies: Vec<(String, (i32, u32))> = Vec::new();
|
||||||
|
|
||||||
|
store.for_each_node_meta(|key, node_type, ts| {
|
||||||
|
if !keys.contains(key) { return; }
|
||||||
|
match node_type {
|
||||||
|
EpisodicSession => {
|
||||||
|
// Prefer date from key (local time) over timestamp (UTC)
|
||||||
|
// to avoid timezone mismatches
|
||||||
|
let date = date_from_key(key).or_else(|| {
|
||||||
|
DateTime::from_timestamp(ts, 0).map(|dt| dt.date_naive())
|
||||||
|
});
|
||||||
|
if let Some(date) = date {
|
||||||
|
sessions.push(Dated { key: key.to_owned(), ts, date });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EpisodicDaily => {
|
||||||
|
if let Some(date) = date_from_key(key) {
|
||||||
|
dailies.push((key.to_owned(), date));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EpisodicWeekly => {
|
||||||
|
if let Some(yw) = week_from_key(key) {
|
||||||
|
weeklies.push((key.to_owned(), yw));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EpisodicMonthly => {
|
||||||
|
if let Some(ym) = month_from_key(key) {
|
||||||
|
monthlies.push((key.to_owned(), ym));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
sessions.sort_by_key(|d| d.ts);
|
||||||
|
dailies.sort_by_key(|(_, d)| *d);
|
||||||
|
weeklies.sort_by_key(|(_, yw)| *yw);
|
||||||
|
monthlies.sort_by_key(|(_, ym)| *ym);
|
||||||
|
|
||||||
|
let add_edge = |adj: &mut HashMap<String, Vec<Edge>>, a: &str, b: &str| {
|
||||||
|
if let Some(edges) = adj.get(a) {
|
||||||
|
if edges.iter().any(|e| e.target == b) { return; }
|
||||||
|
}
|
||||||
|
adj.entry(a.to_owned()).or_default().push(Edge {
|
||||||
|
target: b.to_owned(),
|
||||||
|
strength: 1.0,
|
||||||
|
rel_type: RelationType::Auto,
|
||||||
|
});
|
||||||
|
adj.entry(b.to_owned()).or_default().push(Edge {
|
||||||
|
target: a.to_owned(),
|
||||||
|
strength: 1.0,
|
||||||
|
rel_type: RelationType::Auto,
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
// Build indexes: date→dailies, (year,week)→weekly, (year,month)→monthly
|
||||||
|
// Note: multiple dailies can share a date (e.g. daily-2026-03-06-identity,
|
||||||
|
// daily-2026-03-06-technical), so we collect all of them.
|
||||||
|
let mut date_to_dailies: HashMap<NaiveDate, Vec<String>> = HashMap::new();
|
||||||
|
for (key, date) in &dailies {
|
||||||
|
date_to_dailies.entry(*date).or_default().push(key.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut yw_to_weekly: HashMap<(i32, u32), String> = HashMap::new();
|
||||||
|
for (key, yw) in &weeklies {
|
||||||
|
yw_to_weekly.insert(*yw, key.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut ym_to_monthly: HashMap<(i32, u32), String> = HashMap::new();
|
||||||
|
for (key, ym) in &monthlies {
|
||||||
|
ym_to_monthly.insert(*ym, key.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Session → Daily (parent): each session links to all dailies for its date
|
||||||
|
for sess in &sessions {
|
||||||
|
if let Some(daily_keys) = date_to_dailies.get(&sess.date) {
|
||||||
|
for daily in daily_keys {
|
||||||
|
add_edge(adj, &sess.key, daily);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Daily → Weekly (parent)
|
||||||
|
for (key, date) in &dailies {
|
||||||
|
let yw = (date.iso_week().year(), date.iso_week().week());
|
||||||
|
if let Some(weekly) = yw_to_weekly.get(&yw) {
|
||||||
|
add_edge(adj, key, weekly);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Weekly → Monthly (parent)
|
||||||
|
for (key, yw) in &weeklies {
|
||||||
|
// A week can span two months; use the Thursday date (ISO week convention)
|
||||||
|
let thursday = NaiveDate::from_isoywd_opt(yw.0, yw.1, chrono::Weekday::Thu);
|
||||||
|
if let Some(d) = thursday {
|
||||||
|
let ym = (d.year(), d.month());
|
||||||
|
if let Some(monthly) = ym_to_monthly.get(&ym) {
|
||||||
|
add_edge(adj, key, monthly);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prev/next within each level
|
||||||
|
for pair in sessions.windows(2) {
|
||||||
|
add_edge(adj, &pair[0].key, &pair[1].key);
|
||||||
|
}
|
||||||
|
for pair in dailies.windows(2) {
|
||||||
|
add_edge(adj, &pair[0].0, &pair[1].0);
|
||||||
|
}
|
||||||
|
for pair in weeklies.windows(2) {
|
||||||
|
add_edge(adj, &pair[0].0, &pair[1].0);
|
||||||
|
}
|
||||||
|
for pair in monthlies.windows(2) {
|
||||||
|
add_edge(adj, &pair[0].0, &pair[1].0);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/// Label propagation community detection.
|
/// Label propagation community detection.
|
||||||
///
|
///
|
||||||
/// Each node starts with its own label. Each iteration: adopt the most
|
/// Each node starts with its own label. Each iteration: adopt the most
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,9 @@ pub trait StoreView {
|
||||||
/// Iterate all nodes. Callback receives (key, content, weight).
|
/// Iterate all nodes. Callback receives (key, content, weight).
|
||||||
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F);
|
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F);
|
||||||
|
|
||||||
|
/// Iterate all nodes with metadata. Callback receives (key, node_type, timestamp).
///
/// The timestamp is in Unix seconds (callers pass it to
/// `DateTime::from_timestamp(ts, 0)`).
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, f: F);
|
||||||
|
|
||||||
/// Iterate all relations. Callback receives (source_key, target_key, strength, rel_type).
|
/// Iterate all relations. Callback receives (source_key, target_key, strength, rel_type).
|
||||||
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F);
|
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F);
|
||||||
|
|
||||||
|
|
@ -39,6 +42,12 @@ impl StoreView for Store {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Invoke `f(key, node_type, timestamp)` for every node held by this store.
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, mut f: F) {
    self.nodes
        .iter()
        .for_each(|(key, node)| f(key, node.node_type, node.timestamp));
}
|
||||||
|
|
||||||
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
|
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
|
||||||
for rel in &self.relations {
|
for rel in &self.relations {
|
||||||
if rel.deleted { continue; }
|
if rel.deleted { continue; }
|
||||||
|
|
@ -110,6 +119,20 @@ impl StoreView for MmapView {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, mut f: F) {
|
||||||
|
let snap = self.snapshot();
|
||||||
|
for (key, node) in snap.nodes.iter() {
|
||||||
|
let nt = match node.node_type {
|
||||||
|
ArchivedNodeType::EpisodicSession => NodeType::EpisodicSession,
|
||||||
|
ArchivedNodeType::EpisodicDaily => NodeType::EpisodicDaily,
|
||||||
|
ArchivedNodeType::EpisodicWeekly => NodeType::EpisodicWeekly,
|
||||||
|
ArchivedNodeType::EpisodicMonthly => NodeType::EpisodicMonthly,
|
||||||
|
ArchivedNodeType::Semantic => NodeType::Semantic,
|
||||||
|
};
|
||||||
|
f(key, nt, node.timestamp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
|
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, mut f: F) {
|
||||||
let snap = self.snapshot();
|
let snap = self.snapshot();
|
||||||
for rel in snap.relations.iter() {
|
for rel in snap.relations.iter() {
|
||||||
|
|
@ -176,6 +199,9 @@ impl StoreView for AnyView {
|
||||||
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F) {
|
fn for_each_node<F: FnMut(&str, &str, f32)>(&self, f: F) {
|
||||||
match self { AnyView::Mmap(v) => v.for_each_node(f), AnyView::Owned(s) => s.for_each_node(f) }
|
match self { AnyView::Mmap(v) => v.for_each_node(f), AnyView::Owned(s) => s.for_each_node(f) }
|
||||||
}
|
}
|
||||||
|
fn for_each_node_meta<F: FnMut(&str, NodeType, i64)>(&self, f: F) {
|
||||||
|
match self { AnyView::Mmap(v) => v.for_each_node_meta(f), AnyView::Owned(s) => s.for_each_node_meta(f) }
|
||||||
|
}
|
||||||
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F) {
|
fn for_each_relation<F: FnMut(&str, &str, f32, RelationType)>(&self, f: F) {
|
||||||
match self { AnyView::Mmap(v) => v.for_each_relation(f), AnyView::Owned(s) => s.for_each_relation(f) }
|
match self { AnyView::Mmap(v) => v.for_each_relation(f), AnyView::Owned(s) => s.for_each_relation(f) }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue