// Journal enrichment and experience mining
//
// Two modes of processing conversation transcripts:
//   journal_enrich — enrich a specific journal entry with source location and links
//   experience_mine — retroactively find experiential moments not yet journaled
//
// Both extract conversation from JSONL transcripts, build prompts, call Sonnet,
// and apply results to the store.
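//
// Typical flow, as a sketch (all names are functions in or used by this module):
//   extract_conversation(path) -> format_conversation(&messages)
//     -> prompts::load_prompt(...) -> call_sonnet(agent, &prompt)
//     -> parse_json_response(&response) -> node/relation writes -> store.save()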

use super::llm::{call_sonnet, parse_json_response, semantic_keys};

use crate::neuro;
use crate::store::{self, Store, StoreView, new_node, new_relation};
use crate::util::parse_timestamp_to_epoch;

use std::collections::hash_map::DefaultHasher;
use std::collections::HashSet;
use std::fs;
use std::hash::{Hash, Hasher};

/// Compute the store dedup key for a transcript file.
/// This is the same key experience_mine uses to mark a transcript as mined.
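///
/// Key shape (illustrative; the hash half depends on file contents):
/// ```ignore
/// let key = transcript_dedup_key("/tmp/session.jsonl")?;
/// assert!(key.starts_with("_mined-transcripts#h-"));
/// ```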
fn transcript_dedup_key(path: &str) -> Result<String, String> {
    let bytes = fs::read(path).map_err(|e| format!("read {}: {}", path, e))?;
    let mut hasher = DefaultHasher::new();
    bytes.hash(&mut hasher);
    Ok(format!("_mined-transcripts#h-{:016x}", hasher.finish()))
}

/// Check if a transcript has already been mined (dedup key exists in store).
pub fn is_transcript_mined(store: &impl StoreView, path: &str) -> bool {
    match transcript_dedup_key(path) {
        Ok(key) => store.node_content(&key).is_some(),
        Err(_) => false,
    }
}

/// Dedup key for a transcript based on its filename (UUID).
/// Used by the daemon reconcile loop — no file reads needed.
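///
/// e.g. `/path/to/<uuid>.jsonl` maps to `"_mined-transcripts#f-<uuid>"` (illustrative).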
pub fn transcript_filename_key(path: &str) -> String {
    let filename = std::path::Path::new(path)
        .file_stem()
        .map(|s| s.to_string_lossy().to_string())
        .unwrap_or_else(|| path.to_string());
    format!("_mined-transcripts#f-{}", filename)
}

/// Get the set of all mined transcript keys (both content-hash and filename)
/// from the store. Load once per daemon tick, check many.
pub fn mined_transcript_keys() -> HashSet<String> {
    use crate::store::AnyView;
    let Ok(view) = AnyView::load() else { return HashSet::new() };
    let mut keys = HashSet::new();
    view.for_each_node(|key, _, _| {
        if key.starts_with("_mined-transcripts#") {
            keys.insert(key.to_string());
        }
    });
    keys
}

/// Check if a transcript has been mined, given a pre-loaded set of mined keys.
/// Checks filename-based key only (no file read). Sessions mined before the
/// filename key was added will pass through and short-circuit in experience_mine
/// via the content hash check — a one-time cost on first restart after this change.
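///
/// Typical daemon-tick usage (sketch; `candidate_paths` is hypothetical):
/// ```ignore
/// let mined = mined_transcript_keys();
/// for path in candidate_paths {
///     if is_transcript_mined_with_keys(&mined, path) { continue; }
///     // ... mine the transcript ...
/// }
/// ```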
pub fn is_transcript_mined_with_keys(mined: &HashSet<String>, path: &str) -> bool {
    mined.contains(&transcript_filename_key(path))
}

/// Extract user/assistant messages with line numbers from a JSONL transcript.
/// Each item is `(line_number, role, text, timestamp)`.
pub fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
    let path = std::path::Path::new(jsonl_path);
    let messages = super::transcript::parse_transcript(path)?;
    Ok(messages.into_iter()
        .map(|m| (m.line, m.role, m.text, m.timestamp))
        .collect())
}

pub const COMPACTION_MARKER: &str = "This session is being continued from a previous conversation that ran out of context";

/// Split extracted messages into segments at compaction boundaries.
/// Each segment represents one continuous conversation before context was compacted.
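///
/// Sketch (`ignore`d; tuples are `(line, role, text, timestamp)`):
/// ```ignore
/// let msgs = vec![
///     (1, "user".into(), "hello".into(), String::new()),
///     (2, "user".into(), format!("{} ...", COMPACTION_MARKER), String::new()),
/// ];
/// assert_eq!(split_on_compaction(msgs).len(), 2);
/// ```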
pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Vec<Vec<(usize, String, String, String)>> {
    let mut segments: Vec<Vec<(usize, String, String, String)>> = Vec::new();
    let mut current = Vec::new();

    for msg in messages {
        if msg.1 == "user" && msg.2.starts_with(COMPACTION_MARKER) {
            if !current.is_empty() {
                segments.push(current);
                current = Vec::new();
            }
            // The continuation message itself is part of the new segment
            current.push(msg);
        } else {
            current.push(msg);
        }
    }
    if !current.is_empty() {
        segments.push(current);
    }

    segments
}

/// Format conversation messages for the prompt (truncating long messages).
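/// Lines come out as `L12 [user] 2026-03-05T10:11:12: <text>` (timestamp trimmed
/// to seconds), or `L12 [user]: <text>` when no timestamp is present (illustrative).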
fn format_conversation(messages: &[(usize, String, String, String)]) -> String {
    messages.iter()
        .map(|(line, role, text, ts)| {
            let text = crate::util::truncate(text, 1800, "...[truncated]");
            if ts.is_empty() {
                format!("L{} [{}]: {}", line, role, text)
            } else {
                format!("L{} [{}] {}: {}", line, role, &ts[..ts.len().min(19)], text)
            }
        })
        .collect::<Vec<_>>()
        .join("\n\n")
}

fn build_journal_prompt(
    entry_text: &str,
    conversation: &str,
    keys: &[String],
    grep_line: usize,
) -> Result<String, String> {
    let keys_text: String = keys.iter()
        .map(|k| format!(" - {}", k))
        .collect::<Vec<_>>()
        .join("\n");

    super::prompts::load_prompt("journal-enrich", &[
        ("{{GREP_LINE}}", &grep_line.to_string()),
        ("{{ENTRY_TEXT}}", entry_text),
        ("{{KEYS}}", &keys_text),
        ("{{CONVERSATION}}", conversation),
    ])
}

/// Enrich a journal entry with conversation context and link proposals.
pub fn journal_enrich(
    store: &mut Store,
    jsonl_path: &str,
    entry_text: &str,
    grep_line: usize,
) -> Result<(), String> {
    println!("Extracting conversation from {}...", jsonl_path);
    let messages = extract_conversation(jsonl_path)?;
    let conversation = format_conversation(&messages);
    println!(" {} messages, {} chars", messages.len(), conversation.len());

    let keys = semantic_keys(store);
    println!(" {} semantic keys", keys.len());

    let prompt = build_journal_prompt(entry_text, &conversation, &keys, grep_line)?;
    println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);

    println!(" Calling Sonnet...");
    let response = call_sonnet("enrich", &prompt)?;
    let result = parse_json_response(&response)?;

    // Report results
    let source_start = result.get("source_start").and_then(|v| v.as_u64()).unwrap_or(0);
    let source_end = result.get("source_end").and_then(|v| v.as_u64()).unwrap_or(0);
    let links = result.get("links").and_then(|v| v.as_array());
    let insights = result.get("missed_insights").and_then(|v| v.as_array());

    println!(" Source: L{}-L{}", source_start, source_end);
    println!(" Links: {}", links.map_or(0, |l| l.len()));
    println!(" Missed insights: {}", insights.map_or(0, |l| l.len()));

    // Apply links
    if let Some(links) = links {
        for link in links {
            let target = link.get("target").and_then(|v| v.as_str()).unwrap_or("");
            let reason = link.get("reason").and_then(|v| v.as_str()).unwrap_or("");
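            // A "NOTE:"-prefixed target is advisory: surfaced in the output, never linked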
            if target.is_empty() || target.starts_with("NOTE:") {
                if let Some(note) = target.strip_prefix("NOTE:") {
                    println!(" NOTE: {} — {}", note, reason);
                }
                continue;
            }

            // Resolve target and find journal node
            let resolved = match store.resolve_key(target) {
                Ok(r) => r,
                Err(_) => { println!(" SKIP {} (not in graph)", target); continue; }
            };
            let source_key = match store.find_journal_node(entry_text) {
                Some(k) => k,
                None => { println!(" SKIP {} (no matching journal node)", target); continue; }
            };

            // Refine target to best-matching section
            let source_content = store.nodes.get(&source_key)
                .map(|n| n.content.as_str()).unwrap_or("");
            let resolved = neuro::refine_target(store, source_content, &resolved);

            let source_uuid = match store.nodes.get(&source_key) {
                Some(n) => n.uuid,
                None => continue,
            };
            let target_uuid = match store.nodes.get(&resolved) {
                Some(n) => n.uuid,
                None => continue,
            };

            let rel = new_relation(
                source_uuid, target_uuid,
                store::RelationType::Link,
                0.5,
                &source_key, &resolved,
            );
            if store.add_relation(rel).is_ok() {
                println!(" LINK {} → {} ({})", source_key, resolved, reason);
            }
        }
    }

    store.save()?;
    Ok(())
}

/// Mine a conversation transcript for experiential moments not yet journaled.
/// If `segment` is Some, only process that compaction segment of the file.
pub fn experience_mine(
    store: &mut Store,
    jsonl_path: &str,
    segment: Option<usize>,
) -> Result<usize, String> {
    println!("Experience mining: {}", jsonl_path);

    // Transcript-level dedup: hash the file content and check if already mined
    let dedup_key = transcript_dedup_key(jsonl_path)?;

    if store.nodes.contains_key(&dedup_key) {
        // Backfill filename key if missing (transcripts mined before this key existed)
        let fname_key = transcript_filename_key(jsonl_path);
        if !store.nodes.contains_key(&fname_key) {
            let mut node = new_node(&fname_key, &format!("Backfilled from {}", dedup_key));
            node.provenance = store::Provenance::AgentExperienceMine;
            let _ = store.upsert_node(node);
            store.save()?;
        }
        let hash = dedup_key.strip_prefix("_mined-transcripts#h-").unwrap_or(&dedup_key);
        println!(" Already mined this transcript ({}), skipping.", hash);
        return Ok(0);
    }

    let all_messages = extract_conversation(jsonl_path)?;

    // If segment is specified, extract just that segment; otherwise process all messages
    let messages = match segment {
        Some(idx) => {
            let segments = split_on_compaction(all_messages);
            segments.into_iter().nth(idx)
                .ok_or_else(|| format!("segment {} out of range", idx))?
        }
        None => all_messages,
    };

    let conversation = format_conversation(&messages);
    println!(" {} messages, {} chars", messages.len(), conversation.len());

    // Load core identity nodes for context
    let cfg = crate::config::get();
    let identity: String = cfg.core_nodes.iter()
        .filter_map(|k| store.nodes.get(k).map(|n| n.content.as_str()))
        .collect::<Vec<_>>()
        .join("\n\n");

    // Get recent episodic entries to avoid duplication
    let mut journal: Vec<_> = store.nodes.values()
        .filter(|node| matches!(node.node_type, store::NodeType::EpisodicSession))
        .collect();
    journal.sort_by_key(|n| n.timestamp);
    let recent: String = journal.iter().rev().take(10)
        .map(|n| format!("---\n{}\n", n.content))
        .collect();

    let keys = semantic_keys(store);
    let keys_text: String = keys.iter()
        .map(|k| format!(" - {}", k))
        .collect::<Vec<_>>()
        .join("\n");

    let prompt = super::prompts::load_prompt("experience", &[
        ("{{IDENTITY}}", &identity),
        ("{{RECENT_JOURNAL}}", &recent),
        ("{{KEYS}}", &keys_text),
        ("{{CONVERSATION}}", &conversation),
    ])?;
    let est_tokens = prompt.len() / 4;
    println!(" Prompt: {} chars (~{} tokens)", prompt.len(), est_tokens);

    if est_tokens > 150_000 {
        println!(" Skipping: prompt too large ({} tokens > 150k limit)", est_tokens);
        return Ok(0);
    }

    println!(" Calling Sonnet...");
    let response = call_sonnet("experience-mine", &prompt)?;
    let entries = parse_json_response(&response)?;
    let entries = match entries.as_array() {
        Some(arr) => arr.clone(),
        None => return Err("expected JSON array".to_string()),
    };

    if entries.is_empty() {
        println!(" No missed experiences found.");
    } else {
        println!(" Found {} experiential moments:", entries.len());
    }

    let mut count = 0;
    for entry in &entries {
        let ts = entry.get("timestamp").and_then(|v| v.as_str()).unwrap_or("");
        let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
        if content.is_empty() { continue; }

        // Format with timestamp header
        let full_content = if ts.is_empty() {
            content.to_string()
        } else {
            format!("## {}\n\n{}", ts, content)
        };

        // Generate key from timestamp and a slug of the content
        let key_slug: String = content.chars()
            .filter(|c| c.is_alphanumeric() || *c == ' ')
            .take(50)
            .collect::<String>()
            .trim()
            .to_lowercase()
            .replace(' ', "-");
        let key = if ts.is_empty() {
            format!("journal#j-mined-{}", key_slug)
        } else {
            format!("journal#j-{}-{}", ts.to_lowercase().replace(':', "-"), key_slug)
        };
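        // Resulting shape, e.g. "journal#j-2026-03-05t14-02-33-<slug>" (timestamp
        // lowercased, colons to dashes) or "journal#j-mined-<slug>" with no timestamp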

        // Check for duplicate
        if store.nodes.contains_key(&key) {
            println!(" SKIP {} (duplicate)", key);
            continue;
        }

        // Write to store — use event timestamp, not mining time
        let mut node = new_node(&key, &full_content);
        node.node_type = store::NodeType::EpisodicSession;
        node.provenance = store::Provenance::AgentExperienceMine;
        if !ts.is_empty() {
            if let Some(epoch) = parse_timestamp_to_epoch(ts) {
                node.created_at = epoch;
            }
        }
        let _ = store.upsert_node(node);
        count += 1;

        let preview = crate::util::truncate(content, 77, "...");
        println!(" + [{}] {}", ts, preview);
    }

    // Record this transcript/segment as mined (even if count == 0, to prevent re-runs)
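    // Key shapes: "_mined-transcripts#f-<uuid>" for a whole file,
    // "_mined-transcripts#f-<uuid>.<n>" when mining compaction segment n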
    let fname_key = match segment {
        Some(idx) => format!("{}.{}", transcript_filename_key(jsonl_path), idx),
        None => transcript_filename_key(jsonl_path),
    };
    let dedup_content = format!("Mined {} ({} entries)", jsonl_path, count);
    let mut fname_node = new_node(&fname_key, &dedup_content);
    fname_node.provenance = store::Provenance::AgentExperienceMine;
    let _ = store.upsert_node(fname_node);

    // For unsegmented calls, also write the content-hash key for backwards compat
    if segment.is_none() {
        let mut dedup_node = new_node(&dedup_key, &dedup_content);
        dedup_node.provenance = store::Provenance::AgentExperienceMine;
        let _ = store.upsert_node(dedup_node);
    }

    if count > 0 {
        println!(" Saved {} new journal entries.", count);
    }
    store.save()?;
    println!("Done: {} new entries mined.", count);
    Ok(count)
}
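
// Minimal sketch tests for the pure helpers above: key shapes and segmentation only.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn filename_key_uses_file_stem() {
        assert_eq!(
            transcript_filename_key("/tmp/abc-123.jsonl"),
            "_mined-transcripts#f-abc-123"
        );
    }

    #[test]
    fn compaction_marker_starts_a_new_segment() {
        let msg = |line: usize, role: &str, text: String| (line, role.to_string(), text, String::new());
        let msgs = vec![
            msg(1, "user", "hello".to_string()),
            msg(2, "assistant", "hi".to_string()),
            msg(3, "user", format!("{} (summary follows)", COMPACTION_MARKER)),
            msg(4, "assistant", "resuming".to_string()),
        ];
        let segs = split_on_compaction(msgs);
        assert_eq!(segs.len(), 2);
        // The continuation message itself belongs to the new segment
        assert_eq!(segs[1][0].0, 3);
    }
}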