diff --git a/src/lookups.rs b/src/lookups.rs
new file mode 100644
index 0000000..fb0a522
--- /dev/null
+++ b/src/lookups.rs
@@ -0,0 +1,204 @@
+// Daily lookup counters — file-backed open-addressing hash table.
+//
+// Records which memory keys are retrieved each day. The knowledge loop
+// uses this to focus extraction on actively-used graph neighborhoods,
+// like hippocampal replay preferentially consolidating recent experience.
+//
+// Format: 16-byte header + 4096 entries of (u64 hash, u32 count), all
+// little-endian. Header layout: 4-byte magic "LKUP", u32 version,
+// u32 capacity, u32 number of occupied slots.
+// Total: ~49KB per day. Fast path: hash key → probe → bump counter.
+// No store loading required.
+//
+// The table is read and rewritten as a whole on every update (plain
+// fs::read / fs::write); nothing in this module memory-maps the file.
+
+use std::fs;
+use std::path::PathBuf;
+
+use crate::util::memory_subdir;
+
+/// File signature stored in bytes 0..4 of the header.
+const MAGIC: [u8; 4] = *b"LKUP";
+/// Format version stored in bytes 4..8 of the header.
+const VERSION: u32 = 1;
+/// Number of slots in the table, stored in bytes 8..12 of the header.
+const CAPACITY: u32 = 4096;
+const HEADER_SIZE: usize = 16;
+const ENTRY_SIZE: usize = 12; // u64 hash + u32 count
+const FILE_SIZE: usize = HEADER_SIZE + CAPACITY as usize * ENTRY_SIZE;
+
+/// 64-bit FNV-1a hash of the key's bytes — simple, fast, no dependencies.
+fn fnv1a(key: &str) -> u64 {
+    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
+    const PRIME: u64 = 0x100000001b3;
+
+    key.bytes()
+        .fold(OFFSET_BASIS, |h, b| (h ^ u64::from(b)).wrapping_mul(PRIME))
+}
+
+/// Path of the table file for `date`: `<date>.bin` inside the "lookups"
+/// memory subdirectory.
+///
+/// # Errors
+/// Propagates the error from `memory_subdir`.
+fn daily_path(date: &str) -> Result<PathBuf, String> {
+    let dir = memory_subdir("lookups")?;
+    Ok(dir.join(format!("{}.bin", date)))
+}
+
+/// Today's date in local time, formatted as `YYYY-MM-DD`.
+fn today() -> String {
+    chrono::Local::now().format("%Y-%m-%d").to_string()
+}
+
+/// Read the daily file, or start a fresh table, returning its contents as a mutable Vec.
+///
+/// An existing file is used only if it is exactly `FILE_SIZE` bytes long and
+/// its header matches `MAGIC`, `VERSION` and `CAPACITY`. A missing, truncated
+/// or incompatible file yields a zeroed table with a freshly written header.
+/// Nothing is written to disk here; that happens in `write_file`.
+///
+/// # Errors
+/// Returns a message if the path cannot be resolved, or if the file exists
+/// but cannot be read.
+fn load_file(date: &str) -> Result<Vec<u8>, String> {
+    let path = daily_path(date)?;
+
+    match fs::read(&path) {
+        Ok(data) if has_valid_header(&data) => Ok(data),
+        // Corrupt, wrong size or incompatible header — reinitialize
+        Ok(_) => Ok(fresh_table()),
+        // No file for this date yet
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(fresh_table()),
+        Err(e) => Err(format!("read {}: {}", path.display(), e)),
+    }
+}
+
+/// True if `data` has the expected size and a header written by this version
+/// of the format (magic, version and capacity all match).
+fn has_valid_header(data: &[u8]) -> bool {
+    // The length check comes first so the slices below cannot go out of bounds.
+    data.len() == FILE_SIZE
+        && data[0..4] == MAGIC
+        && data[4..8] == VERSION.to_le_bytes()
+        && data[8..12] == CAPACITY.to_le_bytes()
+}
+
+/// Build an empty in-memory table: header filled in, every slot zeroed.
+fn fresh_table() -> Vec<u8> {
+    let mut buf = vec![0u8; FILE_SIZE];
+    buf[0..4].copy_from_slice(&MAGIC);
+    buf[4..8].copy_from_slice(&VERSION.to_le_bytes());
+    buf[8..12].copy_from_slice(&CAPACITY.to_le_bytes());
+    // Occupied-slot count at 12..16 stays zero
+    buf
+}
+
+/// Write the whole table for `date` back to its file.
+///
+/// NOTE(review): this is a plain `fs::write`, so it is not atomic and there is
+/// no locking; two processes updating the same day at once can presumably
+/// overwrite each other's counts — confirm whether that matters to callers.
+///
+/// # Errors
+/// Returns a message if the path cannot be resolved or the write fails.
+fn write_file(date: &str, data: &[u8]) -> Result<(), String> {
+    let path = daily_path(date)?;
+    fs::write(&path, data)
+        .map_err(|e| format!("write {}: {}", path.display(), e))
+}
+
+/// Byte offset of entry `slot` within the file.
+fn entry_offset(slot: usize) -> usize {
+    HEADER_SIZE + slot * ENTRY_SIZE
+}
+
+/// Decode the `(hash, count)` pair stored in `slot`.
+///
+/// # Panics
+/// Panics if the entry lies outside `data`.
+fn read_entry(data: &[u8], slot: usize) -> (u64, u32) {
+    let off = entry_offset(slot);
+    let hash = u64::from_le_bytes(
+        data[off..off + 8].try_into().expect("hash field is 8 bytes"),
+    );
+    let count = u32::from_le_bytes(
+        data[off + 8..off + 12].try_into().expect("count field is 4 bytes"),
+    );
+    (hash, count)
+}
+
+/// Store `(hash, count)` into `slot`, little-endian.
+///
+/// # Panics
+/// Panics if the entry lies outside `data`.
+fn write_entry(data: &mut [u8], slot: usize, hash: u64, count: u32) {
+    let off = entry_offset(slot);
+    data[off..off + 8].copy_from_slice(&hash.to_le_bytes());
+    data[off + 8..off + 12].copy_from_slice(&count.to_le_bytes());
+}
+
+/// Read the occupied-slot count from header bytes 12..16.
+fn read_count(data: &[u8]) -> u32 {
+    u32::from_le_bytes(data[12..16].try_into().expect("count field is 4 bytes"))
+}
+
+/// Store the occupied-slot count into header bytes 12..16.
+fn write_count(data: &mut [u8], count: u32) {
+    data[12..16].copy_from_slice(&count.to_le_bytes());
+}
+
+/// Bump the counter for a key. Fast path — no store needed.
+///
+/// Loads today's table, increments the key's counter (inserting the key with
+/// a count of 1 if it is new) and writes the table back.
+///
+/// # Errors
+/// Returns a message if the table cannot be loaded or written, or
+/// `"lookup table full"` if the key is new and no slot is free.
+pub fn bump(key: &str) -> Result<(), String> {
+    bump_many(&[key])
+}
+
+/// Result of counting one hash in the table.
+enum Probe {
+    /// The hash was already present; its counter was incremented.
+    Bumped,
+    /// The hash was stored in a previously empty slot with a count of 1.
+    Inserted,
+    /// Every slot is taken by another hash; nothing was changed.
+    Full,
+}
+
+/// Linear-probe `data` for `hash`, incrementing or inserting its counter.
+fn bump_hash(data: &mut [u8], hash: u64) -> Probe {
+    let cap = CAPACITY as usize;
+    // Reduce in u64 so the starting slot does not depend on the width of usize.
+    let mut slot = (hash % u64::from(CAPACITY)) as usize;
+
+    for _ in 0..cap {
+        let (h, c) = read_entry(data, slot);
+        // Empty is checked first so that a key whose hash is 0 is counted as
+        // a new entry instead of being mistaken for an existing one.
+        if h == 0 && c == 0 {
+            write_entry(data, slot, hash, 1);
+            return Probe::Inserted;
+        }
+        if h == hash {
+            // Saturate: a wrapped counter would panic in debug builds and
+            // could make an occupied slot look empty in release builds.
+            write_entry(data, slot, hash, c.saturating_add(1));
+            return Probe::Bumped;
+        }
+        slot = (slot + 1) % cap;
+    }
+
+    Probe::Full
+}
+
+/// Bump counters for multiple keys at once (single file read/write).
+///
+/// An empty `keys` slice is a no-op. If the table fills up, keys that are
+/// already present are still counted, new keys are dropped, and whatever was
+/// counted is written before the error is returned.
+///
+/// # Errors
+/// Returns a message if the table cannot be loaded or written, or
+/// `"lookup table full"` if at least one new key did not fit.
+pub fn bump_many(keys: &[&str]) -> Result<(), String> {
+    if keys.is_empty() {
+        return Ok(());
+    }
+
+    let date = today();
+    let mut data = load_file(&date)?;
+    let mut used = read_count(&data);
+    let mut overflowed = false;
+
+    for key in keys {
+        match bump_hash(&mut data, fnv1a(key)) {
+            Probe::Bumped => {}
+            Probe::Inserted => used += 1,
+            // Table full — keep going, don't lose what we have
+            Probe::Full => overflowed = true,
+        }
+    }
+
+    write_count(&mut data, used);
+    write_file(&date, &data)?;
+
+    if overflowed {
+        return Err("lookup table full".into());
+    }
+    Ok(())
+}
+
+/// Dump all lookups for a date as (hash, count) pairs, sorted by count descending.
+///
+/// A date with no usable file yields an empty list. Entries with equal
+/// counts keep their table (slot) order, because the sort is stable.
+///
+/// # Errors
+/// Returns a message if the table for `date` cannot be loaded.
+pub fn dump_raw(date: &str) -> Result<Vec<(u64, u32)>, String> {
+    let data = load_file(date)?;
+
+    let mut entries: Vec<(u64, u32)> = (0..CAPACITY as usize)
+        .map(|slot| read_entry(&data, slot))
+        // An all-zero slot is empty
+        .filter(|&(hash, count)| hash != 0 || count != 0)
+        .collect();
+
+    entries.sort_by(|a, b| b.1.cmp(&a.1));
+    Ok(entries)
+}
+
+/// Resolve hashes to keys using the caller-supplied `keys`. Returns
+/// (key, count) pairs in the same order as `dump_raw`.
+///
+/// A hash that matches none of `keys` is rendered as `#` followed by the
+/// hash as 16 hex digits.
+///
+/// # Errors
+/// Returns a message if the table for `date` cannot be loaded.
+pub fn dump_resolved(date: &str, keys: &[String]) -> Result<Vec<(String, u32)>, String> {
+    let raw = dump_raw(date)?;
+
+    // Build hash → key map from known keys
+    let by_hash: std::collections::HashMap<u64, &str> = keys.iter()
+        .map(|k| (fnv1a(k), k.as_str()))
+        .collect();
+
+    let resolved = raw.into_iter()
+        .map(|(hash, count)| {
+            let key = by_hash.get(&hash)
+                .map(|k| k.to_string())
+                .unwrap_or_else(|| format!("#{:016x}", hash));
+            (key, count)
+        })
+        .collect();
+
+    Ok(resolved)
+}
+
+/// Hash a key (exposed for testing/external use).
+pub fn hash_key(key: &str) -> u64 {
+    fnv1a(key)
+}
diff --git a/src/main.rs b/src/main.rs
index 56903fc..fada520 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -27,6 +27,7 @@ mod migrate;
 mod neuro;
 mod query;
 mod spectral;
+mod lookups;
 
 pub mod memory_capnp {
     include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
@@ -125,6 +126,8 @@ fn main() {
         "journal-write" => cmd_journal_write(&args[2..]),
         "journal-tail" => cmd_journal_tail(&args[2..]),
         "query" => cmd_query(&args[2..]),
+        "lookup-bump" => cmd_lookup_bump(&args[2..]),
+        "lookups" => cmd_lookups(&args[2..]),
         _ => {
             eprintln!("Unknown command: {}", args[1]);
             usage();
@@ -202,7 +205,9 @@ Commands:
   journal-tail [N] [--full] Show last N journal entries (default 20, --full for content)
   query 'EXPR | stages' Query the memory graph
                         Stages: sort F [asc], limit N, select F,F, count
-                        Ex: \"degree > 15 | sort degree | limit 10\"");
+                        Ex: \"degree > 15 | sort degree | limit 10\"
+  lookup-bump KEY [KEY...] Bump daily lookup counter for keys (fast, no store)
+  lookups [DATE] Show daily lookup counts (default: today)");
 }
 
 fn cmd_search(args: &[String]) -> Result<(), String> {
@@ -225,6 +230,10 @@ fn cmd_search(args: &[String]) -> Result<(), String> {
     store::Store::log_retrieval_static(&query,
         &results.iter().map(|r| r.key.clone()).collect::<Vec<_>>());
 
+    // Bump daily lookup counters (fast path, no store needed)
+    let bump_keys: Vec<&str> = results.iter().take(15).map(|r| r.key.as_str()).collect();
+    let _ = lookups::bump_many(&bump_keys);
+
     // Show text results
     let text_keys: std::collections::HashSet<String> = results.iter()
         .take(15).map(|r| r.key.clone()).collect();
@@ -1593,3 +1602,40 @@ Pipe stages:\n \
     let graph = store.build_graph();
     query::run_query(&store, &graph, &query_str)
 }
+
+/// `lookup-bump KEY [KEY...]`: bump today's lookup counter for each key.
+fn cmd_lookup_bump(args: &[String]) -> Result<(), String> {
+    if args.is_empty() {
+        return Err("Usage: poc-memory lookup-bump KEY [KEY...]".into());
+    }
+    let keys: Vec<&str> = args.iter().map(|s| s.as_str()).collect();
+    lookups::bump_many(&keys)
+}
+
+/// `lookups [DATE]`: print the lookup counts for DATE (default: today),
+/// resolving hashes to keys of nodes in the loaded store.
+fn cmd_lookups(args: &[String]) -> Result<(), String> {
+    let date = if args.is_empty() {
+        chrono::Local::now().format("%Y-%m-%d").to_string()
+    } else {
+        args[0].clone()
+    };
+
+    let store = store::Store::load()?;
+    let keys: Vec<String> = store.nodes.values().map(|n| n.key.clone()).collect();
+    let resolved = lookups::dump_resolved(&date, &keys)?;
+
+    if resolved.is_empty() {
+        println!("No lookups for {}", date);
+        return Ok(());
+    }
+
+    println!("Lookups for {}:", date);
+    for (key, count) in &resolved {
+        println!(" {:4} {}", count, key);
+    }
+    println!("\n{} distinct keys, {} total lookups",
+        resolved.len(),
+        resolved.iter().map(|(_, c)| *c as u64).sum::<u64>());
+    Ok(())
+}