consciousness/poc-memory/src/lookups.rs
Kent Overstreet fc48ac7c7f split into workspace: poc-memory and poc-daemon subcrates
poc-daemon (notification routing, idle timer, IRC, Telegram) was already
fully self-contained with no imports from the poc-memory library. Now it's
a proper separate crate with its own Cargo.toml and capnp schema.

poc-memory retains the store, graph, search, neuro, knowledge, and the
jobkit-based memory maintenance daemon (daemon.rs).

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
2026-03-08 20:43:59 -04:00

204 lines
6 KiB
Rust

// Daily lookup counters — file-backed open-addressing hash table.
//
// Records which memory keys are retrieved each day. The knowledge loop
// uses this to focus extraction on actively-used graph neighborhoods,
// like hippocampal replay preferentially consolidating recent experience.
//
// Format: 16-byte header + 4096 entries of (u64 hash, u32 count).
// Total: ~49KB per day. Fast path: hash key → probe → bump counter.
// No store loading required.
use std::fs;
use std::path::PathBuf;
use crate::util::memory_subdir;
// On-disk layout constants.
//
// Header (HEADER_SIZE bytes): magic at bytes 0..4, version at 4..8,
// capacity at 8..12, number of occupied slots at 12..16. The three integer
// fields are stored as little-endian u32.

// File signature stored in the first four bytes of every daily file.
const MAGIC: [u8; 4] = *b"LKUP";
// Format version written into the header of freshly initialised tables.
const VERSION: u32 = 1;
// Number of slots in the table; also written into the header.
const CAPACITY: u32 = 4096;
// Size of the header that precedes the first entry.
const HEADER_SIZE: usize = 16;
const ENTRY_SIZE: usize = 12; // u64 hash + u32 count
// Exact size of a well-formed daily file: header plus every slot.
const FILE_SIZE: usize = HEADER_SIZE + CAPACITY as usize * ENTRY_SIZE;
/// 64-bit FNV-1a over the UTF-8 bytes of `key` — simple, fast, no dependencies.
///
/// Starts from the FNV offset basis; each byte is XORed into the running
/// value, which is then multiplied by the FNV prime with wrapping arithmetic.
fn fnv1a(key: &str) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    key.bytes()
        .fold(OFFSET_BASIS, |acc, byte| (acc ^ u64::from(byte)).wrapping_mul(PRIME))
}
/// Path of the counter file for `date`: `<lookups dir>/<date>.bin`.
///
/// The directory comes from `memory_subdir("lookups")`; its error, if any,
/// is propagated unchanged.
fn daily_path(date: &str) -> Result<PathBuf, String> {
    let lookups_dir = memory_subdir("lookups")?;
    let file_name = format!("{}.bin", date);
    Ok(lookups_dir.join(file_name))
}
/// Current local calendar date formatted as `YYYY-MM-DD`.
fn today() -> String {
    let now = chrono::Local::now();
    now.format("%Y-%m-%d").to_string()
}
/// Load the lookup table for `date` into memory.
///
/// Returns the file's bytes when the file exists, is exactly `FILE_SIZE`
/// bytes long, and its header carries the expected magic, version and
/// capacity. A missing file, or one that fails any of those checks, yields a
/// freshly initialised empty table instead. Nothing is written to disk here;
/// the caller decides whether to persist the returned buffer.
///
/// # Errors
/// Returns a message if `daily_path` fails, or if the file exists but cannot
/// be read.
fn load_file(date: &str) -> Result<Vec<u8>, String> {
    let path = daily_path(date)?;

    // Read directly rather than checking `exists()` first: a file that
    // vanishes between the check and the read is then simply "missing"
    // instead of surfacing as a read error.
    match fs::read(&path) {
        Ok(data) => {
            // The length check comes first so the header slices below are
            // always in bounds.
            let header_ok = data.len() == FILE_SIZE
                && data[0..4] == MAGIC
                && data[4..8] == VERSION.to_le_bytes()
                && data[8..12] == CAPACITY.to_le_bytes();
            if header_ok {
                return Ok(data);
            }
            // Corrupt, wrong size, or a different layout — reinitialize
        }
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
        Err(e) => return Err(format!("read {}: {}", path.display(), e)),
    }

    // Fresh table: header fields set, all slots zero.
    let mut buf = vec![0u8; FILE_SIZE];
    buf[0..4].copy_from_slice(&MAGIC);
    buf[4..8].copy_from_slice(&VERSION.to_le_bytes());
    buf[8..12].copy_from_slice(&CAPACITY.to_le_bytes());
    // occupied-slot count at 12..16 stays zero
    Ok(buf)
}
/// Write `data` to the daily file for `date`, replacing any previous contents.
///
/// I/O failures are reported as `write <path>: <error>`.
fn write_file(date: &str, data: &[u8]) -> Result<(), String> {
    let target = daily_path(date)?;
    match fs::write(&target, data) {
        Ok(()) => Ok(()),
        Err(err) => Err(format!("write {}: {}", target.display(), err)),
    }
}
/// Byte offset of slot number `slot` within the file buffer.
fn entry_offset(slot: usize) -> usize {
    let preceding_entries = slot * ENTRY_SIZE;
    HEADER_SIZE + preceding_entries
}
fn read_entry(data: &[u8], slot: usize) -> (u64, u32) {
let off = entry_offset(slot);
let hash = u64::from_le_bytes(data[off..off + 8].try_into().unwrap());
let count = u32::from_le_bytes(data[off + 8..off + 12].try_into().unwrap());
(hash, count)
}
/// Encode `(hash, count)` into `slot`, little-endian, hash first.
///
/// Panics if `data` is too short to contain the slot.
fn write_entry(data: &mut [u8], slot: usize, hash: u64, count: u32) {
    let hash_at = entry_offset(slot);
    let count_at = hash_at + 8;

    data[hash_at..count_at].copy_from_slice(&hash.to_le_bytes());
    data[count_at..count_at + 4].copy_from_slice(&count.to_le_bytes());
}
/// Read the occupied-slot counter from header bytes 12..16 (little-endian).
fn read_count(data: &[u8]) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&data[12..16]);
    u32::from_le_bytes(raw)
}
/// Store the occupied-slot counter into header bytes 12..16 (little-endian).
fn write_count(data: &mut [u8], count: u32) {
    let encoded = count.to_le_bytes();
    for (dst, src) in data[12..16].iter_mut().zip(encoded.iter()) {
        *dst = *src;
    }
}
/// Bump the counter for a key. Fast path — no store needed.
///
/// Loads today's table, linearly probes from the key's home slot, and either
/// increments the matching entry or claims the first empty slot with a count
/// of 1 (also incrementing the header's occupied-slot counter). The table is
/// written back after the update.
///
/// Counters saturate at `u32::MAX` instead of overflowing.
///
/// # Errors
/// Returns an error if the table cannot be loaded or written, or
/// `"lookup table full"` if every slot is occupied by a different hash.
pub fn bump(key: &str) -> Result<(), String> {
    let date = today();
    let mut data = load_file(&date)?;
    let hash = fnv1a(key);
    let cap = CAPACITY as usize;
    let mut slot = (hash as usize) % cap;

    for _ in 0..cap {
        let (h, c) = read_entry(&data, slot);

        // Test for an empty slot before comparing hashes: a key whose hash is
        // 0 would otherwise "match" an empty slot and be inserted without the
        // occupied-slot counter being updated.
        if h == 0 && c == 0 {
            // Empty slot — new entry
            write_entry(&mut data, slot, hash, 1);
            let used = read_count(&data);
            write_count(&mut data, used.saturating_add(1));
            return write_file(&date, &data);
        }
        if h == hash {
            // Existing entry — bump, saturating so the count can neither
            // panic on overflow nor wrap back to zero.
            write_entry(&mut data, slot, hash, c.saturating_add(1));
            return write_file(&date, &data);
        }
        slot = (slot + 1) % cap;
    }

    // Table full (shouldn't happen with 4096 slots)
    Err("lookup table full".into())
}
/// Bump counters for multiple keys at once (single file read/write).
///
/// Each key is handled as in `bump`: its entry is incremented if present,
/// otherwise the first empty slot on its probe path is claimed with a count
/// of 1. Duplicate keys in `keys` are counted once per occurrence. Counters
/// saturate at `u32::MAX` instead of overflowing.
///
/// An empty `keys` slice returns immediately without touching the file.
///
/// If the table is full, keys that are not already present are silently
/// dropped; keys that are present are still incremented, and the table is
/// written back either way.
///
/// # Errors
/// Returns an error if the table cannot be loaded or written.
pub fn bump_many(keys: &[&str]) -> Result<(), String> {
    if keys.is_empty() {
        return Ok(());
    }
    let date = today();
    let mut data = load_file(&date)?;
    let cap = CAPACITY as usize;
    let mut used = read_count(&data);

    for key in keys {
        let hash = fnv1a(key);
        let mut slot = (hash as usize) % cap;
        for _ in 0..cap {
            let (h, c) = read_entry(&data, slot);

            // Empty-slot test first: a key whose hash is 0 must not be
            // mistaken for a match on an untouched slot, or `used` would
            // not be updated for it.
            if h == 0 && c == 0 {
                write_entry(&mut data, slot, hash, 1);
                used = used.saturating_add(1);
                break;
            }
            if h == hash {
                // Saturate so the count can neither panic nor wrap to zero.
                write_entry(&mut data, slot, hash, c.saturating_add(1));
                break;
            }
            slot = (slot + 1) % cap;
        }
        // Falling out of the probe loop without a `break` means the table is
        // full and this key is absent. Skip just this key — later keys that
        // already have an entry can still be counted.
    }

    write_count(&mut data, used);
    write_file(&date, &data)
}
/// List every occupied slot for `date` as `(hash, count)`, highest count first.
///
/// A slot is considered occupied unless both its hash and count are zero.
/// Entries with equal counts keep their slot order (the sort is stable).
pub fn dump_raw(date: &str) -> Result<Vec<(u64, u32)>, String> {
    let table = load_file(date)?;

    let mut live: Vec<(u64, u32)> = (0..CAPACITY as usize)
        .map(|slot| read_entry(&table, slot))
        .filter(|&(hash, count)| !(hash == 0 && count == 0))
        .collect();

    live.sort_by(|&(_, left), &(_, right)| right.cmp(&left));
    Ok(live)
}
/// Like `dump_raw`, but with hashes translated back to key names.
///
/// `keys` is the list of candidate key names; each is hashed and matched
/// against the recorded hashes. A hash with no matching candidate is rendered
/// as `#` followed by its 16-digit hexadecimal value. Order is that of
/// `dump_raw`.
pub fn dump_resolved(date: &str, keys: &[String]) -> Result<Vec<(String, u32)>, String> {
    let raw = dump_raw(date)?;

    // Reverse index: hash → candidate key name.
    let mut by_hash: std::collections::HashMap<u64, &str> = std::collections::HashMap::new();
    for candidate in keys {
        by_hash.insert(fnv1a(candidate), candidate.as_str());
    }

    let resolved = raw
        .into_iter()
        .map(|(hash, count)| {
            let label = match by_hash.get(&hash) {
                Some(name) => (*name).to_string(),
                None => format!("#{:016x}", hash),
            };
            (label, count)
        })
        .collect();
    Ok(resolved)
}
/// Hash a key (exposed for testing/external use).
///
/// Thin public wrapper around the private `fnv1a`: returns the same 64-bit
/// value that `bump` and `bump_many` store for `key`, so callers can match
/// the hashes returned by `dump_raw` against known keys.
pub fn hash_key(key: &str) -> u64 {
fnv1a(key)
}