add daily lookup counter for memory retrieval tracking
Mmap'd open-addressing hash table (~49KB/day) records which memory keys get retrieved. FNV-1a hash, linear probing, 4096 slots.

- lookups::bump()/bump_many(): fast path, no store loading needed
- Automatically wired into cmd_search (top 15 results bumped)
- lookup-bump subcommand for external callers
- lookups [DATE] subcommand shows resolved counts

This gives the knowledge loop a signal for which graph neighborhoods are actively used, enabling targeted extraction.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
parent
152cd3ab63
commit
ca0c8cfac6
2 changed files with 248 additions and 1 deletions
204
src/lookups.rs
Normal file
204
src/lookups.rs
Normal file
|
|
@ -0,0 +1,204 @@
|
||||||
|
// Daily lookup counters — open-addressing hash table kept in a fixed-size,
// mmap-friendly per-day file (currently read and rewritten whole on each update).
|
||||||
|
//
|
||||||
|
// Records which memory keys are retrieved each day. The knowledge loop
|
||||||
|
// uses this to focus extraction on actively-used graph neighborhoods,
|
||||||
|
// like hippocampal replay preferentially consolidating recent experience.
|
||||||
|
//
|
||||||
|
// Format: 16-byte header + 4096 entries of (u64 hash, u32 count).
|
||||||
|
// Total: ~49KB per day. Fast path: hash key → probe → bump counter.
|
||||||
|
// No store loading required.
|
||||||
|
|
||||||
|
use std::fs;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use crate::util::memory_subdir;
|
||||||
|
|
||||||
|
/// File signature stored in the first four header bytes.
const MAGIC: [u8; 4] = [b'L', b'K', b'U', b'P'];
/// Format version, written little-endian at header bytes 4..8.
const VERSION: u32 = 1;
/// Number of slots in the table, written little-endian at header bytes 8..12.
const CAPACITY: u32 = 4096;
/// Bytes that precede the first slot (magic, version, capacity, slot count).
const HEADER_SIZE: usize = 16;
/// Bytes per slot: a u64 hash followed by a u32 count.
const ENTRY_SIZE: usize = 8 + 4;
/// Exact length of a well-formed daily file: the header plus every slot.
const FILE_SIZE: usize = HEADER_SIZE + ENTRY_SIZE * (CAPACITY as usize);
|
||||||
|
|
||||||
|
/// 64-bit FNV-1a over the UTF-8 bytes of `key`.
///
/// Starts from the offset basis; for every byte, XORs it in and then
/// multiplies by the FNV prime (wrapping). Simple, fast, no dependencies.
fn fnv1a(key: &str) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x100000001b3;

    key.bytes()
        .fold(OFFSET_BASIS, |acc, byte| (acc ^ u64::from(byte)).wrapping_mul(PRIME))
}
|
||||||
|
|
||||||
|
fn daily_path(date: &str) -> Result<PathBuf, String> {
|
||||||
|
let dir = memory_subdir("lookups")?;
|
||||||
|
Ok(dir.join(format!("{}.bin", date)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn today() -> String {
|
||||||
|
chrono::Local::now().format("%Y-%m-%d").to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read or create the daily file, returning its contents as a mutable Vec.
|
||||||
|
fn load_file(date: &str) -> Result<Vec<u8>, String> {
|
||||||
|
let path = daily_path(date)?;
|
||||||
|
|
||||||
|
if path.exists() {
|
||||||
|
let data = fs::read(&path)
|
||||||
|
.map_err(|e| format!("read {}: {}", path.display(), e))?;
|
||||||
|
if data.len() == FILE_SIZE && data[0..4] == MAGIC {
|
||||||
|
return Ok(data);
|
||||||
|
}
|
||||||
|
// Corrupt or wrong size — reinitialize
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create fresh file
|
||||||
|
let mut buf = vec![0u8; FILE_SIZE];
|
||||||
|
buf[0..4].copy_from_slice(&MAGIC);
|
||||||
|
buf[4..8].copy_from_slice(&VERSION.to_le_bytes());
|
||||||
|
buf[8..12].copy_from_slice(&CAPACITY.to_le_bytes());
|
||||||
|
// count = 0 (already zero)
|
||||||
|
Ok(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_file(date: &str, data: &[u8]) -> Result<(), String> {
|
||||||
|
let path = daily_path(date)?;
|
||||||
|
fs::write(&path, data)
|
||||||
|
.map_err(|e| format!("write {}: {}", path.display(), e))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn entry_offset(slot: usize) -> usize {
|
||||||
|
HEADER_SIZE + slot * ENTRY_SIZE
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_entry(data: &[u8], slot: usize) -> (u64, u32) {
|
||||||
|
let off = entry_offset(slot);
|
||||||
|
let hash = u64::from_le_bytes(data[off..off + 8].try_into().unwrap());
|
||||||
|
let count = u32::from_le_bytes(data[off + 8..off + 12].try_into().unwrap());
|
||||||
|
(hash, count)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_entry(data: &mut [u8], slot: usize, hash: u64, count: u32) {
|
||||||
|
let off = entry_offset(slot);
|
||||||
|
data[off..off + 8].copy_from_slice(&hash.to_le_bytes());
|
||||||
|
data[off + 8..off + 12].copy_from_slice(&count.to_le_bytes());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read the little-endian u32 at header bytes 12..16 — the counter the bump
/// functions increment each time they claim a new slot.
///
/// Panics if `data` is shorter than 16 bytes.
fn read_count(data: &[u8]) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&data[12..16]);
    u32::from_le_bytes(raw)
}
|
||||||
|
|
||||||
|
/// Store `count` as a little-endian u32 at header bytes 12..16.
///
/// Panics if `data` is shorter than 16 bytes.
fn write_count(data: &mut [u8], count: u32) {
    let field = &mut data[12..16];
    field.copy_from_slice(&count.to_le_bytes());
}
|
||||||
|
|
||||||
|
/// Bump the counter for a key. Fast path — no store needed.
|
||||||
|
pub fn bump(key: &str) -> Result<(), String> {
|
||||||
|
let date = today();
|
||||||
|
let mut data = load_file(&date)?;
|
||||||
|
let hash = fnv1a(key);
|
||||||
|
let cap = CAPACITY as usize;
|
||||||
|
|
||||||
|
let mut slot = (hash as usize) % cap;
|
||||||
|
for _ in 0..cap {
|
||||||
|
let (h, c) = read_entry(&data, slot);
|
||||||
|
if h == hash {
|
||||||
|
// Existing entry — bump
|
||||||
|
write_entry(&mut data, slot, hash, c + 1);
|
||||||
|
write_file(&date, &data)?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
if h == 0 && c == 0 {
|
||||||
|
// Empty slot — new entry
|
||||||
|
write_entry(&mut data, slot, hash, 1);
|
||||||
|
let c = read_count(&data);
|
||||||
|
write_count(&mut data, c + 1);
|
||||||
|
write_file(&date, &data)?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
slot = (slot + 1) % cap;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Table full (shouldn't happen with 4096 slots)
|
||||||
|
Err("lookup table full".into())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Bump counters for multiple keys at once (single file read/write).
|
||||||
|
pub fn bump_many(keys: &[&str]) -> Result<(), String> {
|
||||||
|
if keys.is_empty() { return Ok(()); }
|
||||||
|
|
||||||
|
let date = today();
|
||||||
|
let mut data = load_file(&date)?;
|
||||||
|
let cap = CAPACITY as usize;
|
||||||
|
let mut used = read_count(&data);
|
||||||
|
|
||||||
|
for key in keys {
|
||||||
|
let hash = fnv1a(key);
|
||||||
|
let mut slot = (hash as usize) % cap;
|
||||||
|
let mut found = false;
|
||||||
|
|
||||||
|
for _ in 0..cap {
|
||||||
|
let (h, c) = read_entry(&data, slot);
|
||||||
|
if h == hash {
|
||||||
|
write_entry(&mut data, slot, hash, c + 1);
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if h == 0 && c == 0 {
|
||||||
|
write_entry(&mut data, slot, hash, 1);
|
||||||
|
used += 1;
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
slot = (slot + 1) % cap;
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
// Table full — stop, don't lose what we have
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write_count(&mut data, used);
|
||||||
|
write_file(&date, &data)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Dump all lookups for a date as (hash, count) pairs, sorted by count descending.
|
||||||
|
pub fn dump_raw(date: &str) -> Result<Vec<(u64, u32)>, String> {
|
||||||
|
let data = load_file(date)?;
|
||||||
|
let mut entries = Vec::new();
|
||||||
|
|
||||||
|
for slot in 0..CAPACITY as usize {
|
||||||
|
let (hash, count) = read_entry(&data, slot);
|
||||||
|
if hash != 0 || count != 0 {
|
||||||
|
entries.push((hash, count));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
entries.sort_by(|a, b| b.1.cmp(&a.1));
|
||||||
|
Ok(entries)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve hashes to keys by scanning the store. Returns (key, count) pairs.
|
||||||
|
pub fn dump_resolved(date: &str, keys: &[String]) -> Result<Vec<(String, u32)>, String> {
|
||||||
|
let raw = dump_raw(date)?;
|
||||||
|
|
||||||
|
// Build hash → key map from known keys
|
||||||
|
let hash_map: std::collections::HashMap<u64, &str> = keys.iter()
|
||||||
|
.map(|k| (fnv1a(k), k.as_str()))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut resolved = Vec::new();
|
||||||
|
for (hash, count) in raw {
|
||||||
|
let key = hash_map.get(&hash)
|
||||||
|
.map(|k| k.to_string())
|
||||||
|
.unwrap_or_else(|| format!("#{:016x}", hash));
|
||||||
|
resolved.push((key, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(resolved)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Hash a key (exposed for testing/external use).
///
/// Returns the same 64-bit FNV-1a value that `bump`/`bump_many` store in the
/// table and that `dump_raw` reports, so callers can match raw hashes to keys.
pub fn hash_key(key: &str) -> u64 {
    fnv1a(key)
}
|
||||||
45
src/main.rs
45
src/main.rs
|
|
@ -27,6 +27,7 @@ mod migrate;
|
||||||
mod neuro;
|
mod neuro;
|
||||||
mod query;
|
mod query;
|
||||||
mod spectral;
|
mod spectral;
|
||||||
|
mod lookups;
|
||||||
|
|
||||||
pub mod memory_capnp {
|
pub mod memory_capnp {
|
||||||
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
|
include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs"));
|
||||||
|
|
@ -125,6 +126,8 @@ fn main() {
|
||||||
"journal-write" => cmd_journal_write(&args[2..]),
|
"journal-write" => cmd_journal_write(&args[2..]),
|
||||||
"journal-tail" => cmd_journal_tail(&args[2..]),
|
"journal-tail" => cmd_journal_tail(&args[2..]),
|
||||||
"query" => cmd_query(&args[2..]),
|
"query" => cmd_query(&args[2..]),
|
||||||
|
"lookup-bump" => cmd_lookup_bump(&args[2..]),
|
||||||
|
"lookups" => cmd_lookups(&args[2..]),
|
||||||
_ => {
|
_ => {
|
||||||
eprintln!("Unknown command: {}", args[1]);
|
eprintln!("Unknown command: {}", args[1]);
|
||||||
usage();
|
usage();
|
||||||
|
|
@ -202,7 +205,9 @@ Commands:
|
||||||
journal-tail [N] [--full] Show last N journal entries (default 20, --full for content)
|
journal-tail [N] [--full] Show last N journal entries (default 20, --full for content)
|
||||||
query 'EXPR | stages' Query the memory graph
|
query 'EXPR | stages' Query the memory graph
|
||||||
Stages: sort F [asc], limit N, select F,F, count
|
Stages: sort F [asc], limit N, select F,F, count
|
||||||
Ex: \"degree > 15 | sort degree | limit 10\"");
|
Ex: \"degree > 15 | sort degree | limit 10\"
|
||||||
|
lookup-bump KEY [KEY...] Bump daily lookup counter for keys (fast, no store)
|
||||||
|
lookups [DATE] Show daily lookup counts (default: today)");
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cmd_search(args: &[String]) -> Result<(), String> {
|
fn cmd_search(args: &[String]) -> Result<(), String> {
|
||||||
|
|
@ -225,6 +230,10 @@ fn cmd_search(args: &[String]) -> Result<(), String> {
|
||||||
store::Store::log_retrieval_static(&query,
|
store::Store::log_retrieval_static(&query,
|
||||||
&results.iter().map(|r| r.key.clone()).collect::<Vec<_>>());
|
&results.iter().map(|r| r.key.clone()).collect::<Vec<_>>());
|
||||||
|
|
||||||
|
// Bump daily lookup counters (fast path, no store needed)
|
||||||
|
let bump_keys: Vec<&str> = results.iter().take(15).map(|r| r.key.as_str()).collect();
|
||||||
|
let _ = lookups::bump_many(&bump_keys);
|
||||||
|
|
||||||
// Show text results
|
// Show text results
|
||||||
let text_keys: std::collections::HashSet<String> = results.iter()
|
let text_keys: std::collections::HashSet<String> = results.iter()
|
||||||
.take(15).map(|r| r.key.clone()).collect();
|
.take(15).map(|r| r.key.clone()).collect();
|
||||||
|
|
@ -1593,3 +1602,37 @@ Pipe stages:\n \
|
||||||
let graph = store.build_graph();
|
let graph = store.build_graph();
|
||||||
query::run_query(&store, &graph, &query_str)
|
query::run_query(&store, &graph, &query_str)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn cmd_lookup_bump(args: &[String]) -> Result<(), String> {
|
||||||
|
if args.is_empty() {
|
||||||
|
return Err("Usage: poc-memory lookup-bump KEY [KEY...]".into());
|
||||||
|
}
|
||||||
|
let keys: Vec<&str> = args.iter().map(|s| s.as_str()).collect();
|
||||||
|
lookups::bump_many(&keys)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cmd_lookups(args: &[String]) -> Result<(), String> {
|
||||||
|
let date = if args.is_empty() {
|
||||||
|
chrono::Local::now().format("%Y-%m-%d").to_string()
|
||||||
|
} else {
|
||||||
|
args[0].clone()
|
||||||
|
};
|
||||||
|
|
||||||
|
let store = store::Store::load()?;
|
||||||
|
let keys: Vec<String> = store.nodes.values().map(|n| n.key.clone()).collect();
|
||||||
|
let resolved = lookups::dump_resolved(&date, &keys)?;
|
||||||
|
|
||||||
|
if resolved.is_empty() {
|
||||||
|
println!("No lookups for {}", date);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("Lookups for {}:", date);
|
||||||
|
for (key, count) in &resolved {
|
||||||
|
println!(" {:4} {}", count, key);
|
||||||
|
}
|
||||||
|
println!("\n{} distinct keys, {} total lookups",
|
||||||
|
resolved.len(),
|
||||||
|
resolved.iter().map(|(_, c)| *c as u64).sum::<u64>());
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue