Fast startup: mmap backward scan instead of reading full log
Uses JsonlBackwardIter (SIMD memrchr3) to scan the conversation log newest-first without reading or parsing the whole file. The scan stops as soon as the conversation budget is full, and only the kept nodes are retokenized and pushed into context. Example: with an 18MB log, only the ~50 nodes that fit in the budget are tokenized. Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
7da3efc5df
commit
949dacd861
2 changed files with 35 additions and 32 deletions
|
|
@ -1,8 +1,10 @@
|
|||
use anyhow::{Context, Result};
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::{BufRead, BufReader, Seek, SeekFrom, Write};
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use crate::agent::context::AstNode;
|
||||
use crate::hippocampus::transcript::JsonlBackwardIter;
|
||||
use memmap2::Mmap;
|
||||
|
||||
pub struct ConversationLog {
|
||||
path: PathBuf,
|
||||
|
|
@ -33,32 +35,19 @@ impl ConversationLog {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub fn read_nodes(&self, max_bytes: u64) -> Result<Vec<AstNode>> {
|
||||
/// Read nodes from the tail of the log, newest first.
|
||||
/// Caller decides when to stop (budget, count, etc).
|
||||
pub fn read_tail(&self) -> Result<TailNodes> {
|
||||
if !self.path.exists() {
|
||||
return Ok(Vec::new());
|
||||
anyhow::bail!("log does not exist");
|
||||
}
|
||||
let file = File::open(&self.path)
|
||||
.with_context(|| format!("opening log {}", self.path.display()))?;
|
||||
let file_len = file.metadata()?.len();
|
||||
let mut reader = BufReader::new(file);
|
||||
|
||||
if file_len > max_bytes {
|
||||
reader.seek(SeekFrom::Start(file_len - max_bytes))?;
|
||||
let mut discard = String::new();
|
||||
reader.read_line(&mut discard)?;
|
||||
if file.metadata()?.len() == 0 {
|
||||
anyhow::bail!("log is empty");
|
||||
}
|
||||
|
||||
let mut nodes = Vec::new();
|
||||
for line in reader.lines() {
|
||||
let line = line.context("reading log tail")?;
|
||||
let line = line.trim();
|
||||
if line.is_empty() { continue; }
|
||||
if let Ok(node) = serde_json::from_str::<AstNode>(line) {
|
||||
nodes.push(node);
|
||||
}
|
||||
// Old format entries silently skipped — journal has the context
|
||||
}
|
||||
Ok(nodes)
|
||||
let mmap = unsafe { Mmap::map(&file)? };
|
||||
Ok(TailNodes { _file: file, mmap })
|
||||
}
|
||||
|
||||
pub fn path(&self) -> &Path {
|
||||
|
|
@ -66,12 +55,13 @@ impl ConversationLog {
|
|||
}
|
||||
|
||||
pub fn oldest_timestamp(&self) -> Option<chrono::DateTime<chrono::Utc>> {
|
||||
// Read forward from the start to find first timestamp
|
||||
let file = File::open(&self.path).ok()?;
|
||||
let reader = BufReader::new(file);
|
||||
for line in reader.lines().flatten() {
|
||||
let line = line.trim().to_string();
|
||||
let mmap = unsafe { Mmap::map(&file).ok()? };
|
||||
// Find first { ... } and parse
|
||||
for line in mmap.split(|&b| b == b'\n') {
|
||||
if line.is_empty() { continue; }
|
||||
if let Ok(node) = serde_json::from_str::<AstNode>(&line) {
|
||||
if let Ok(node) = serde_json::from_slice::<AstNode>(line) {
|
||||
if let Some(leaf) = node.leaf() {
|
||||
if let Some(ts) = leaf.timestamp() {
|
||||
return Some(ts);
|
||||
|
|
@ -82,3 +72,16 @@ impl ConversationLog {
|
|||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterates over conversation log nodes newest-first, using mmap + backward scan.
///
/// Built by `ConversationLog::read_tail`, which opens the log, memory-maps it and
/// moves both the `File` and the resulting `Mmap` into this struct.
|
||||
pub struct TailNodes {
|
||||
/// File handle the map was created from. It is never read by the code shown
/// here; presumably it is kept only so the handle lives as long as the map
/// (TODO confirm — the leading underscore marks it as intentionally unused).
_file: File,
|
||||
/// Memory-mapped bytes of the log; `iter` hands a borrow of this to
/// `JsonlBackwardIter`.
mmap: Mmap,
|
||||
}
|
||||
|
||||
impl TailNodes {
|
||||
/// Returns an iterator of `AstNode`s decoded from the mapped log.
///
/// The byte records come from `JsonlBackwardIter` over the mapped file;
/// per the type's doc comment they are expected newest-first (the iterator's
/// implementation is not visible here — confirm ordering there).
/// The returned iterator borrows `self` (`'_`), so it cannot outlive this
/// `TailNodes`. Nothing is collected up front: the caller can stop pulling
/// items at any point.
pub fn iter(&self) -> impl Iterator<Item = AstNode> + '_ {
|
||||
JsonlBackwardIter::new(&self.mmap)
|
||||
// Records that do not deserialize as `AstNode` are dropped silently
// (`.ok()` discards the serde error) rather than ending the iteration.
.filter_map(|bytes| serde_json::from_slice::<AstNode>(bytes).ok())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue