forked from kent/consciousness
split capnp_store.rs into src/store/ module hierarchy
capnp_store.rs (1772 lines) → four focused modules:
- store/types.rs — types, macros, constants, path helpers
- store/parse.rs — markdown parsing (MemoryUnit, parse_units)
- store/view.rs — StoreView trait, MmapView, AnyView
- store/mod.rs — Store impl methods, re-exports
new_node/new_relation become free functions in types.rs. All callers updated: capnp_store:: → store::
This commit is contained in:
parent
e34c0ccf4c
commit
635da6d3e2
11 changed files with 980 additions and 978 deletions
167
src/store/parse.rs
Normal file
167
src/store/parse.rs
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
// Markdown parsing for memory files
|
||||
//
|
||||
// Splits markdown files into MemoryUnit structs based on `<!-- mem: ... -->`
|
||||
// markers. Each marker starts a new section; content before the first marker
|
||||
// becomes the file-level unit. Links and causal edges are extracted from
|
||||
// both marker attributes and inline markdown links.
|
||||
|
||||
use super::NodeType;
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
/// One section of a markdown memory file, as produced by `parse_units`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemoryUnit {
    /// Unit key: the bare filename for file-level content, or
    /// `filename#id` / `filename#unnamed-N` for a marker-delimited section.
    pub key: String,
    /// Section text with surrounding whitespace trimmed.
    pub content: String,
    /// Normalized targets taken from the marker's `links` attribute.
    pub marker_links: Vec<String>,
    /// Normalized targets of inline markdown links to `.md` files.
    pub md_links: Vec<String>,
    /// Normalized targets taken from the marker's `causes` attribute.
    pub causes: Vec<String>,
    /// Value of the marker's `state` attribute, if present.
    pub state: Option<String>,
    /// Text of the first `<!-- source: ... -->` comment in the section, if any.
    pub source_ref: Option<String>,
}
|
||||
|
||||
pub fn classify_filename(filename: &str) -> NodeType {
|
||||
if filename.starts_with("daily-") { NodeType::EpisodicDaily }
|
||||
else if filename.starts_with("weekly-") { NodeType::EpisodicWeekly }
|
||||
else if filename == "journal.md" { NodeType::EpisodicSession }
|
||||
else { NodeType::Semantic }
|
||||
}
|
||||
|
||||
pub fn parse_units(filename: &str, content: &str) -> Vec<MemoryUnit> {
|
||||
static MARKER_RE: OnceLock<Regex> = OnceLock::new();
|
||||
static SOURCE_RE: OnceLock<Regex> = OnceLock::new();
|
||||
static MD_LINK_RE: OnceLock<Regex> = OnceLock::new();
|
||||
|
||||
let marker_re = MARKER_RE.get_or_init(||
|
||||
Regex::new(r"<!--\s*mem:\s*((?:id|links|tags|causes|state)\s*=\s*[^\s].*?)-->").unwrap());
|
||||
let source_re = SOURCE_RE.get_or_init(||
|
||||
Regex::new(r"<!--\s*source:\s*(.+?)\s*-->").unwrap());
|
||||
let md_link_re = MD_LINK_RE.get_or_init(||
|
||||
Regex::new(r"\[[^\]]*\]\(([^)]*\.md(?:#[^)]*)?)\)").unwrap());
|
||||
|
||||
let markers: Vec<_> = marker_re.captures_iter(content)
|
||||
.map(|cap| {
|
||||
let full_match = cap.get(0).unwrap();
|
||||
let attrs_str = &cap[1];
|
||||
(full_match.start(), full_match.end(), parse_marker_attrs(attrs_str))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let find_source = |text: &str| -> Option<String> {
|
||||
source_re.captures(text).map(|c| c[1].trim().to_string())
|
||||
};
|
||||
|
||||
if markers.is_empty() {
|
||||
let source_ref = find_source(content);
|
||||
let md_links = extract_md_links(content, md_link_re, filename);
|
||||
return vec![MemoryUnit {
|
||||
key: filename.to_string(),
|
||||
content: content.to_string(),
|
||||
marker_links: Vec::new(),
|
||||
md_links,
|
||||
causes: Vec::new(),
|
||||
state: None,
|
||||
source_ref,
|
||||
}];
|
||||
}
|
||||
|
||||
let mut units = Vec::new();
|
||||
|
||||
let first_start = markers[0].0;
|
||||
let pre_content = content[..first_start].trim();
|
||||
if !pre_content.is_empty() {
|
||||
let source_ref = find_source(pre_content);
|
||||
let md_links = extract_md_links(pre_content, md_link_re, filename);
|
||||
units.push(MemoryUnit {
|
||||
key: filename.to_string(),
|
||||
content: pre_content.to_string(),
|
||||
marker_links: Vec::new(),
|
||||
md_links,
|
||||
causes: Vec::new(),
|
||||
state: None,
|
||||
source_ref,
|
||||
});
|
||||
}
|
||||
|
||||
for (i, (_, end, attrs)) in markers.iter().enumerate() {
|
||||
let unit_end = if i + 1 < markers.len() {
|
||||
markers[i + 1].0
|
||||
} else {
|
||||
content.len()
|
||||
};
|
||||
let unit_content = content[*end..unit_end].trim();
|
||||
|
||||
let id = attrs.get("id").cloned().unwrap_or_default();
|
||||
let key = if id.is_empty() {
|
||||
format!("{}#unnamed-{}", filename, i)
|
||||
} else {
|
||||
format!("{}#{}", filename, id)
|
||||
};
|
||||
|
||||
let marker_links = attrs.get("links")
|
||||
.map(|l| l.split(',').map(|s| normalize_link(s.trim(), filename)).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
let causes = attrs.get("causes")
|
||||
.map(|l| l.split(',').map(|s| normalize_link(s.trim(), filename)).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
let state = attrs.get("state").cloned();
|
||||
let source_ref = find_source(unit_content);
|
||||
let md_links = extract_md_links(unit_content, md_link_re, filename);
|
||||
|
||||
units.push(MemoryUnit {
|
||||
key,
|
||||
content: unit_content.to_string(),
|
||||
marker_links,
|
||||
md_links,
|
||||
causes,
|
||||
state,
|
||||
source_ref,
|
||||
});
|
||||
}
|
||||
|
||||
units
|
||||
}
|
||||
|
||||
fn parse_marker_attrs(attrs_str: &str) -> HashMap<String, String> {
|
||||
static ATTR_RE: OnceLock<Regex> = OnceLock::new();
|
||||
let attr_re = ATTR_RE.get_or_init(|| Regex::new(r"(\w+)\s*=\s*(\S+)").unwrap());
|
||||
let mut attrs = HashMap::new();
|
||||
for cap in attr_re.captures_iter(attrs_str) {
|
||||
attrs.insert(cap[1].to_string(), cap[2].to_string());
|
||||
}
|
||||
attrs
|
||||
}
|
||||
|
||||
fn extract_md_links(content: &str, re: &Regex, source_file: &str) -> Vec<String> {
|
||||
re.captures_iter(content)
|
||||
.map(|cap| normalize_link(&cap[1], source_file))
|
||||
.filter(|link| !link.starts_with(source_file) || link.contains('#'))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Reduces a link target to `basename` or `basename#fragment` form.
///
/// A target that starts with `#` is a same-file reference and is prefixed
/// with `source_file`. Otherwise any directory components in front of the
/// file name are discarded, while everything from the first `#` onwards is
/// kept verbatim.
pub fn normalize_link(target: &str, source_file: &str) -> String {
    if target.starts_with('#') {
        return [source_file, target].concat();
    }

    // `fragment` keeps its leading `#`; it is empty when there is none.
    let (path_part, fragment) = match target.find('#') {
        Some(hash_pos) => target.split_at(hash_pos),
        None => (target, ""),
    };

    // Paths without a final file name component (e.g. `..` or an empty
    // string) are passed through unchanged.
    let mut link = match Path::new(path_part).file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => path_part.to_string(),
    };
    link.push_str(fragment);
    link
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue