capnp_store: cache compiled regexes with OnceLock

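parse_units and parse_marker_attrs together recompiled four regexes
(three in parse_units, one in parse_marker_attrs) on every call.
Since they're called per-file during init, this added measurable
overhead. Store each regex in a function-local static
std::sync::OnceLock so it is compiled once and reused afterwards.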
This commit is contained in:
ProofOfConcept 2026-03-03 12:44:02 -05:00
parent a2ec8657d2
commit e34c0ccf4c

View file

@ -1606,11 +1606,18 @@ pub struct MemoryUnit {
} }
pub fn parse_units(filename: &str, content: &str) -> Vec<MemoryUnit> { pub fn parse_units(filename: &str, content: &str) -> Vec<MemoryUnit> {
let marker_re = Regex::new( use std::sync::OnceLock;
r"<!--\s*mem:\s*((?:id|links|tags|causes|state)\s*=\s*[^\s].*?)-->"
).unwrap(); static MARKER_RE: OnceLock<Regex> = OnceLock::new();
let source_re = Regex::new(r"<!--\s*source:\s*(.+?)\s*-->").unwrap(); static SOURCE_RE: OnceLock<Regex> = OnceLock::new();
let md_link_re = Regex::new(r"\[[^\]]*\]\(([^)]*\.md(?:#[^)]*)?)\)").unwrap(); static MD_LINK_RE: OnceLock<Regex> = OnceLock::new();
let marker_re = MARKER_RE.get_or_init(||
Regex::new(r"<!--\s*mem:\s*((?:id|links|tags|causes|state)\s*=\s*[^\s].*?)-->").unwrap());
let source_re = SOURCE_RE.get_or_init(||
Regex::new(r"<!--\s*source:\s*(.+?)\s*-->").unwrap());
let md_link_re = MD_LINK_RE.get_or_init(||
Regex::new(r"\[[^\]]*\]\(([^)]*\.md(?:#[^)]*)?)\)").unwrap());
let markers: Vec<_> = marker_re.captures_iter(content) let markers: Vec<_> = marker_re.captures_iter(content)
.map(|cap| { .map(|cap| {
@ -1699,7 +1706,9 @@ pub fn parse_units(filename: &str, content: &str) -> Vec<MemoryUnit> {
} }
fn parse_marker_attrs(attrs_str: &str) -> HashMap<String, String> { fn parse_marker_attrs(attrs_str: &str) -> HashMap<String, String> {
let attr_re = Regex::new(r"(\w+)\s*=\s*(\S+)").unwrap(); use std::sync::OnceLock;
static ATTR_RE: OnceLock<Regex> = OnceLock::new();
let attr_re = ATTR_RE.get_or_init(|| Regex::new(r"(\w+)\s*=\s*(\S+)").unwrap());
let mut attrs = HashMap::new(); let mut attrs = HashMap::new();
for cap in attr_re.captures_iter(attrs_str) { for cap in attr_re.captures_iter(attrs_str) {
attrs.insert(cap[1].to_string(), cap[2].to_string()); attrs.insert(cap[1].to_string(), cap[2].to_string());