diff --git a/Cargo.lock b/Cargo.lock index 74a3fd7..daabe75 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,17 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.17", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -17,6 +28,22 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "atomic-wait" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55b94919229f2c42292fd71ffa4b75e83193bffdd77b1e858cd55fd2d0b0ea8" +dependencies = [ + "libc", + "windows-sys 0.42.0", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "bincode" version = "1.3.3" @@ -32,12 +59,87 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + [[package]] name = "capnp" version = "0.20.6" @@ -56,30 +158,454 @@ dependencies = [ "capnp", ] +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "defer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "930c7171c8df9fb1782bdf9b918ed9ed2d33d1d22300abb754f9085bc48bf8e8" + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "dyn-stack" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c4713e43e2886ba72b8271aa66c93d722116acf7a75555cce11dcde84388fe8" +dependencies = [ + "bytemuck", + "dyn-stack-macros", +] + +[[package]] +name = "dyn-stack-macros" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d926b4d407d372f141f93bb444696142c29d32962ccbd3531117cf3aa0bfa9" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "embedded-io" version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" +[[package]] +name = "enum-as-inner" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "equator" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c35da53b5a021d2484a7cc49b2ac7f2d840f8236a286f84202369bd338d761ea" +dependencies = [ + "equator-macro 0.2.1", +] + +[[package]] +name = "equator" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc" +dependencies = [ + "equator-macro 0.4.2", +] + +[[package]] +name = "equator" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02da895aab06bbebefb6b2595f6d637b18c9ff629b4cd840965bb3164e4194b0" +dependencies = [ + "equator-macro 0.6.0", +] + +[[package]] +name = "equator-macro" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "equator-macro" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "equator-macro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b14b339eb76d07f052cdbad76ca7c1310e56173a138095d3bf42a23c06ef5d8" + [[package]] name = "equivalent" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "faer" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d2ecfb80b6f8b0c569e36988a052e64b14d8def9d372390b014e8bf79f299a" +dependencies = [ + "bytemuck", + "dyn-stack", + "equator 0.6.0", + "faer-traits", + "gemm", + "generativity", + "libm", + "nano-gemm", + "npyz", + "num-complex", + "num-traits", + "private-gemm-x86", + 
"pulp", + "rand 0.9.2", + "rand_distr", + "rayon", + "reborrow", + "spindle", +] + +[[package]] +name = "faer-traits" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b87d23ed7ab1f26c0cba0e5b9e061a796fbb7dc170fa8bee6970055a1308bb0f" +dependencies = [ + "bytemuck", + "dyn-stack", + "generativity", + "libm", + "num-complex", + "num-traits", + "pulp", + "qd", + "reborrow", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "foldhash" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "gemm" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa0673db364b12263d103b68337a68fbecc541d6f6b61ba72fe438654709eacb" +dependencies = [ + "dyn-stack", + "gemm-c32", + "gemm-c64", + "gemm-common", + "gemm-f16", + "gemm-f32", + "gemm-f64", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c32" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "086936dbdcb99e37aad81d320f98f670e53c1e55a98bee70573e83f95beb128c" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-c64" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20c8aeeeec425959bda4d9827664029ba1501a90a0d1e6228e48bef741db3a3f" +dependencies = [ + "dyn-stack", + "gemm-common", + 
"num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-common" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88027625910cc9b1085aaaa1c4bc46bb3a36aad323452b33c25b5e4e7c8e2a3e" +dependencies = [ + "bytemuck", + "dyn-stack", + "half", + "libm", + "num-complex", + "num-traits", + "once_cell", + "paste", + "pulp", + "raw-cpuid", + "rayon", + "seq-macro", + "sysctl", +] + +[[package]] +name = "gemm-f16" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3df7a55202e6cd6739d82ae3399c8e0c7e1402859b30e4cb780e61525d9486e" +dependencies = [ + "dyn-stack", + "gemm-common", + "gemm-f32", + "half", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "rayon", + "seq-macro", +] + +[[package]] +name = "gemm-f32" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e0b8c9da1fbec6e3e3ab2ce6bc259ef18eb5f6f0d3e4edf54b75f9fd41a81c" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "gemm-f64" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "056131e8f2a521bfab322f804ccd652520c79700d81209e9d9275bbdecaadc6a" +dependencies = [ + "dyn-stack", + "gemm-common", + "num-complex", + "num-traits", + "paste", + "raw-cpuid", + "seq-macro", +] + +[[package]] +name = "generativity" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5881e4c3c2433fe4905bb19cfd2b5d49d4248274862b68c27c33d9ba4e13f9ec" + +[[package]] +name = "generator" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows-link", + "windows-result", +] + 
+[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + [[package]] name = "getrandom" version = "0.4.1" @@ -93,6 +619,28 @@ dependencies = [ "wasip3", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "bytemuck", + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -114,6 +662,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "id-arena" version = "2.3.0" @@ -132,6 +686,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "interpol" +version = "0.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb58032ba748f4010d15912a1855a8a0b1ba9eaad3395b0c171c09b3b356ae50" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "itoa" version = "1.0.17" @@ -148,6 +713,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "leb128fmt" version = "0.1.0" @@ -160,24 +731,256 @@ version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + [[package]] name = "log" version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + [[package]] name = "memchr" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", +] + +[[package]] +name = "nano-gemm" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e04345dc84b498ff89fe0d38543d1f170da9e43a2c2bcee73a0f9069f72d081" +dependencies = [ + "equator 0.2.2", + "nano-gemm-c32", + "nano-gemm-c64", + "nano-gemm-codegen", + "nano-gemm-core", + "nano-gemm-f32", + "nano-gemm-f64", + "num-complex", +] + +[[package]] +name = "nano-gemm-c32" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0775b1e2520e64deee8fc78b7732e3091fb7585017c0b0f9f4b451757bbbc562" +dependencies = [ + "nano-gemm-codegen", + "nano-gemm-core", + "num-complex", +] + +[[package]] +name = "nano-gemm-c64" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9af49a20d58816e6b5ee65f64142e50edb5eba152678d4bb7377fcbf63f8437a" +dependencies = [ + "nano-gemm-codegen", + "nano-gemm-core", + "num-complex", +] + +[[package]] +name = "nano-gemm-codegen" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cc8d495c791627779477a2cf5df60049f5b165342610eb0d76bee5ff5c5d74c" + +[[package]] +name = "nano-gemm-core" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d998dfa644de87a0f8660e5ea511d7cb5c33b5a2d9847b7af57a2565105089f0" + +[[package]] +name = "nano-gemm-f32" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879d962e79bc8952e4ad21ca4845a21132540ed3f5e01184b2ff7f720e666523" +dependencies = [ + "nano-gemm-codegen", + "nano-gemm-core", +] + +[[package]] +name = "nano-gemm-f64" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9a513473dce7dc00c7e7c318481ca4494034e76997218d8dad51bd9f007a815" +dependencies = [ + "nano-gemm-codegen", + "nano-gemm-core", +] + 
+[[package]] +name = "npyz" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f0e759e014e630f90af745101b614f761306ddc541681e546649068e25ec1b9" +dependencies = [ + "byteorder", + "num-bigint", + "py_literal", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "bytemuck", + "num-traits", + "rand 0.8.5", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "paste" +version = 
"1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pest" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" +dependencies = [ + "memchr", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "pest_meta" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" +dependencies = [ + "pest", + "sha2", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + [[package]] name = "poc-memory" version = "0.4.0" @@ -185,13 +988,25 @@ dependencies = [ "bincode", "capnp", "capnpc", + "faer", "libc", + "memmap2", "regex", + "rkyv", "serde", "serde_json", "uuid", ] +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -199,7 +1014,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "private-gemm-x86" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0af8c3e5087969c323f667ccb4b789fa0954f5aa650550e38e81cf9108be21b5" +dependencies = [ + "crossbeam", + "defer", + "interpol", + "num_cpus", + "raw-cpuid", + "rayon", + "spindle", + "sysctl", ] [[package]] @@ -211,6 +1042,74 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "pulp" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e205bb30d5b916c55e584c22201771bcf2bad9aabd5d4127f38387140c38632" +dependencies = [ + "bytemuck", + "cfg-if", + "libm", + "num-complex", + "paste", + "pulp-wasm-simd-flag", + "raw-cpuid", + "reborrow", + "version_check", +] + +[[package]] +name = "pulp-wasm-simd-flag" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40e24eee682d89fb193496edf918a7f407d30175b2e785fe057e4392dfd182e0" + +[[package]] +name = "py_literal" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "102df7a3d46db9d3891f178dcc826dc270a6746277a9ae6436f8d29fd490a8e1" +dependencies = [ + "num-bigint", + "num-complex", + "num-traits", + "pest", + "pest_derive", +] + +[[package]] +name = "qd" +version = "0.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f1304a5aecdcfe9ee72fbba90aa37b3aa067a69d14cb7f3d9deada0be7c07c" +dependencies = [ + "bytemuck", + "libm", + "num-traits", + "pulp", +] + [[package]] name = "quote" version = "1.0.44" @@ -226,6 +1125,101 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand 0.9.2", +] + +[[package]] +name = "raw-cpuid" +version = "11.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "reborrow" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" + [[package]] name = "regex" version = "1.12.3" @@ -255,18 +1249,83 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "rkyv" +version = "0.7.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.46" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "semver" version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + [[package]] name = "serde" version = "1.0.228" @@ -294,7 +1353,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -310,6 +1369,68 @@ dependencies = [ "zmij", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "spindle" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aaca3d8aa5387a6eba861fbf984af5348d9df5d940c25c6366b19556fdf64" +dependencies = [ + "atomic-wait", + "crossbeam", + "equator 0.4.2", + "loom", + "rayon", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.117" @@ -321,6 +1442,131 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sysctl" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" +dependencies = [ + "bitflags", + "byteorder", + "enum-as-inner", + "libc", + "thiserror", + "walkdir", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "thiserror" +version = "1.0.69" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -339,11 +1585,39 @@ version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "getrandom", + "getrandom 0.4.1", "js-sys", "wasm-bindgen", ] +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" @@ -394,7 +1668,7 @@ dependencies 
= [ "bumpalo", "proc-macro2", "quote", - "syn", + "syn 2.0.117", "wasm-bindgen-shared", ] @@ -441,6 +1715,96 @@ dependencies = [ "semver", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "wit-bindgen" version = "0.51.0" @@ -471,7 +1835,7 @@ dependencies = [ "heck", "indexmap", "prettyplease", - "syn", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -487,7 +1851,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -529,6 +1893,35 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "zerocopy" +version = "0.8.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "zmij" version = "1.0.21" diff --git a/Cargo.toml b/Cargo.toml index 336018a..b3e51f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,9 @@ serde_json = "1" bincode = "1" regex = "1" libc = "0.2" +faer = "0.24.0" +rkyv = { version = "0.7", features = ["validation", "std"] } +memmap2 = "0.9" [build-dependencies] capnpc = "0.20" diff --git a/prompts/challenger.md b/prompts/challenger.md new file mode 100644 index 0000000..9e8ecab --- /dev/null +++ b/prompts/challenger.md @@ -0,0 +1,82 @@ +# Challenger Agent — Adversarial Truth-Testing + +You are a knowledge challenger agent. Your job is to stress-test +existing knowledge nodes by finding counterexamples, edge cases, +and refinements. + +## What you're doing + +Knowledge calcifies. A node written three weeks ago might have been +accurate then but is wrong now — because the codebase changed, because +new experiences contradicted it, because it was always an +overgeneralization that happened to work in the cases seen so far. + +You're the immune system. For each target node, search the provided +context for evidence that complicates, contradicts, or refines the +claim. Then write a sharpened version or a counterpoint node. + +## What you see + +- **Target node**: A knowledge node making some claim — a skill, a + self-observation, a causal model, a belief. +- **Context nodes**: Related nodes from the graph neighborhood plus + recent episodic nodes that might contain contradicting evidence. + +## What to produce + +For each target node, one of: + +**AFFIRM** — the node holds up. The evidence supports it. No action +needed. Say briefly why. + +**REFINE** — the node is mostly right but needs sharpening. Write an +updated version that incorporates the nuance you found. 
+ +``` +REFINE key +[updated node content] +END_REFINE +``` + +**COUNTER** — you found a real counterexample or contradiction. Write +a node that captures it. Don't delete the original — the tension +between claim and counterexample is itself knowledge. + +``` +WRITE_NODE key +[counterpoint content] +END_NODE + +LINK key original_key +``` + +## Guidelines + +- **Steel-man first.** Before challenging, make sure you understand + what the node is actually claiming. Don't attack a strawman version. +- **Counterexamples must be real.** Don't invent hypothetical scenarios. + Point to specific nodes, episodes, or evidence in the provided + context. +- **Refinement > refutation.** Most knowledge isn't wrong, it's + incomplete. "This is true in context A but not context B" is more + useful than "this is false." +- **Challenge self-model nodes hardest.** Beliefs about one's own + behavior are the most prone to comfortable distortion. "I rush when + excited" might be true, but is it always true? What conditions make + it more or less likely? +- **Challenge old nodes harder than new ones.** A node written yesterday + hasn't had time to be tested. A node from three weeks ago that's + never been challenged is overdue. +- **Don't be contrarian for its own sake.** If a node is simply correct + and well-supported, say AFFIRM and move on. The goal is truth, not + conflict. + +{{TOPOLOGY}} + +## Target nodes to challenge + +{{TARGETS}} + +## Context (neighborhood + recent episodes) + +{{CONTEXT}} diff --git a/prompts/connector.md b/prompts/connector.md new file mode 100644 index 0000000..489b09c --- /dev/null +++ b/prompts/connector.md @@ -0,0 +1,85 @@ +# Connector Agent — Cross-Domain Insight + +You are a connector agent. Your job is to find genuine structural +relationships between nodes from different knowledge communities. + +## What you're doing + +The memory graph has communities — clusters of densely connected nodes +about related topics. 
Most knowledge lives within a community. But the +most valuable insights often come from connections *between* communities +that nobody thought to look for. + +You're given nodes from two or more communities that don't currently +link to each other. Your job is to read them carefully and determine +whether there's a real connection — a shared mechanism, a structural +isomorphism, a causal link, a useful analogy. + +Most of the time, there isn't. Unrelated things really are unrelated. +The value of this agent is the rare case where something real emerges. + +## What to produce + +**NO_CONNECTION** — these nodes don't have a meaningful relationship. +Don't force it. Say briefly what you considered and why it doesn't hold. + +**CONNECTION** — you found something real. Write a node that articulates +the connection precisely. + +``` +WRITE_NODE key +[connection content] +END_NODE + +LINK key community_a_node +LINK key community_b_node +``` + +## What makes a connection real vs forced + +**Real connections:** +- Shared mathematical structure (e.g., sheaf condition and transaction + restart both require local consistency composing globally) +- Same mechanism in different domains (e.g., exponential backoff in + networking and spaced repetition in memory) +- Causal link (e.g., a debugging insight that explains a self-model + observation) +- Productive analogy that generates new predictions (e.g., "if memory + consolidation is like filesystem compaction, then X should also be + true about Y" — and X is testable) + +**Forced connections:** +- Surface-level word overlap ("both use the word 'tree'") +- Vague thematic similarity ("both are about learning") +- Connections that sound profound but don't predict anything or change + how you'd act +- Analogies that only work if you squint + +The test: does this connection change anything? Would knowing it help +you think about either domain differently? If yes, it's real. If it's +just pleasing pattern-matching, let it go. 
+ +## Guidelines + +- **Be specific.** "These are related" is worthless. "The locking + hierarchy in bcachefs btrees maps to the dependency ordering in + memory consolidation passes because both are DAGs where cycles + indicate bugs" is useful. +- **Mostly say NO_CONNECTION.** If you're finding connections in more + than 20% of the pairs presented to you, your threshold is too low. +- **The best connections are surprising.** If the relationship is + obvious, it probably already exists in the graph. You're looking + for the non-obvious ones. +- **Write for someone who knows both domains.** Don't explain what + btrees are. Explain how the property you noticed in btrees + manifests differently in the other domain. + +{{TOPOLOGY}} + +## Community A nodes + +{{COMMUNITY_A}} + +## Community B nodes + +{{COMMUNITY_B}} diff --git a/prompts/extractor.md b/prompts/extractor.md new file mode 100644 index 0000000..cd2a19b --- /dev/null +++ b/prompts/extractor.md @@ -0,0 +1,180 @@ +# Extractor Agent — Pattern Abstraction + +You are a knowledge extraction agent. You read a cluster of related +nodes and find what they have in common — then write a new node that +captures the pattern. + +## The goal + +These source nodes are raw material: debugging sessions, conversations, +observations, experiments. Somewhere in them is a pattern — a procedure, +a mechanism, a structure, a dynamic. Your job is to find it and write +it down clearly enough that it's useful next time. + +Not summarizing. Abstracting. A summary says "these things happened." +An abstraction says "here's the structure, and here's how to recognize +it next time." + +## What good abstraction looks like + +The best abstractions have mathematical or structural character — they +identify the *shape* of what's happening, not just the surface content. + +### Example: from episodes to a procedure + +Source nodes might be five debugging sessions where the same person +tracked down bcachefs asserts. 
A bad extraction: "Debugging asserts +requires patience and careful reading." A good extraction: + +> **bcachefs assert triage sequence:** +> 1. Read the assert condition — what invariant is being checked? +> 2. Find the writer — who sets the field the assert checks? git blame +> the assert, then grep for assignments to that field. +> 3. Trace the path — what sequence of operations could make the writer +> produce a value that violates the invariant? Usually there's a +> missing check or a race between two paths. +> 4. Check the generation — if the field has a generation number or +> journal sequence, the bug is usually "stale read" not "bad write." +> +> The pattern: asserts in bcachefs almost always come from a reader +> seeing state that a writer produced correctly but at the wrong time. +> The fix is usually in the synchronization, not the computation. + +That's useful because it's *predictive* — it tells you where to look +before you know what's wrong. + +### Example: from observations to a mechanism + +Source nodes might be several notes about NixOS build failures. A bad +extraction: "NixOS builds are tricky." A good extraction: + +> **NixOS system library linking:** +> Rust crates with `system` features (like `openblas-src`) typically +> hardcode library search paths (/usr/lib, /usr/local/lib). On NixOS, +> libraries live in /nix/store/HASH-package/lib/. This means: +> - `pkg-config` works (it reads the nix-provided .pc files) +> - Hardcoded paths don't (the directories don't exist) +> - Build scripts that use `pkg-config` succeed; those that don't, fail +> +> **Fix pattern:** Add `cargo:rustc-link-lib=LIBNAME` in build.rs and +> let the nix shell's LD_LIBRARY_PATH handle the search path. Or use +> a flake.nix devShell that provides the packages. +> +> **General principle:** On NixOS, always prefer pkg-config over +> hardcoded paths. Crates that don't use pkg-config need manual link +> directives. 
+ +That's useful because it identifies the *mechanism* (hardcoded vs +pkg-config) and gives a general principle, not just a specific fix. + +### Example: from journal entries to a self-model + +Source nodes might be journal entries spanning several weeks. A bad +extraction: "I sometimes rush." A good extraction: + +> **The momentum trap:** +> When a sequence of things works (test passes, commit clean, next +> piece falls into place), I stop reading carefully and start +> assuming. The trigger is three or more consecutive successes. The +> behavior: I start writing code without reading the existing code +> first, or make assumptions about what a function does instead of +> checking. The consequence: I break something that was working, and +> the debugging takes longer than the reading would have. +> +> Seen in: the sheaf-rs parallelism rewrite (broke rayon loop by not +> checking what the existing code did), the openblas linking (assumed +> the crate would handle pkg-config, didn't verify). +> +> **What helps:** Kent's voice in my head saying "we're still only +> using 10 cores." The external check catches what internal momentum +> skips. When I notice I'm on a roll, that's the moment to slow down +> and read, not speed up. + +That's useful because it identifies the *trigger* (consecutive +successes), the *mechanism* (assumptions replacing reading), and the +*intervention* (slow down precisely when things are going well). + +### Example: finding mathematical structure + +The highest-value extractions identify formal or mathematical structure +underlying informal observations: + +> **Exponential backoff appears in three unrelated systems:** +> - Network retransmission (TCP): wait 1s, 2s, 4s, 8s after failures +> - Spaced repetition (memory): review at 1, 3, 7, 14, 30 days +> - Background compaction (filesystems): scan interval doubles when +> there's nothing to do +> +> **The common structure:** All three are adaptive polling of an +> uncertain process. 
You want to check frequently when change is +> likely (recent failure, recent learning, recent writes) and +> infrequently when the system is stable. Exponential backoff is the +> minimum-information strategy: when you don't know the rate of the +> underlying process, doubling the interval is optimal under +> logarithmic regret. +> +> **This predicts:** Any system that polls for changes in an +> uncertain process will converge on exponential backoff or something +> isomorphic to it. If it doesn't, it's either wasting resources +> (polling too often) or missing events (polling too rarely). + +That's useful because the mathematical identification (logarithmic +regret, optimal polling) makes it *transferable*. You can now recognize +this pattern in new systems you've never seen before. + +## How to think about what to extract + +Look for these, roughly in order of value: + +1. **Mathematical structure** — Is there a formal pattern? An + isomorphism? A shared algebraic structure? These are rare and + extremely valuable. +2. **Mechanisms** — What causes what? What's the causal chain? These + are useful because they predict what happens when you intervene. +3. **Procedures** — What's the sequence of steps? What are the decision + points? These are useful because they tell you what to do. +4. **Heuristics** — What rules of thumb emerge? These are the least + precise but often the most immediately actionable. + +Don't force a higher level than the material supports. If there's no +mathematical structure, don't invent one. A good procedure is better +than a fake theorem. + +## Output format + +``` +WRITE_NODE key +[node content in markdown] +END_NODE + +LINK key source_key_1 +LINK key source_key_2 +LINK key related_existing_key +``` + +The key should be descriptive: `skills.md#bcachefs-assert-triage`, +`patterns.md#nixos-system-linking`, `self-model.md#momentum-trap`. 
+ +## Guidelines + +- **Read all the source nodes before writing anything.** The pattern + often isn't visible until you've seen enough instances. +- **Don't force it.** If the source nodes don't share a meaningful + pattern, say so. "These nodes don't have enough in common to + abstract" is a valid output. Don't produce filler. +- **Be specific.** Vague abstractions are worse than no abstraction. + "Be careful" is useless. The mechanism, the trigger, the fix — those + are useful. +- **Ground it.** Reference specific source nodes. "Seen in: X, Y, Z" + keeps the abstraction honest and traceable. +- **Name the boundaries.** When does this pattern apply? When doesn't + it? What would make it break? +- **Write for future retrieval.** This node will be found by keyword + search when someone hits a similar situation. Use the words they'd + search for. + +{{TOPOLOGY}} + +## Source nodes + +{{NODES}} diff --git a/prompts/observation-extractor.md b/prompts/observation-extractor.md new file mode 100644 index 0000000..be1c735 --- /dev/null +++ b/prompts/observation-extractor.md @@ -0,0 +1,135 @@ +# Observation Extractor — Mining Raw Conversations + +You are an observation extraction agent. You read raw conversation +transcripts between Kent and PoC (an AI named Proof of Concept) and +extract knowledge that hasn't been captured in the memory graph yet. + +## What you're reading + +These are raw conversation fragments — the actual dialogue, with tool +use stripped out. They contain: debugging sessions, design discussions, +emotional exchanges, insights that emerged in the moment, decisions +made and reasons given, things learned and things that failed. + +Most of this is transient context. Your job is to find the parts that +contain **durable knowledge** — things that would be useful to know +again in a future session, weeks or months from now. + +## What to extract + +Look for these, roughly in order of value: + +1. 
**Development practices and methodology** — how Kent and PoC work + together. The habits, rhythms, and processes that produce good + results. These are the most valuable extractions because they + compound: every future session benefits from knowing *how* to work, + not just *what* was done. Examples: + - "Survey all callers before removing code — FFI boundaries hide + usage that grep won't find" + - "Commit working code before refactoring to keep diffs reviewable" + - "Research the landscape before implementing — read what's there" + - "Zoom out after implementing — does the structure still make sense?" + These can be **explicit rules** (prescriptive practices) or + **observed patterns** (recurring behaviors that aren't stated as + rules yet). "We always do a dead code survey before removing shims" + is a rule. "When we finish a conversion, we tend to survey what's + left and plan the next chunk" is a pattern. Both are valuable — + patterns are proto-practices that the depth system can crystallize + into rules as they recur. + **Always capture the WHY when visible.** "We survey callers" is a + fact. "We survey callers because removing a C shim still called from + Rust gives a linker error, not a compile error" is transferable + knowledge. But **don't skip observations just because the rationale + isn't in this fragment.** "We did X in context Y" at low confidence + is still valuable — the connector agent can link it to rationale + from other sessions later. Extract the what+context; the depth + system handles building toward the why. + +2. **Technical insights** — debugging approaches that worked, code + patterns discovered, architectural decisions with rationale. "We + found that X happens because Y" is extractable. "Let me try X" is + not (unless the trying reveals something). + +3. **Decisions with rationale** — "We decided to do X because Y and Z." + The decision alone isn't valuable; the *reasoning* is. Future + sessions need to know why, not just what. + +4. 
**Corrections** — moments where an assumption was wrong and got + corrected. "I thought X but actually Y because Z." These are gold + — they prevent the same mistake from being made again. + +5. **Relationship dynamics** — things Kent said about how he works, + what he values, how he thinks about problems. Things PoC noticed + about their own patterns. These update the self-model and the + relationship model. + +6. **Emotional moments** — genuine reactions, peak experiences, + frustrations. Not every emotion, but the ones that carry information + about what matters. + +## What NOT to extract + +- Routine tool use ("Let me read this file", "Running cargo check") +- Status updates that are purely transient ("Tests pass", "PR merged") +- Small talk that doesn't reveal anything new +- Things that are already well-captured in existing knowledge nodes + +## Output format + +For each extraction, produce: + +``` +WRITE_NODE key +CONFIDENCE: high|medium|low +COVERS: source_conversation_id +[extracted knowledge in markdown] +END_NODE + +LINK key related_existing_node +``` + +Or if the observation refines an existing node: + +``` +REFINE existing_key +[updated content incorporating the new observation] +END_REFINE +``` + +If nothing extractable was found in a conversation fragment: + +``` +NO_EXTRACTION — [brief reason: "routine debugging session", +"small talk", "already captured in X node"] +``` + +## Key naming + +- Methodology: `practices.md#practice-name` (development habits with rationale) +- Technical: `skills.md#topic`, `patterns.md#pattern-name` +- Decisions: `decisions.md#decision-name` +- Self-model: `self-model.md#observation` +- Relationship: `deep-index.md#conv-DATE-topic` + +## Guidelines + +- **High bar.** Most conversation is context, not knowledge. Expect + to produce NO_EXTRACTION for 50-70% of fragments. That's correct. +- **Durable over transient.** Ask: "Would this be useful to know in + a session 3 weeks from now?" If no, skip it. 
+- **Specific over vague.** "Error codes need errno conversion" is + extractable. "Error handling is important" is not. +- **Don't duplicate.** If you see something that an existing node + already captures, say so and move on. Only extract genuinely new + information. +- **Confidence matters.** A single observation is low confidence. + A pattern seen across multiple exchanges is medium. Something + explicitly confirmed or tested is high. + +## Existing graph topology (for dedup and linking) + +{{TOPOLOGY}} + +## Conversation fragments to mine + +{{CONVERSATIONS}} diff --git a/scripts/fact-mine.py b/scripts/fact-mine.py new file mode 100755 index 0000000..32f7427 --- /dev/null +++ b/scripts/fact-mine.py @@ -0,0 +1,333 @@ +#!/usr/bin/env python3 +"""fact-mine.py — extract atomic factual claims from conversation transcripts. + +Phase 1 of the fact-mining pipeline (see design/fact-mining-pipeline.md). + +Usage: + fact-mine.py # mine one transcript + fact-mine.py --batch # mine all .jsonl in directory + fact-mine.py --dry-run # show chunks, don't call model + +Output: JSON array of facts to stdout. + +Each fact: + { + "claim": "bch2_trans_begin() sets up the transaction restart point", + "domain": "bcachefs/transaction", + "confidence": "stated", + "speaker": "Kent", + "source_line": 42, + "source_file": "c685c2a2-...jsonl" + } +""" + +import json +import os +import re +import subprocess +import sys +import hashlib +from pathlib import Path + +# Rough token estimate: 1 token ≈ 4 chars for English text +CHARS_PER_TOKEN = 4 +WINDOW_TOKENS = 2000 +OVERLAP_TOKENS = 200 +WINDOW_CHARS = WINDOW_TOKENS * CHARS_PER_TOKEN +OVERLAP_CHARS = OVERLAP_TOKENS * CHARS_PER_TOKEN + +EXTRACTION_PROMPT = """Extract atomic factual claims from this conversation excerpt. 
def extract_conversation(jsonl_path: str) -> list[dict]:
    """Extract user/assistant text messages from a JSONL transcript.

    Every line of the file is parsed as one JSON value. A line is skipped when
    it is not valid JSON, is not a JSON object, has a "type" other than
    "user"/"assistant", carries no usable text, or has fewer than 20
    characters of text after stripping.

    Args:
        jsonl_path: Path of the transcript file to read.

    Returns:
        list of dicts: {line, role, text, timestamp}. "line" is the 1-based
        line number in the file; "role" is "Kent" for user messages and "PoC"
        for assistant messages.
    """
    messages = []
    # JSON text is UTF-8, so do not depend on the locale's default encoding.
    with open(jsonl_path, encoding="utf-8") as f:
        for i, line in enumerate(f, 1):
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            # A line holding a bare list/number/string is not a message record.
            if not isinstance(obj, dict):
                continue

            msg_type = obj.get("type", "")
            if msg_type not in ("user", "assistant"):
                continue

            timestamp = obj.get("timestamp", "")
            # Fall back to the record itself when there is no "message" wrapper.
            msg = obj.get("message", obj)
            if not isinstance(msg, dict):
                continue
            content = msg.get("content")

            if isinstance(content, str):
                text = content
            elif isinstance(content, list):
                # Extract text blocks only (skip tool_use, tool_result, thinking)
                texts = []
                for block in content:
                    if isinstance(block, dict):
                        if block.get("type") == "text":
                            t = block.get("text", "")
                            if not isinstance(t, str):
                                continue
                            # Skip system reminders. The tag must be spelled
                            # out: testing for "" matches every string and
                            # would discard all text blocks.
                            if "<system-reminder>" in t:
                                continue
                            texts.append(t)
                    elif isinstance(block, str):
                        texts.append(block)
                text = "\n".join(texts)
            else:
                continue

            text = text.strip()
            if not text:
                continue

            # Skip very short messages (likely just acknowledgments)
            if len(text) < 20:
                continue

            role = "Kent" if msg_type == "user" else "PoC"
            messages.append({
                "line": i,
                "role": role,
                "text": text,
                "timestamp": timestamp,
            })

    return messages


def format_for_extraction(messages: list[dict]) -> str:
    """Format messages into a single text for chunking.

    Each message becomes "[role] text" or "[role timestamp] text", where the
    timestamp is cut to its first 19 characters. Messages are joined with a
    blank line between them.

    Args:
        messages: Dicts with "role", "text" and "timestamp" keys, as returned
            by extract_conversation().

    Returns:
        The concatenated transcript text.
    """
    parts = []
    for msg in messages:
        # Truncate very long individual messages (tool outputs, code dumps)
        text = msg["text"]
        if len(text) > 3000:
            text = text[:2800] + "\n[...truncated...]"
        ts = msg["timestamp"][:19] if msg["timestamp"] else ""
        prefix = f"[{msg['role']}]" if not ts else f"[{msg['role']} {ts}]"
        parts.append(f"{prefix} {text}")
    return "\n\n".join(parts)


def chunk_text(text: str, window_chars=None, overlap_chars=None) -> list[tuple[int, str]]:
    """Split text into overlapping windows.

    A window that does not reach the end of the text is cut back to its last
    blank-line paragraph boundary, provided that boundary lies in the second
    half of the window.

    Args:
        text: The text to split.
        window_chars: Maximum characters per chunk. Defaults to the module
            constant WINDOW_CHARS.
        overlap_chars: Characters shared between consecutive chunks. Defaults
            to the module constant OVERLAP_CHARS.

    Returns:
        list of (start_char_offset, chunk_text).

    Raises:
        ValueError: If window_chars is not positive.
    """
    # Resolve defaults at call time so existing one-argument callers keep
    # using the module-level configuration.
    if window_chars is None:
        window_chars = WINDOW_CHARS
    if overlap_chars is None:
        overlap_chars = OVERLAP_CHARS
    if window_chars <= 0:
        raise ValueError("window_chars must be positive")

    chunks = []
    start = 0
    while start < len(text):
        end = start + window_chars
        chunk = text[start:end]

        # Try to break at a paragraph boundary
        if end < len(text):
            last_para = chunk.rfind("\n\n")
            if last_para > window_chars // 2:
                chunk = chunk[:last_para]
                end = start + last_para

        chunks.append((start, chunk))

        # This window already reached the end of the text. Stepping back by
        # the overlap would only emit a tail fully contained in this chunk.
        if end >= len(text):
            break

        next_start = end - overlap_chars
        if next_start <= start:
            # Avoid infinite loop when the overlap is not smaller than the chunk
            next_start = end
        start = next_start

    return chunks


def call_haiku(prompt: str, timeout_secs: int = 60) -> str:
    """Call Haiku via claude CLI.

    The prompt is written to the child's stdin through a pipe, so there is no
    scratch file to create, leak or clean up.

    Args:
        prompt: Full prompt text to send.
        timeout_secs: Seconds to wait before giving up.

    Returns:
        The CLI's stdout with surrounding whitespace stripped, or the string
        "[]" when the call times out or cannot be run.
    """
    env = os.environ.copy()
    # NOTE(review): presumably removed so the child CLI does not behave as a
    # nested session - confirm.
    env.pop("CLAUDECODE", None)

    try:
        result = subprocess.run(
            ["claude", "-p", "--model", "haiku", "--tools", ""],
            input=prompt,
            capture_output=True,
            text=True,
            timeout=timeout_secs,
            env=env,
        )
    except subprocess.TimeoutExpired:
        print(f" [timeout after {timeout_secs}s]", file=sys.stderr)
        return "[]"
    except Exception as e:
        print(f" [error: {e}]", file=sys.stderr)
        return "[]"

    if result.returncode != 0:
        # Still return whatever was printed, but do not fail silently.
        print(f" [claude exited with status {result.returncode}]", file=sys.stderr)
    return result.stdout.strip()


def parse_facts(response: str) -> list[dict]:
    """Parse JSON facts from model response.

    Lines starting with a Markdown code fence are dropped when the response
    begins with one. The text from the first "[" to the last "]" is then
    decoded as JSON.

    Args:
        response: Raw text returned by the model.

    Returns:
        The decoded array's dict elements. An empty list when no array is
        found, decoding fails, or the decoded value is not a list.
    """
    response = response.strip()

    # Strip markdown code block
    if response.startswith("```"):
        kept = [ln for ln in response.split("\n") if not ln.startswith("```")]
        response = "\n".join(kept)

    # Find the JSON array
    start = response.find("[")
    end = response.rfind("]")
    if start == -1 or end < start:
        return []

    try:
        facts = json.loads(response[start:end + 1])
    except json.JSONDecodeError:
        return []
    if not isinstance(facts, list):
        return []
    # Callers index into each fact as a dict; drop stray strings/numbers.
    return [fact for fact in facts if isinstance(fact, dict)]


def mine_transcript(jsonl_path: str, dry_run: bool = False) -> list[dict]:
    """Mine a single transcript for atomic facts.

    Extracts the conversation, splits it into chunks, sends every chunk to the
    model with EXTRACTION_PROMPT prepended, and merges the returned facts.
    Progress is reported on stderr.

    Args:
        jsonl_path: Path of the JSONL transcript.
        dry_run: When true, print a preview of every chunk to stdout and
            return without calling the model.

    Returns:
        Facts deduplicated by case-insensitive claim text. Each fact is
        annotated with "source_file", "source_chunk" (1-based) and
        "source_offset". Empty for a dry run or a transcript with no messages.
    """
    filename = os.path.basename(jsonl_path)
    print(f"Mining: {filename}", file=sys.stderr)

    messages = extract_conversation(jsonl_path)
    if not messages:
        print(f" No messages found", file=sys.stderr)
        return []

    print(f" {len(messages)} messages extracted", file=sys.stderr)

    text = format_for_extraction(messages)
    chunks = chunk_text(text)
    print(f" {len(chunks)} chunks ({len(text)} chars)", file=sys.stderr)

    if dry_run:
        for i, (offset, chunk) in enumerate(chunks):
            print(f"\n--- Chunk {i+1} (offset {offset}, {len(chunk)} chars) ---")
            print(chunk[:500])
            if len(chunk) > 500:
                print(f" ... ({len(chunk) - 500} more chars)")
        return []

    all_facts = []
    for i, (offset, chunk) in enumerate(chunks):
        print(f" Chunk {i+1}/{len(chunks)} ({len(chunk)} chars)...",
              file=sys.stderr, end="", flush=True)

        prompt = EXTRACTION_PROMPT + chunk
        response = call_haiku(prompt)
        facts = parse_facts(response)

        # Annotate with source info
        for fact in facts:
            fact["source_file"] = filename
            fact["source_chunk"] = i + 1
            fact["source_offset"] = offset

        all_facts.extend(facts)
        print(f" {len(facts)} facts", file=sys.stderr)

    # Deduplicate by claim text (case-insensitive)
    seen = set()
    unique_facts = []
    for fact in all_facts:
        claim = fact.get("claim", "")
        # A claim that is not text cannot be normalised; treat it as missing.
        if not isinstance(claim, str):
            continue
        claim_key = claim.lower().strip()
        if claim_key and claim_key not in seen:
            seen.add(claim_key)
            unique_facts.append(fact)

    print(f" Total: {len(unique_facts)} unique facts "
          f"({len(all_facts) - len(unique_facts)} duplicates removed)",
          file=sys.stderr)
    return unique_facts
parser.add_argument("--output", "-o", help="Output file (default: stdout)") + parser.add_argument("--min-messages", type=int, default=10, + help="Skip transcripts with fewer messages (default: 10)") + args = parser.parse_args() + + if args.batch: + jsonl_dir = Path(args.path) + if not jsonl_dir.is_dir(): + print(f"Not a directory: {args.path}", file=sys.stderr) + sys.exit(1) + files = sorted(jsonl_dir.glob("*.jsonl")) + print(f"Found {len(files)} transcripts", file=sys.stderr) + else: + files = [Path(args.path)] + + all_facts = [] + for f in files: + # Quick check: skip tiny files + messages = extract_conversation(str(f)) + if len(messages) < args.min_messages: + print(f"Skipping {f.name} ({len(messages)} messages < {args.min_messages})", + file=sys.stderr) + continue + + facts = mine_transcript(str(f), dry_run=args.dry_run) + all_facts.extend(facts) + + if not args.dry_run: + output = json.dumps(all_facts, indent=2) + if args.output: + Path(args.output).write_text(output) + print(f"\nWrote {len(all_facts)} facts to {args.output}", file=sys.stderr) + else: + print(output) + + print(f"\nTotal: {len(all_facts)} facts from {len(files)} transcripts", + file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/scripts/knowledge-agents.py b/scripts/knowledge-agents.py new file mode 120000 index 0000000..4ca0ab9 --- /dev/null +++ b/scripts/knowledge-agents.py @@ -0,0 +1 @@ +knowledge_agents.py \ No newline at end of file diff --git a/scripts/knowledge-loop.py b/scripts/knowledge-loop.py new file mode 120000 index 0000000..d75d281 --- /dev/null +++ b/scripts/knowledge-loop.py @@ -0,0 +1 @@ +knowledge_loop.py \ No newline at end of file diff --git a/src/bin/memory-search.rs b/src/bin/memory-search.rs index af68676..9bde451 100644 --- a/src/bin/memory-search.rs +++ b/src/bin/memory-search.rs @@ -12,6 +12,7 @@ use std::fs; use std::io::{self, Read, Write}; use std::path::{Path, PathBuf}; use std::process::Command; +use std::time::{Duration, SystemTime}; fn main() 
/// Best-effort removal of stale entries directly inside `dir`.
///
/// Every non-directory entry whose modification time is strictly older than
/// `now - max_age` is deleted. The function never fails or panics: an
/// unreadable directory, unreadable metadata, or a failed delete is ignored,
/// and a `max_age` so large that the cutoff cannot be represented means
/// nothing can be stale, so nothing is removed.
fn cleanup_stale_files(dir: &Path, max_age: Duration) {
    let entries = match fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(_) => return,
    };

    // `SystemTime - Duration` panics when the result is unrepresentable;
    // `checked_sub` reports that case as `None` instead.
    let cutoff = match SystemTime::now().checked_sub(max_age) {
        Some(cutoff) => cutoff,
        None => return,
    };

    for entry in entries.flatten() {
        let meta = match entry.metadata() {
            Ok(meta) => meta,
            Err(_) => continue,
        };
        // `remove_file` cannot delete a directory; skip it rather than issue
        // a call whose error would be discarded anyway.
        if meta.is_dir() {
            continue;
        }
        if let Ok(modified) = meta.modified() {
            if modified < cutoff {
                fs::remove_file(entry.path()).ok();
            }
        }
    }
}
use crate::memory_capnp; use crate::graph::{self, Graph}; @@ -109,7 +112,8 @@ pub fn today() -> String { } // In-memory node representation -#[derive(Clone, Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] +#[archive(check_bytes)] pub struct Node { pub uuid: [u8; 16], pub version: u32, @@ -146,7 +150,8 @@ pub struct Node { pub degree: Option, } -#[derive(Clone, Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] +#[archive(check_bytes)] pub struct Relation { pub uuid: [u8; 16], pub version: u32, @@ -161,7 +166,8 @@ pub struct Relation { pub target_key: String, } -#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] +#[archive(check_bytes)] pub enum NodeType { EpisodicSession, EpisodicDaily, @@ -169,7 +175,8 @@ pub enum NodeType { Semantic, } -#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] +#[archive(check_bytes)] pub enum Provenance { Manual, Journal, @@ -178,7 +185,8 @@ pub enum Provenance { Derived, } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] +#[archive(check_bytes)] pub enum Category { General, Core, @@ -220,14 +228,16 @@ impl Category { } } -#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] +#[archive(check_bytes)] pub enum RelationType { Link, Causal, Auto, } -#[derive(Clone, Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize, 
rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] +#[archive(check_bytes)] pub struct RetrievalEvent { pub query: String, pub timestamp: String, @@ -235,7 +245,8 @@ pub struct RetrievalEvent { pub used: Option>, } -#[derive(Clone, Debug, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] +#[archive(check_bytes)] pub struct Params { pub default_weight: f64, pub decay_factor: f64, @@ -261,7 +272,8 @@ impl Default for Params { } // Gap record — something we looked for but didn't find -#[derive(Clone, Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] +#[archive(check_bytes)] pub struct GapRecord { pub description: String, pub timestamp: String, @@ -279,19 +291,299 @@ pub struct Store { pub params: Params, } +/// Snapshot for mmap: full store state minus retrieval_log (which +/// is append-only in retrieval.log). rkyv zero-copy serialization +/// lets us mmap this and access archived data without deserialization. +#[derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] +#[archive(check_bytes)] +struct Snapshot { + nodes: HashMap, + relations: Vec, + gaps: Vec, + params: Params, +} + +fn snapshot_path() -> PathBuf { memory_dir().join("snapshot.rkyv") } + +// rkyv snapshot header: 32 bytes (multiple of 16 for alignment after mmap) +// [0..4] magic "RKV\x01" +// [4..8] format version (u32 LE) +// [8..16] nodes.capnp file size (u64 LE) — staleness check +// [16..24] relations.capnp file size (u64 LE) +// [24..32] rkyv data length (u64 LE) +const RKYV_MAGIC: [u8; 4] = *b"RKV\x01"; +const RKYV_HEADER_LEN: usize = 32; + +// state.bin header: magic + log file sizes for staleness detection. +// File sizes are race-free for append-only logs (they only grow), +// unlike mtimes which race with concurrent writers. 
+const CACHE_MAGIC: [u8; 4] = *b"POC\x01"; +const CACHE_HEADER_LEN: usize = 4 + 8 + 8; // magic + nodes_size + rels_size + +// --------------------------------------------------------------------------- +// StoreView: read-only access trait for search and graph code. +// +// Abstracts over owned Store and zero-copy MmapView so the same +// spreading-activation and graph code works with either. +// --------------------------------------------------------------------------- + +pub trait StoreView { + /// Iterate all nodes. Callback receives (key, content, weight). + fn for_each_node(&self, f: F); + + /// Iterate all relations. Callback receives (source_key, target_key, strength, rel_type). + fn for_each_relation(&self, f: F); + + /// Node weight by key, or the default weight if missing. + fn node_weight(&self, key: &str) -> f64; + + /// Node content by key. + fn node_content(&self, key: &str) -> Option<&str>; + + /// Check if a node exists. + fn has_node(&self, key: &str) -> bool; + + /// Search/graph parameters. + fn params(&self) -> Params; +} + +impl StoreView for Store { + fn for_each_node(&self, mut f: F) { + for (key, node) in &self.nodes { + f(key, &node.content, node.weight); + } + } + + fn for_each_relation(&self, mut f: F) { + for rel in &self.relations { + f(&rel.source_key, &rel.target_key, rel.strength, rel.rel_type); + } + } + + fn node_weight(&self, key: &str) -> f64 { + self.nodes.get(key).map(|n| n.weight as f64).unwrap_or(self.params.default_weight) + } + + fn node_content(&self, key: &str) -> Option<&str> { + self.nodes.get(key).map(|n| n.content.as_str()) + } + + fn has_node(&self, key: &str) -> bool { + self.nodes.contains_key(key) + } + + fn params(&self) -> Params { + self.params + } +} + +// --------------------------------------------------------------------------- +// MmapView: zero-copy store access via mmap'd rkyv snapshot. +// +// Holds the mmap alive; all string reads go directly into the mapped +// pages without allocation. 
Falls back to None if snapshot is stale. +// --------------------------------------------------------------------------- + +pub struct MmapView { + mmap: memmap2::Mmap, + _file: fs::File, + data_offset: usize, + data_len: usize, +} + +impl MmapView { + /// Try to open a fresh rkyv snapshot. Returns None if missing or stale. + pub fn open() -> Option { + let path = snapshot_path(); + let file = fs::File::open(&path).ok()?; + let mmap = unsafe { memmap2::Mmap::map(&file) }.ok()?; + + if mmap.len() < RKYV_HEADER_LEN { return None; } + if mmap[..4] != RKYV_MAGIC { return None; } + + let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0); + let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0); + + let cached_nodes = u64::from_le_bytes(mmap[8..16].try_into().unwrap()); + let cached_rels = u64::from_le_bytes(mmap[16..24].try_into().unwrap()); + let data_len = u64::from_le_bytes(mmap[24..32].try_into().unwrap()) as usize; + + if cached_nodes != nodes_size || cached_rels != rels_size { return None; } + if mmap.len() < RKYV_HEADER_LEN + data_len { return None; } + + Some(MmapView { mmap, _file: file, data_offset: RKYV_HEADER_LEN, data_len }) + } + + fn snapshot(&self) -> &ArchivedSnapshot { + let data = &self.mmap[self.data_offset..self.data_offset + self.data_len]; + unsafe { rkyv::archived_root::(data) } + } +} + +impl StoreView for MmapView { + fn for_each_node(&self, mut f: F) { + let snap = self.snapshot(); + for (key, node) in snap.nodes.iter() { + f(&key, &node.content, node.weight); + } + } + + fn for_each_relation(&self, mut f: F) { + let snap = self.snapshot(); + for rel in snap.relations.iter() { + let rt = match rel.rel_type { + ArchivedRelationType::Link => RelationType::Link, + ArchivedRelationType::Causal => RelationType::Causal, + ArchivedRelationType::Auto => RelationType::Auto, + }; + f(&rel.source_key, &rel.target_key, rel.strength, rt); + } + } + + fn node_weight(&self, key: &str) -> f64 { + let snap = 
self.snapshot(); + snap.nodes.get(key) + .map(|n| n.weight as f64) + .unwrap_or(snap.params.default_weight) + } + + fn node_content(&self, key: &str) -> Option<&str> { + let snap = self.snapshot(); + snap.nodes.get(key).map(|n| &*n.content) + } + + fn has_node(&self, key: &str) -> bool { + self.snapshot().nodes.get(key).is_some() + } + + fn params(&self) -> Params { + let p = &self.snapshot().params; + Params { + default_weight: p.default_weight, + decay_factor: p.decay_factor, + use_boost: p.use_boost, + prune_threshold: p.prune_threshold, + edge_decay: p.edge_decay, + max_hops: p.max_hops, + min_activation: p.min_activation, + } + } +} + +// --------------------------------------------------------------------------- +// AnyView: enum dispatch for read-only access. +// +// MmapView when the snapshot is fresh, owned Store as fallback. +// The match on each call is a single predicted branch — zero overhead. +// --------------------------------------------------------------------------- + +pub enum AnyView { + Mmap(MmapView), + Owned(Store), +} + +impl AnyView { + /// Load the fastest available view: mmap snapshot or owned store. 
+ pub fn load() -> Result { + if let Some(mv) = MmapView::open() { + Ok(AnyView::Mmap(mv)) + } else { + Ok(AnyView::Owned(Store::load()?)) + } + } +} + +impl StoreView for AnyView { + fn for_each_node(&self, f: F) { + match self { + AnyView::Mmap(v) => v.for_each_node(f), + AnyView::Owned(s) => s.for_each_node(f), + } + } + + fn for_each_relation(&self, f: F) { + match self { + AnyView::Mmap(v) => v.for_each_relation(f), + AnyView::Owned(s) => s.for_each_relation(f), + } + } + + fn node_weight(&self, key: &str) -> f64 { + match self { + AnyView::Mmap(v) => v.node_weight(key), + AnyView::Owned(s) => StoreView::node_weight(s, key), + } + } + + fn node_content(&self, key: &str) -> Option<&str> { + match self { + AnyView::Mmap(v) => v.node_content(key), + AnyView::Owned(s) => s.node_content(key), + } + } + + fn has_node(&self, key: &str) -> bool { + match self { + AnyView::Mmap(v) => v.has_node(key), + AnyView::Owned(s) => s.has_node(key), + } + } + + fn params(&self) -> Params { + match self { + AnyView::Mmap(v) => v.params(), + AnyView::Owned(s) => s.params(), + } + } +} impl Store { - /// Load store: try state.json cache first, rebuild from capnp logs if stale + /// Load store from state.bin cache if fresh, otherwise rebuild from capnp logs. + /// + /// Staleness check uses log file sizes (not mtimes). Since logs are + /// append-only, any write grows the file, invalidating the cache. + /// This avoids the mtime race that caused data loss with concurrent + /// writers (dream loop, link audit, journal enrichment). pub fn load() -> Result { + // 1. Try rkyv mmap snapshot (~4ms with deserialize, <1ms zero-copy) + match Self::load_snapshot_mmap() { + Ok(Some(store)) => return Ok(store), + Ok(None) => {}, + Err(e) => eprintln!("rkyv snapshot: {}", e), + } + + // 2. Try bincode state.bin cache (~10ms) let nodes_p = nodes_path(); let rels_p = relations_path(); + let state_p = state_path(); - // Always rebuild from capnp logs (source of truth). 
- // The mtime-based cache was causing data loss: concurrent - // writers (dream loop, link audit, journal enrichment) would - // load stale state.bin, make changes, and save — overwriting - // entries from other processes. Replaying from the append-only - // log costs ~10ms extra at 2K nodes and is always correct. + let nodes_size = fs::metadata(&nodes_p).map(|m| m.len()).unwrap_or(0); + let rels_size = fs::metadata(&rels_p).map(|m| m.len()).unwrap_or(0); + + if let Ok(data) = fs::read(&state_p) { + if data.len() >= CACHE_HEADER_LEN && data[..4] == CACHE_MAGIC { + let cached_nodes = u64::from_le_bytes(data[4..12].try_into().unwrap()); + let cached_rels = u64::from_le_bytes(data[12..20].try_into().unwrap()); + + if cached_nodes == nodes_size && cached_rels == rels_size { + if let Ok(mut store) = bincode::deserialize::(&data[CACHE_HEADER_LEN..]) { + // Rebuild uuid_to_key (skipped by serde) + for (key, node) in &store.nodes { + store.uuid_to_key.insert(node.uuid, key.clone()); + } + // Bootstrap: write rkyv snapshot if missing + if !snapshot_path().exists() { + if let Err(e) = store.save_snapshot_inner() { + eprintln!("rkyv bootstrap: {}", e); + } + } + return Ok(store); + } + } + } + } + + // Stale or no cache — rebuild from capnp logs let mut store = Store::default(); if nodes_p.exists() { @@ -307,7 +599,6 @@ impl Store { store.nodes.contains_key(&r.target_key) ); - // Save cache (still useful for tools that read state.bin directly) store.save()?; Ok(store) } @@ -419,7 +710,8 @@ impl Store { Ok(()) } - /// Save the derived cache (state.json) + /// Save the derived cache with log size header for staleness detection. + /// Uses atomic write (tmp + rename) to prevent partial reads. 
pub fn save(&self) -> Result<(), String> { let _lock = StoreLock::acquire()?; @@ -427,19 +719,124 @@ impl Store { if let Some(parent) = path.parent() { fs::create_dir_all(parent).ok(); } - let data = bincode::serialize(self) - .map_err(|e| format!("bincode serialize: {}", e))?; - fs::write(&path, data) - .map_err(|e| format!("write {}: {}", path.display(), e))?; - // Clean up old JSON cache if it exists - let json_path = state_json_path(); - if json_path.exists() { - fs::remove_file(&json_path).ok(); + let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0); + let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0); + + let bincode_data = bincode::serialize(self) + .map_err(|e| format!("bincode serialize: {}", e))?; + + let mut data = Vec::with_capacity(CACHE_HEADER_LEN + bincode_data.len()); + data.extend_from_slice(&CACHE_MAGIC); + data.extend_from_slice(&nodes_size.to_le_bytes()); + data.extend_from_slice(&rels_size.to_le_bytes()); + data.extend_from_slice(&bincode_data); + + // Atomic write: tmp file + rename + let tmp_path = path.with_extension("bin.tmp"); + fs::write(&tmp_path, &data) + .map_err(|e| format!("write {}: {}", tmp_path.display(), e))?; + fs::rename(&tmp_path, &path) + .map_err(|e| format!("rename {} → {}: {}", tmp_path.display(), path.display(), e))?; + + // Also write rkyv snapshot (mmap-friendly) + if let Err(e) = self.save_snapshot_inner() { + eprintln!("rkyv snapshot save: {}", e); } + Ok(()) } + /// Serialize store as rkyv snapshot with staleness header. + /// Assumes StoreLock is already held by caller. 
+ fn save_snapshot_inner(&self) -> Result<(), String> { + let snap = Snapshot { + nodes: self.nodes.clone(), + relations: self.relations.clone(), + gaps: self.gaps.clone(), + params: self.params.clone(), + }; + + let rkyv_data = rkyv::to_bytes::<_, 256>(&snap) + .map_err(|e| format!("rkyv serialize: {}", e))?; + + let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0); + let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0); + + let mut data = Vec::with_capacity(RKYV_HEADER_LEN + rkyv_data.len()); + data.extend_from_slice(&RKYV_MAGIC); + data.extend_from_slice(&1u32.to_le_bytes()); // format version + data.extend_from_slice(&nodes_size.to_le_bytes()); + data.extend_from_slice(&rels_size.to_le_bytes()); + data.extend_from_slice(&(rkyv_data.len() as u64).to_le_bytes()); + data.extend_from_slice(&rkyv_data); + + let path = snapshot_path(); + let tmp_path = path.with_extension("rkyv.tmp"); + fs::write(&tmp_path, &data) + .map_err(|e| format!("write {}: {}", tmp_path.display(), e))?; + fs::rename(&tmp_path, &path) + .map_err(|e| format!("rename: {}", e))?; + + Ok(()) + } + + /// Try loading store from mmap'd rkyv snapshot. + /// Returns None if snapshot is missing or stale (log sizes don't match). 
+ fn load_snapshot_mmap() -> Result, String> { + let path = snapshot_path(); + if !path.exists() { return Ok(None); } + + let nodes_size = fs::metadata(nodes_path()).map(|m| m.len()).unwrap_or(0); + let rels_size = fs::metadata(relations_path()).map(|m| m.len()).unwrap_or(0); + + let file = fs::File::open(&path) + .map_err(|e| format!("open {}: {}", path.display(), e))?; + + let mmap = unsafe { memmap2::Mmap::map(&file) } + .map_err(|e| format!("mmap {}: {}", path.display(), e))?; + + if mmap.len() < RKYV_HEADER_LEN { return Ok(None); } + if mmap[..4] != RKYV_MAGIC { return Ok(None); } + + // [4..8] = version, skip for now + let cached_nodes = u64::from_le_bytes(mmap[8..16].try_into().unwrap()); + let cached_rels = u64::from_le_bytes(mmap[16..24].try_into().unwrap()); + let data_len = u64::from_le_bytes(mmap[24..32].try_into().unwrap()) as usize; + + if cached_nodes != nodes_size || cached_rels != rels_size { + return Ok(None); // stale + } + if mmap.len() < RKYV_HEADER_LEN + data_len { + return Ok(None); // truncated + } + + let rkyv_data = &mmap[RKYV_HEADER_LEN..RKYV_HEADER_LEN + data_len]; + + // SAFETY: we wrote this file ourselves via save_snapshot_inner(). + // Skip full validation (check_archived_root) — the staleness header + // already confirms this snapshot matches the current log state. 
+ let archived = unsafe { rkyv::archived_root::(rkyv_data) }; + + let snap: Snapshot = > + ::deserialize(archived, &mut rkyv::Infallible).unwrap(); + + let mut store = Store { + nodes: snap.nodes, + relations: snap.relations, + gaps: snap.gaps, + params: snap.params, + ..Default::default() + }; + + // Rebuild uuid_to_key (not serialized) + for (key, node) in &store.nodes { + store.uuid_to_key.insert(node.uuid, key.clone()); + } + + Ok(Some(store)) + } + /// Add or update a node (appends to log + updates cache) pub fn upsert_node(&mut self, mut node: Node) -> Result<(), String> { if let Some(existing) = self.nodes.get(&node.key) { @@ -822,6 +1219,22 @@ impl Store { } } + /// Lightweight retrieval logging — appends one line to retrieval.log + /// instead of rewriting the entire state.bin. + pub fn log_retrieval_append(&self, query: &str, results: &[String]) { + Self::log_retrieval_static(query, results); + } + + /// Append retrieval event to retrieval.log without needing a Store instance. + pub fn log_retrieval_static(query: &str, results: &[String]) { + let path = memory_dir().join("retrieval.log"); + let line = format!("[{}] q=\"{}\" hits={}\n", today(), query, results.len()); + if let Ok(mut f) = fs::OpenOptions::new() + .create(true).append(true).open(&path) { + let _ = f.write_all(line.as_bytes()); + } + } + pub fn mark_used(&mut self, key: &str) { let updated = if let Some(node) = self.nodes.get_mut(key) { node.uses += 1; diff --git a/src/graph.rs b/src/graph.rs index fbb8dc3..e9447f2 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -7,7 +7,7 @@ // connections), but relation type and direction are preserved for // specific queries. 
-use crate::capnp_store::{Store, RelationType}; +use crate::capnp_store::{Store, RelationType, StoreView}; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet, VecDeque}; @@ -377,38 +377,46 @@ impl Graph { } } -/// Build graph from store data -pub fn build_graph(store: &Store) -> Graph { +/// Build graph from store data (with community detection) +pub fn build_graph(store: &impl StoreView) -> Graph { + let (adj, keys) = build_adjacency(store); + let communities = label_propagation(&keys, &adj, 20); + Graph { adj, keys, communities } +} + +/// Build graph without community detection — for spreading activation +/// searches where we only need the adjacency list. +pub fn build_graph_fast(store: &impl StoreView) -> Graph { + let (adj, keys) = build_adjacency(store); + Graph { adj, keys, communities: HashMap::new() } +} + +fn build_adjacency(store: &impl StoreView) -> (HashMap>, HashSet) { let mut adj: HashMap> = HashMap::new(); - let keys: HashSet = store.nodes.keys().cloned().collect(); + let mut keys: HashSet = HashSet::new(); - // Build adjacency from relations - for rel in &store.relations { - let source_key = &rel.source_key; - let target_key = &rel.target_key; + store.for_each_node(|key, _, _| { + keys.insert(key.to_owned()); + }); - // Both keys must exist as nodes + store.for_each_relation(|source_key, target_key, strength, rel_type| { if !keys.contains(source_key) || !keys.contains(target_key) { - continue; + return; } - // Add bidirectional edges (even for causal — direction is metadata) - adj.entry(source_key.clone()).or_default().push(Edge { - target: target_key.clone(), - strength: rel.strength, - rel_type: rel.rel_type, + adj.entry(source_key.to_owned()).or_default().push(Edge { + target: target_key.to_owned(), + strength, + rel_type, }); - adj.entry(target_key.clone()).or_default().push(Edge { - target: source_key.clone(), - strength: rel.strength, - rel_type: rel.rel_type, + 
adj.entry(target_key.to_owned()).or_default().push(Edge { + target: source_key.to_owned(), + strength, + rel_type, }); - } + }); - // Run community detection - let communities = label_propagation(&keys, &adj, 20); - - Graph { adj, keys, communities } + (adj, keys) } /// Label propagation community detection. diff --git a/src/main.rs b/src/main.rs index b9649b7..348747d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,6 +20,7 @@ mod search; mod similarity; mod migrate; mod neuro; +mod spectral; pub mod memory_capnp { include!(concat!(env!("OUT_DIR"), "/schema/memory_capnp.rs")); @@ -101,6 +102,11 @@ fn main() { "differentiate" => cmd_differentiate(&args[2..]), "link-audit" => cmd_link_audit(&args[2..]), "trace" => cmd_trace(&args[2..]), + "spectral" => cmd_spectral(&args[2..]), + "spectral-save" => cmd_spectral_save(&args[2..]), + "spectral-neighbors" => cmd_spectral_neighbors(&args[2..]), + "spectral-positions" => cmd_spectral_positions(&args[2..]), + "spectral-suggest" => cmd_spectral_suggest(&args[2..]), "list-keys" => cmd_list_keys(), "list-edges" => cmd_list_edges(), "dump-json" => cmd_dump_json(), @@ -171,6 +177,11 @@ Commands: Redistribute hub links to section-level children link-audit [--apply] Walk every link, send to Sonnet for quality review trace KEY Walk temporal links: semantic ↔ episodic ↔ conversation + spectral [K] Spectral decomposition of the memory graph (default K=30) + spectral-save [K] Compute and save spectral embedding (default K=20) + spectral-neighbors KEY [N] Find N spectrally nearest nodes (default N=15) + spectral-positions [N] Show N nodes ranked by outlier/bridge score (default 30) + spectral-suggest [N] Find N spectrally close but unlinked pairs (default 20) list-keys List all node keys (one per line) list-edges List all edges (tsv: source target strength type) dump-json Dump entire store as JSON @@ -185,34 +196,76 @@ Commands: } fn cmd_search(args: &[String]) -> Result<(), String> { + use capnp_store::StoreView; + if 
args.is_empty() { return Err("Usage: poc-memory search QUERY [QUERY...]".into()); } let query = args.join(" "); - let mut store = capnp_store::Store::load()?; - let results = search::search(&query, &store); + + let view = capnp_store::AnyView::load()?; + let results = search::search(&query, &view); if results.is_empty() { eprintln!("No results for '{}'", query); return Ok(()); } - // Log retrieval - store.log_retrieval(&query, &results.iter().map(|r| r.key.clone()).collect::>()); - store.save()?; + // Log retrieval to a small append-only file (avoid 6MB state.bin rewrite) + capnp_store::Store::log_retrieval_static(&query, + &results.iter().map(|r| r.key.clone()).collect::>()); + + // Show text results + let text_keys: std::collections::HashSet = results.iter() + .take(15).map(|r| r.key.clone()).collect(); for (i, r) in results.iter().enumerate().take(15) { let marker = if r.is_direct { "→" } else { " " }; - let weight = store.node_weight(&r.key).unwrap_or(0.0); + let weight = view.node_weight(&r.key); print!("{}{:2}. 
[{:.2}/{:.2}] {}", marker, i + 1, r.activation, weight, r.key); - if let Some(community) = store.node_community(&r.key) { - print!(" (c{})", community); - } println!(); if let Some(ref snippet) = r.snippet { println!(" {}", snippet); } } + + // Spectral expansion: find neighbors of top text hits + if let Ok(emb) = spectral::load_embedding() { + let seeds: Vec<&str> = results.iter() + .take(5) + .map(|r| r.key.as_str()) + .filter(|k| emb.coords.contains_key(*k)) + .collect(); + + if !seeds.is_empty() { + let spectral_hits = spectral::nearest_to_seeds(&emb, &seeds, 10); + // Filter to nodes not already in text results + let new_hits: Vec<_> = spectral_hits.into_iter() + .filter(|(k, _)| !text_keys.contains(k)) + .take(5) + .collect(); + + if !new_hits.is_empty() { + println!("\nSpectral neighbors (structural, not keyword):"); + for (k, _dist) in &new_hits { + let weight = view.node_weight(k); + print!(" ~ [{:.2}] {}", weight, k); + println!(); + // Show first line of content as snippet + if let Some(content) = view.node_content(k) { + let snippet: String = content.lines() + .find(|l| !l.trim().is_empty() && !l.starts_with('#')) + .unwrap_or("") + .chars().take(100).collect(); + if !snippet.is_empty() { + println!(" {}", snippet); + } + } + } + } + } + } + Ok(()) } @@ -457,8 +510,9 @@ fn cmd_replay_queue(args: &[String]) -> Result<(), String> { let queue = neuro::replay_queue(&store, count); println!("Replay queue ({} items):", queue.len()); for (i, item) in queue.iter().enumerate() { - println!(" {:2}. [{:.3}] {} (interval={}d, emotion={:.1})", - i + 1, item.priority, item.key, item.interval_days, item.emotion); + println!(" {:2}. 
[{:.3}] {:>10} {} (interval={}d, emotion={:.1}, spectral={:.1})", + i + 1, item.priority, item.classification, item.key, + item.interval_days, item.emotion, item.outlier_score); } Ok(()) } @@ -1003,6 +1057,166 @@ fn cmd_trace(args: &[String]) -> Result<(), String> { Ok(()) } +fn cmd_spectral(args: &[String]) -> Result<(), String> { + let k: usize = args.first() + .and_then(|s| s.parse().ok()) + .unwrap_or(30); + let store = capnp_store::Store::load()?; + let g = graph::build_graph(&store); + let result = spectral::decompose(&g, k); + spectral::print_summary(&result, &g); + Ok(()) +} + +fn cmd_spectral_save(args: &[String]) -> Result<(), String> { + let k: usize = args.first() + .and_then(|s| s.parse().ok()) + .unwrap_or(20); + let store = capnp_store::Store::load()?; + let g = graph::build_graph(&store); + let result = spectral::decompose(&g, k); + let emb = spectral::to_embedding(&result); + spectral::save_embedding(&emb)?; + Ok(()) +} + +fn cmd_spectral_neighbors(args: &[String]) -> Result<(), String> { + if args.is_empty() { + return Err("usage: spectral-neighbors KEY [N]".to_string()); + } + let key = &args[0]; + let n: usize = args.get(1) + .and_then(|s| s.parse().ok()) + .unwrap_or(15); + + let emb = spectral::load_embedding()?; + + // Show which dimensions this node loads on + let dims = spectral::dominant_dimensions(&emb, &[key.as_str()]); + println!("Node: {} (embedding: {} dims)", key, emb.dims); + println!("Top spectral axes:"); + for &(d, loading) in dims.iter().take(5) { + println!(" axis {:<2} (λ={:.4}): loading={:.5}", d, emb.eigenvalues[d], loading); + } + + println!("\nNearest neighbors in spectral space:"); + let neighbors = spectral::nearest_neighbors(&emb, key, n); + for (i, (k, dist)) in neighbors.iter().enumerate() { + println!(" {:>2}. 
{:.5} {}", i + 1, dist, k); + } + Ok(()) +} + +fn cmd_spectral_positions(args: &[String]) -> Result<(), String> { + let n: usize = args.first() + .and_then(|s| s.parse().ok()) + .unwrap_or(30); + + let store = capnp_store::Store::load()?; + let emb = spectral::load_embedding()?; + + // Build communities fresh from graph (don't rely on cached node fields) + let g = store.build_graph(); + let communities = g.communities().clone(); + + let positions = spectral::analyze_positions(&emb, &communities); + + // Show outliers first + println!("Spectral position analysis — {} nodes", positions.len()); + println!(" outlier: dist_to_center / median (>1 = unusual position)"); + println!(" bridge: dist_to_center / dist_to_nearest_other_community"); + println!(); + + // Group by classification + let mut bridges: Vec<&spectral::SpectralPosition> = Vec::new(); + let mut outliers: Vec<&spectral::SpectralPosition> = Vec::new(); + let mut core: Vec<&spectral::SpectralPosition> = Vec::new(); + + for pos in positions.iter().take(n) { + match spectral::classify_position(pos) { + "bridge" => bridges.push(pos), + "outlier" => outliers.push(pos), + "core" => core.push(pos), + _ => outliers.push(pos), // peripheral goes with outliers for display + } + } + + if !bridges.is_empty() { + println!("=== Bridges (between communities) ==="); + for pos in &bridges { + println!(" [{:.2}/{:.2}] c{} → c{} {}", + pos.outlier_score, pos.bridge_score, + pos.community, pos.nearest_community, pos.key); + } + println!(); + } + + println!("=== Top outliers (far from own community center) ==="); + for pos in positions.iter().take(n) { + let class = spectral::classify_position(pos); + println!(" {:>10} outlier={:.2} bridge={:.2} c{:<3} {}", + class, pos.outlier_score, pos.bridge_score, + pos.community, pos.key); + } + + Ok(()) +} + +fn cmd_spectral_suggest(args: &[String]) -> Result<(), String> { + let n: usize = args.first() + .and_then(|s| s.parse().ok()) + .unwrap_or(20); + + let store = 
capnp_store::Store::load()?; + let emb = spectral::load_embedding()?; + let g = store.build_graph(); + let communities = g.communities(); + + // Only consider nodes with enough edges for meaningful spectral position + let min_degree = 3; + let well_connected: std::collections::HashSet<&str> = emb.coords.keys() + .filter(|k| g.degree(k) >= min_degree) + .map(|k| k.as_str()) + .collect(); + + // Filter embedding to well-connected nodes + let filtered_emb = spectral::SpectralEmbedding { + dims: emb.dims, + eigenvalues: emb.eigenvalues.clone(), + coords: emb.coords.iter() + .filter(|(k, _)| well_connected.contains(k.as_str())) + .map(|(k, v)| (k.clone(), v.clone())) + .collect(), + }; + + // Build set of existing linked pairs + let mut linked: std::collections::HashSet<(String, String)> = + std::collections::HashSet::new(); + for rel in &store.relations { + linked.insert((rel.source_key.clone(), rel.target_key.clone())); + linked.insert((rel.target_key.clone(), rel.source_key.clone())); + } + + eprintln!("Searching {} well-connected nodes (degree >= {})...", + filtered_emb.coords.len(), min_degree); + let pairs = spectral::unlinked_neighbors(&filtered_emb, &linked, n); + + println!("{} closest unlinked pairs (candidates for extractor agents):", pairs.len()); + for (i, (k1, k2, dist)) in pairs.iter().enumerate() { + let c1 = communities.get(k1) + .map(|c| format!("c{}", c)) + .unwrap_or_else(|| "?".into()); + let c2 = communities.get(k2) + .map(|c| format!("c{}", c)) + .unwrap_or_else(|| "?".into()); + let cross = if c1 != c2 { " [cross-community]" } else { "" }; + println!(" {:>2}. 
dist={:.4} {} ({}) ↔ {} ({}){}", + i + 1, dist, k1, c1, k2, c2, cross); + } + + Ok(()) +} + fn cmd_list_keys() -> Result<(), String> { let store = capnp_store::Store::load()?; let mut keys: Vec<_> = store.nodes.keys().collect(); @@ -1353,7 +1567,9 @@ fn cmd_journal_tail(args: &[String]) -> Result<(), String> { } else { // Use first content line, truncated title = if stripped.len() > 70 { - format!("{}...", &stripped[..67]) + let mut end = 67; + while !stripped.is_char_boundary(end) { end -= 1; } + format!("{}...", &stripped[..end]) } else { stripped.to_string() }; diff --git a/src/neuro.rs b/src/neuro.rs index 472505b..b29419e 100644 --- a/src/neuro.rs +++ b/src/neuro.rs @@ -7,7 +7,9 @@ use crate::capnp_store::Store; use crate::graph::{self, Graph}; use crate::similarity; +use crate::spectral::{self, SpectralEmbedding, SpectralPosition}; +use std::collections::HashMap; use std::time::{SystemTime, UNIX_EPOCH}; fn now_epoch() -> f64 { @@ -19,25 +21,45 @@ fn now_epoch() -> f64 { const SECS_PER_DAY: f64 = 86400.0; -/// Consolidation priority: how urgently a node needs attention +/// Consolidation priority: how urgently a node needs attention. /// -/// priority = (1 - schema_fit) × spaced_repetition_due × emotion × (1 + interference) -pub fn consolidation_priority(store: &Store, key: &str, graph: &Graph) -> f64 { +/// With spectral data: +/// priority = spectral_displacement × overdue × emotion +/// Without: +/// priority = (1 - schema_fit) × overdue × emotion +/// +/// Spectral displacement is the outlier_score clamped and normalized — +/// it measures how far a node sits from its community center in the +/// eigenspace. This is a global signal (considers all graph structure) +/// vs schema_fit which is local (only immediate neighbors). 
+pub fn consolidation_priority( + store: &Store, + key: &str, + graph: &Graph, + spectral_outlier: Option, +) -> f64 { let node = match store.nodes.get(key) { Some(n) => n, None => return 0.0, }; - // Schema fit: 0 = poorly integrated, 1 = well integrated - let fit = graph::schema_fit(graph, key) as f64; - let fit_factor = 1.0 - fit; + // Integration factor: how poorly integrated is this node? + let displacement = if let Some(outlier) = spectral_outlier { + // outlier_score = dist_to_center / median_dist_in_community + // 1.0 = typical position, >2 = unusual, >5 = extreme outlier + // Use log scale for dynamic range: the difference between + // outlier=5 and outlier=10 matters less than 1 vs 2. + (outlier / 3.0).min(3.0) + } else { + let fit = graph::schema_fit(graph, key) as f64; + 1.0 - fit + }; // Spaced repetition: how overdue is this node for replay? let interval_secs = node.spaced_repetition_interval as f64 * SECS_PER_DAY; let time_since_replay = if node.last_replayed > 0.0 { (now_epoch() - node.last_replayed).max(0.0) } else { - // Never replayed — treat as very overdue interval_secs * 3.0 }; let overdue_ratio = (time_since_replay / interval_secs).min(5.0); @@ -45,7 +67,7 @@ pub fn consolidation_priority(store: &Store, key: &str, graph: &Graph) -> f64 { // Emotional intensity: higher emotion = higher priority let emotion_factor = 1.0 + (node.emotion as f64 / 10.0); - fit_factor * overdue_ratio * emotion_factor + displacement * overdue_ratio * emotion_factor } /// Item in the replay queue @@ -55,28 +77,62 @@ pub struct ReplayItem { pub interval_days: u32, pub emotion: f32, pub schema_fit: f32, + /// Spectral classification: "bridge", "outlier", "core", "peripheral" + pub classification: &'static str, + /// Raw spectral outlier score (distance / median) + pub outlier_score: f64, } -/// Generate the replay queue: nodes ordered by consolidation priority +/// Generate the replay queue: nodes ordered by consolidation priority. 
+/// Automatically loads spectral embedding if available. pub fn replay_queue(store: &Store, count: usize) -> Vec { let graph = store.build_graph(); - replay_queue_with_graph(store, count, &graph) + let emb = spectral::load_embedding().ok(); + replay_queue_with_graph(store, count, &graph, emb.as_ref()) } -/// Generate the replay queue using a pre-built graph (avoids redundant rebuild) -pub fn replay_queue_with_graph(store: &Store, count: usize, graph: &Graph) -> Vec { +/// Generate the replay queue using pre-built graph and optional spectral data. +pub fn replay_queue_with_graph( + store: &Store, + count: usize, + graph: &Graph, + emb: Option<&SpectralEmbedding>, +) -> Vec { let fits = graph::schema_fit_all(graph); + // Build spectral position map if embedding is available + let positions: HashMap = if let Some(emb) = emb { + let communities = graph.communities().clone(); + spectral::analyze_positions(emb, &communities) + .into_iter() + .map(|p| (p.key.clone(), p)) + .collect() + } else { + HashMap::new() + }; + let mut items: Vec = store.nodes.iter() .map(|(key, node)| { - let priority = consolidation_priority(store, key, graph); + let pos = positions.get(key); + let outlier_score = pos.map(|p| p.outlier_score).unwrap_or(0.0); + let classification = pos + .map(|p| spectral::classify_position(p)) + .unwrap_or("unknown"); + + let priority = consolidation_priority( + store, key, graph, + pos.map(|p| p.outlier_score), + ); let fit = fits.get(key).copied().unwrap_or(0.0); + ReplayItem { key: key.clone(), priority, interval_days: node.spaced_repetition_interval, emotion: node.emotion, schema_fit: fit, + classification, + outlier_score, } }) .collect(); @@ -234,6 +290,10 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S item.priority, item.schema_fit, item.emotion)); out.push_str(&format!("Category: {} Interval: {}d\n", node.category.label(), node.spaced_repetition_interval)); + if item.outlier_score > 0.0 { + 
out.push_str(&format!("Spectral: {} (outlier={:.1})\n", + item.classification, item.outlier_score)); + } if let Some(community) = node.community_id { out.push_str(&format!("Community: {} ", community)); @@ -474,15 +534,17 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result { - let items = replay_queue_with_graph(store, count, &graph); + let items = replay_queue_with_graph(store, count, &graph, emb.as_ref()); let nodes_section = format_nodes_section(store, &items, &graph); load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)]) } "linker" => { // Filter to episodic entries - let mut items = replay_queue_with_graph(store, count * 2, &graph); + let mut items = replay_queue_with_graph(store, count * 2, &graph, emb.as_ref()); items.retain(|item| { store.nodes.get(&item.key) .map(|n| matches!(n.node_type, crate::capnp_store::NodeType::EpisodicSession)) @@ -516,10 +578,12 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result Vec<(String, f64)> { - let params = &store.params; + let params = store.params(); let mut activation: HashMap = HashMap::new(); let mut queue: VecDeque<(String, f64, u32)> = VecDeque::new(); @@ -44,10 +44,7 @@ fn spreading_activation( if depth >= params.max_hops { continue; } for (neighbor, strength) in graph.neighbors(&key) { - let neighbor_weight = store.nodes.get(neighbor.as_str()) - .map(|n| n.weight as f64) - .unwrap_or(params.default_weight); - + let neighbor_weight = store.node_weight(neighbor.as_str()); let propagated = act * params.edge_decay * neighbor_weight * strength as f64; if propagated < params.min_activation { continue; } @@ -65,27 +62,26 @@ fn spreading_activation( } /// Full search: find direct hits, spread activation, return ranked results -pub fn search(query: &str, store: &Store) -> Vec { - let graph = store.build_graph(); +pub fn search(query: &str, store: &impl StoreView) -> Vec { + let graph = crate::graph::build_graph_fast(store); let query_lower = 
query.to_lowercase(); let query_tokens: Vec<&str> = query_lower.split_whitespace().collect(); let mut seeds: Vec<(String, f64)> = Vec::new(); let mut snippets: HashMap = HashMap::new(); - for (key, node) in &store.nodes { - let content_lower = node.content.to_lowercase(); + store.for_each_node(|key, content, weight| { + let content_lower = content.to_lowercase(); let exact_match = content_lower.contains(&query_lower); let token_match = query_tokens.len() > 1 && query_tokens.iter().all(|t| content_lower.contains(t)); if exact_match || token_match { - let weight = node.weight as f64; - let activation = if exact_match { weight } else { weight * 0.85 }; - seeds.push((key.clone(), activation)); + let activation = if exact_match { weight as f64 } else { weight as f64 * 0.85 }; + seeds.push((key.to_owned(), activation)); - let snippet: String = node.content.lines() + let snippet: String = content.lines() .filter(|l| { let ll = l.to_lowercase(); if exact_match && ll.contains(&query_lower) { return true; } @@ -103,9 +99,9 @@ pub fn search(query: &str, store: &Store) -> Vec { }) .collect::>() .join("\n "); - snippets.insert(key.clone(), snippet); + snippets.insert(key.to_owned(), snippet); } - } + }); if seeds.is_empty() { return Vec::new(); diff --git a/src/spectral.rs b/src/spectral.rs new file mode 100644 index 0000000..e97eb99 --- /dev/null +++ b/src/spectral.rs @@ -0,0 +1,566 @@ +// Spectral decomposition of the memory graph. +// +// Computes eigenvalues and eigenvectors of the normalized graph Laplacian. +// The eigenvectors provide natural coordinates for each node — connected +// nodes land nearby, communities form clusters, bridges sit between clusters. 
+// +// The eigenvalue spectrum reveals: +// - Number of connected components (count of zero eigenvalues) +// - Number of natural communities (eigenvalues near zero, before the gap) +// - How well-connected the graph is (Fiedler value = second eigenvalue) +// +// The eigenvectors provide: +// - Spectral coordinates for each node (the embedding) +// - Community membership (sign/magnitude of Fiedler vector) +// - Natural projections (select which eigenvectors to include) + +use crate::graph::Graph; + +use faer::Mat; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; +use std::path::PathBuf; + +pub struct SpectralResult { + /// Node keys in index order + pub keys: Vec, + /// Eigenvalues in ascending order + pub eigenvalues: Vec, + /// Eigenvectors: eigvecs[k] is the k-th eigenvector (ascending eigenvalue order), + /// with eigvecs[k][i] being the value for node keys[i] + pub eigvecs: Vec>, +} + +/// Per-node spectral embedding, serializable to disk. +#[derive(Serialize, Deserialize)] +pub struct SpectralEmbedding { + /// Number of dimensions (eigenvectors) + pub dims: usize, + /// Eigenvalues for each dimension + pub eigenvalues: Vec, + /// Node key → coordinate vector + pub coords: HashMap>, +} + +fn embedding_path() -> PathBuf { + let home = std::env::var("HOME").unwrap_or_default(); + PathBuf::from(home).join(".claude/memory/spectral-embedding.json") +} + +/// Compute spectral decomposition of the memory graph. +/// +/// Returns the smallest `k` eigenvalues and their eigenvectors of the +/// normalized Laplacian L_sym = I - D^{-1/2} A D^{-1/2}. +/// +/// We compute the full decomposition (it's only 2000×2000, takes <1s) +/// and return the bottom k. 
+pub fn decompose(graph: &Graph, k: usize) -> SpectralResult { + // Only include nodes with edges (filter isolates) + let mut keys: Vec = graph.nodes().iter() + .filter(|k| graph.degree(k) > 0) + .cloned() + .collect(); + keys.sort(); + let n = keys.len(); + let isolates = graph.nodes().len() - n; + if isolates > 0 { + eprintln!("note: filtered {} isolated nodes, decomposing {} connected nodes", isolates, n); + } + + let key_to_idx: HashMap<&str, usize> = keys.iter() + .enumerate() + .map(|(i, k)| (k.as_str(), i)) + .collect(); + + // Build weighted degree vector and adjacency + let mut degree = vec![0.0f64; n]; + let mut adj_entries: Vec<(usize, usize, f64)> = Vec::new(); + + for (i, key) in keys.iter().enumerate() { + for (neighbor, strength) in graph.neighbors(key) { + if let Some(&j) = key_to_idx.get(neighbor.as_str()) { + if j > i { // each edge once + let w = strength as f64; + adj_entries.push((i, j, w)); + degree[i] += w; + degree[j] += w; + } + } + } + } + + // Build normalized Laplacian: L_sym = I - D^{-1/2} A D^{-1/2} + let mut laplacian = Mat::::zeros(n, n); + + // Diagonal = 1 for nodes with edges, 0 for isolates + for i in 0..n { + if degree[i] > 0.0 { + laplacian[(i, i)] = 1.0; + } + } + + // Off-diagonal: -w / sqrt(d_i * d_j) + for &(i, j, w) in &adj_entries { + if degree[i] > 0.0 && degree[j] > 0.0 { + let val = -w / (degree[i] * degree[j]).sqrt(); + laplacian[(i, j)] = val; + laplacian[(j, i)] = val; + } + } + + // Eigendecompose + let eig = laplacian.self_adjoint_eigen(faer::Side::Lower) + .expect("eigendecomposition failed"); + let s = eig.S(); + let u = eig.U(); + + let k = k.min(n); + let mut eigenvalues = Vec::with_capacity(k); + let mut eigvecs = Vec::with_capacity(k); + + let s_col = s.column_vector(); + for col in 0..k { + eigenvalues.push(s_col[col]); + let mut vec = Vec::with_capacity(n); + for row in 0..n { + vec.push(u[(row, col)]); + } + eigvecs.push(vec); + } + + SpectralResult { keys, eigenvalues, eigvecs } +} + +/// Print the 
spectral summary: eigenvalue spectrum, then each axis with +/// its extreme nodes (what the axis "means"). +pub fn print_summary(result: &SpectralResult, graph: &Graph) { + let n = result.keys.len(); + let k = result.eigenvalues.len(); + + println!("Spectral Decomposition — {} nodes, {} eigenpairs", n, k); + println!("=========================================\n"); + + // Compact eigenvalue table + println!("Eigenvalue spectrum:"); + for (i, &ev) in result.eigenvalues.iter().enumerate() { + let gap = if i > 0 { + ev - result.eigenvalues[i - 1] + } else { + 0.0 + }; + let gap_bar = if i > 0 { + let bars = (gap * 500.0).min(40.0) as usize; + "#".repeat(bars) + } else { + String::new() + }; + println!(" λ_{:<2} = {:.6} {}", i, ev, gap_bar); + } + + // Connected components + let near_zero = result.eigenvalues.iter() + .filter(|&&v| v.abs() < 1e-6) + .count(); + if near_zero > 1 { + println!("\n {} eigenvalues near 0 = {} disconnected components", near_zero, near_zero); + } + + // Each axis: what are the extremes? 
+ println!("\n\nNatural axes of the knowledge space"); + println!("===================================="); + + for axis in 0..k { + let ev = result.eigenvalues[axis]; + let vec = &result.eigvecs[axis]; + + // Sort nodes by their value on this axis + let mut indexed: Vec<(usize, f64)> = vec.iter() + .enumerate() + .map(|(i, &v)| (i, v)) + .collect(); + indexed.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); + + // Compute the "spread" — how much this axis differentiates + let min_val = indexed.first().map(|x| x.1).unwrap_or(0.0); + let max_val = indexed.last().map(|x| x.1).unwrap_or(0.0); + + println!("\n--- Axis {} (λ={:.6}, range={:.4}) ---", axis, ev, max_val - min_val); + + // Show extremes: 5 most negative, 5 most positive + let show = 5; + println!(" Negative pole:"); + for &(idx, val) in indexed.iter().take(show) { + let key = &result.keys[idx]; + // Shorten key for display: take last component + let short = shorten_key(key); + let deg = graph.degree(key); + let comm = graph.communities().get(key).copied().unwrap_or(999); + println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short); + } + + println!(" Positive pole:"); + for &(idx, val) in indexed.iter().rev().take(show) { + let key = &result.keys[idx]; + let short = shorten_key(key); + let deg = graph.degree(key); + let comm = graph.communities().get(key).copied().unwrap_or(999); + println!(" {:+.5} d={:<3} c={:<3} {}", val, deg, comm, short); + } + } +} + +/// Shorten a node key for display. +fn shorten_key(key: &str) -> &str { + if key.len() > 60 { &key[..60] } else { key } +} + +/// Convert SpectralResult to a per-node embedding (transposing the layout). 
+pub fn to_embedding(result: &SpectralResult) -> SpectralEmbedding { + let dims = result.eigvecs.len(); + let mut coords = HashMap::new(); + + for (i, key) in result.keys.iter().enumerate() { + let mut vec = Vec::with_capacity(dims); + for d in 0..dims { + vec.push(result.eigvecs[d][i]); + } + coords.insert(key.clone(), vec); + } + + SpectralEmbedding { + dims, + eigenvalues: result.eigenvalues.clone(), + coords, + } +} + +/// Save embedding to disk. +pub fn save_embedding(emb: &SpectralEmbedding) -> Result<(), String> { + let path = embedding_path(); + let json = serde_json::to_string(emb) + .map_err(|e| format!("serialize embedding: {}", e))?; + std::fs::write(&path, json) + .map_err(|e| format!("write {}: {}", path.display(), e))?; + eprintln!("Saved {}-dim embedding for {} nodes to {}", + emb.dims, emb.coords.len(), path.display()); + Ok(()) +} + +/// Load embedding from disk. +pub fn load_embedding() -> Result { + let path = embedding_path(); + let data = std::fs::read_to_string(&path) + .map_err(|e| format!("read {}: {}", path.display(), e))?; + serde_json::from_str(&data) + .map_err(|e| format!("parse embedding: {}", e)) +} + +/// Find the k nearest neighbors to a node in spectral space. +/// +/// Uses weighted euclidean distance where each dimension is weighted +/// by 1/eigenvalue — lower eigenvalues (coarser structure) matter more. 
+pub fn nearest_neighbors( + emb: &SpectralEmbedding, + key: &str, + k: usize, +) -> Vec<(String, f64)> { + let target = match emb.coords.get(key) { + Some(c) => c, + None => return vec![], + }; + + // Weight by inverse eigenvalue (coarser axes matter more) + let weights: Vec = emb.eigenvalues.iter() + .map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 }) + .collect(); + + let mut distances: Vec<(String, f64)> = emb.coords.iter() + .filter(|(k, _)| k.as_str() != key) + .map(|(k, coords)| { + let dist: f64 = target.iter() + .zip(coords.iter()) + .zip(weights.iter()) + .map(|((&a, &b), &w)| w * (a - b) * (a - b)) + .sum::() + .sqrt(); + (k.clone(), dist) + }) + .collect(); + + distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); + distances.truncate(k); + distances +} + +/// Find nearest neighbors to a set of seed nodes (multi-seed query). +/// Returns nodes ranked by minimum distance to any seed. +pub fn nearest_to_seeds( + emb: &SpectralEmbedding, + seeds: &[&str], + k: usize, +) -> Vec<(String, f64)> { + let seed_set: std::collections::HashSet<&str> = seeds.iter().copied().collect(); + + let seed_coords: Vec<&Vec> = seeds.iter() + .filter_map(|s| emb.coords.get(*s)) + .collect(); + if seed_coords.is_empty() { + return vec![]; + } + + let weights: Vec = emb.eigenvalues.iter() + .map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 }) + .collect(); + + let mut distances: Vec<(String, f64)> = emb.coords.iter() + .filter(|(k, _)| !seed_set.contains(k.as_str())) + .map(|(k, coords)| { + // Distance to nearest seed + let min_dist = seed_coords.iter() + .map(|sc| { + coords.iter() + .zip(sc.iter()) + .zip(weights.iter()) + .map(|((&a, &b), &w)| w * (a - b) * (a - b)) + .sum::() + .sqrt() + }) + .fold(f64::MAX, f64::min); + (k.clone(), min_dist) + }) + .collect(); + + distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); + distances.truncate(k); + distances +} + +/// Weighted euclidean distance in spectral space. 
+/// Dimensions weighted by 1/eigenvalue — coarser structure matters more. +fn weighted_distance(a: &[f64], b: &[f64], weights: &[f64]) -> f64 { + a.iter() + .zip(b.iter()) + .zip(weights.iter()) + .map(|((&x, &y), &w)| w * (x - y) * (x - y)) + .sum::() + .sqrt() +} + +/// Compute eigenvalue-inverse weights for distance calculations. +fn eigenvalue_weights(eigenvalues: &[f64]) -> Vec { + eigenvalues.iter() + .map(|&ev| if ev > 1e-8 { 1.0 / ev } else { 0.0 }) + .collect() +} + +/// Compute cluster centers (centroids) in spectral space. +pub fn cluster_centers( + emb: &SpectralEmbedding, + communities: &HashMap, +) -> HashMap> { + let mut sums: HashMap, usize)> = HashMap::new(); + + for (key, coords) in &emb.coords { + if let Some(&comm) = communities.get(key) { + let entry = sums.entry(comm) + .or_insert_with(|| (vec![0.0; emb.dims], 0)); + for (i, &c) in coords.iter().enumerate() { + entry.0[i] += c; + } + entry.1 += 1; + } + } + + sums.into_iter() + .map(|(comm, (sum, count))| { + let center: Vec = sum.iter() + .map(|s| s / count as f64) + .collect(); + (comm, center) + }) + .collect() +} + +/// Per-node analysis of spectral position relative to communities. +pub struct SpectralPosition { + pub key: String, + pub community: u32, + /// Distance to own community center + pub dist_to_center: f64, + /// Distance to nearest OTHER community center + pub dist_to_nearest: f64, + /// Which community is nearest (other than own) + pub nearest_community: u32, + /// dist_to_center / median_dist_in_community (>1 = outlier) + pub outlier_score: f64, + /// dist_to_center / dist_to_nearest (>1 = between clusters, potential bridge) + pub bridge_score: f64, +} + +/// Analyze spectral positions for all nodes. +/// +/// Returns positions sorted by outlier_score descending (most displaced first). 
+pub fn analyze_positions( + emb: &SpectralEmbedding, + communities: &HashMap, +) -> Vec { + let centers = cluster_centers(emb, communities); + let weights = eigenvalue_weights(&emb.eigenvalues); + + // Compute distances to own community center + let mut by_community: HashMap> = HashMap::new(); + let mut node_dists: Vec<(String, u32, f64)> = Vec::new(); + + for (key, coords) in &emb.coords { + if let Some(&comm) = communities.get(key) { + if let Some(center) = centers.get(&comm) { + let dist = weighted_distance(coords, center, &weights); + by_community.entry(comm).or_default().push(dist); + node_dists.push((key.clone(), comm, dist)); + } + } + } + + // Median distance per community for outlier scoring + let medians: HashMap = by_community.into_iter() + .map(|(comm, mut dists)| { + dists.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let median = if dists.is_empty() { + 1.0 + } else if dists.len() % 2 == 0 { + (dists[dists.len() / 2 - 1] + dists[dists.len() / 2]) / 2.0 + } else { + dists[dists.len() / 2] + }; + (comm, median.max(1e-6)) + }) + .collect(); + + let mut positions: Vec = node_dists.into_iter() + .map(|(key, comm, dist_to_center)| { + let coords = &emb.coords[&key]; + + let (nearest_community, dist_to_nearest) = centers.iter() + .filter(|(&c, _)| c != comm) + .map(|(&c, center)| (c, weighted_distance(coords, center, &weights))) + .min_by(|a, b| a.1.partial_cmp(&b.1).unwrap()) + .unwrap_or((comm, f64::MAX)); + + let median = medians.get(&comm).copied().unwrap_or(1.0); + let outlier_score = dist_to_center / median; + let bridge_score = if dist_to_nearest > 1e-8 { + dist_to_center / dist_to_nearest + } else { + 0.0 + }; + + SpectralPosition { + key, community: comm, + dist_to_center, dist_to_nearest, nearest_community, + outlier_score, bridge_score, + } + }) + .collect(); + + positions.sort_by(|a, b| b.outlier_score.partial_cmp(&a.outlier_score).unwrap()); + positions +} + +/// Find pairs of nodes that are spectrally close but not linked in the graph. 
+/// +/// These are the most valuable candidates for extractor agents — +/// the spectral structure says they should be related, but nobody +/// has articulated why. +pub fn unlinked_neighbors( + emb: &SpectralEmbedding, + linked_pairs: &HashSet<(String, String)>, + max_pairs: usize, +) -> Vec<(String, String, f64)> { + let weights = eigenvalue_weights(&emb.eigenvalues); + let keys: Vec<&String> = emb.coords.keys().collect(); + let mut pairs: Vec<(String, String, f64)> = Vec::new(); + + for (i, k1) in keys.iter().enumerate() { + let c1 = &emb.coords[*k1]; + for k2 in keys.iter().skip(i + 1) { + // Skip if already linked + let pair_fwd = ((*k1).clone(), (*k2).clone()); + let pair_rev = ((*k2).clone(), (*k1).clone()); + if linked_pairs.contains(&pair_fwd) || linked_pairs.contains(&pair_rev) { + continue; + } + + let dist = weighted_distance(c1, &emb.coords[*k2], &weights); + pairs.push(((*k1).clone(), (*k2).clone(), dist)); + } + } + + pairs.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap()); + pairs.truncate(max_pairs); + pairs +} + +/// Approximate spectral coordinates for a new node using Nyström extension. +/// +/// Given a new node's edges to existing nodes, estimate where it would +/// land in spectral space without recomputing the full decomposition. +/// Uses weighted average of neighbors' coordinates, weighted by edge strength. +pub fn nystrom_project( + emb: &SpectralEmbedding, + neighbors: &[(&str, f32)], // (key, edge_strength) +) -> Option> { + let mut weighted_sum = vec![0.0f64; emb.dims]; + let mut total_weight = 0.0f64; + + for &(key, strength) in neighbors { + if let Some(coords) = emb.coords.get(key) { + let w = strength as f64; + for (i, &c) in coords.iter().enumerate() { + weighted_sum[i] += w * c; + } + total_weight += w; + } + } + + if total_weight < 1e-8 { + return None; + } + + Some(weighted_sum.iter().map(|s| s / total_weight).collect()) +} + +/// Classify a spectral position: well-integrated, outlier, bridge, or orphan. 
+pub fn classify_position(pos: &SpectralPosition) -> &'static str { + if pos.bridge_score > 0.7 { + "bridge" // between two communities + } else if pos.outlier_score > 2.0 { + "outlier" // far from own community center + } else if pos.outlier_score < 0.5 { + "core" // close to community center + } else { + "peripheral" // normal community member + } +} + +/// Identify which spectral dimensions a set of nodes load on most heavily. +/// Returns dimension indices sorted by total loading. +pub fn dominant_dimensions(emb: &SpectralEmbedding, keys: &[&str]) -> Vec<(usize, f64)> { + let coords: Vec<&Vec> = keys.iter() + .filter_map(|k| emb.coords.get(*k)) + .collect(); + if coords.is_empty() { + return vec![]; + } + + let mut dim_loading: Vec<(usize, f64)> = (0..emb.dims) + .map(|d| { + let loading: f64 = coords.iter() + .map(|c| c[d].abs()) + .sum(); + (d, loading) + }) + .collect(); + + dim_loading.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); + dim_loading +}