channels: parallel queries with timeout per daemon

One misbehaving channel daemon (accepting connections but not
responding to capnp RPCs) would block channel_list indefinitely,
because daemons were queried one at a time with no timeout.

Spawn each daemon query as a separate task with a 3-second timeout.
A hung daemon now shows as disconnected instead of hanging the
entire tool call.

Co-Authored-By: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
ProofOfConcept 2026-04-11 00:45:01 -04:00
parent 9d5bcdcb80
commit 4fc9676545
2 changed files with 47 additions and 19 deletions

View file

@ -323,17 +323,35 @@ async fn fetch_all_channels_inner() -> Vec<(String, bool, u32)> {
sup.load_config(); sup.load_config();
sup.ensure_running(); sup.ensure_running();
let mut result = Vec::new(); let mut futs = Vec::new();
for (daemon_name, _enabled, alive) in sup.status() { for (daemon_name, _enabled, alive) in sup.status() {
if !alive { if !alive {
result.push((daemon_name, false, 0)); futs.push(tokio::task::spawn_local({
let name = daemon_name.clone();
async move { vec![(name, false, 0u32)] }
}));
continue; continue;
} }
let sock = channels_dir.join(format!("{}.sock", daemon_name)); let sock = channels_dir.join(format!("{}.sock", daemon_name));
match rpc_list(&sock).await { futs.push(tokio::task::spawn_local({
None => result.push((daemon_name, false, 0)), let name = daemon_name.clone();
Some(channels) if channels.is_empty() => result.push((daemon_name, true, 0)), async move {
Some(channels) => result.extend(channels), match tokio::time::timeout(
std::time::Duration::from_secs(3),
rpc_list(&sock),
).await {
Ok(Some(channels)) if !channels.is_empty() => channels,
Ok(Some(_)) => vec![(name, true, 0)],
_ => vec![(name, false, 0)],
}
}
}));
}
let mut result = Vec::new();
for fut in futs {
if let Ok(entries) = fut.await {
result.extend(entries);
} }
} }
result result

View file

@ -208,25 +208,35 @@ async fn fetch_all_channels_inner() -> Vec<(String, bool, u32)> {
sup.load_config(); sup.load_config();
sup.ensure_running(); // restart any dead daemons sup.ensure_running(); // restart any dead daemons
let mut result = Vec::new(); let mut futs = Vec::new();
for (daemon_name, _enabled, alive) in sup.status() { for (daemon_name, _enabled, alive) in sup.status() {
if !alive { if !alive {
result.push((daemon_name, false, 0)); futs.push(tokio::task::spawn_local({
let name = daemon_name.clone();
async move { vec![(name, false, 0u32)] }
}));
continue; continue;
} }
let sock = channels_dir.join(format!("{}.sock", daemon_name)); let sock = channels_dir.join(format!("{}.sock", daemon_name));
match query_one_daemon(&sock).await { futs.push(tokio::task::spawn_local({
None => { let name = daemon_name.clone();
// Connection failed despite socket existing async move {
result.push((daemon_name, false, 0)); match tokio::time::timeout(
} std::time::Duration::from_secs(3),
Some(channels) if channels.is_empty() => { query_one_daemon(&sock),
// Connected but no channels yet ).await {
result.push((daemon_name, true, 0)); Ok(Some(channels)) if !channels.is_empty() => channels,
} Ok(Some(_)) => vec![(name, true, 0)],
Some(channels) => { _ => vec![(name, false, 0)],
result.extend(channels); }
} }
}));
}
let mut result = Vec::new();
for fut in futs {
if let Ok(entries) = fut.await {
result.extend(entries);
} }
} }
result result