use std::env; use std::fmt; use std::fs::File; use std::io; use std::io::prelude::*; use std::io::BufReader; use std::process::Command; #[derive(Debug, Clone)] struct BlockedEvent { ns: u64, stacktrace: Vec, } const TIME_UNITS: &[&str] = &[ "ns", "us", "ms", "sec", ]; impl fmt::Display for BlockedEvent { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut ns = self.ns as f64; let mut u = 0; while ns > 10000.0 && u + 1 < TIME_UNITS.len() { ns /= 1000.0; u += 1; } write!(f, "{:>6.1}{}", ns, TIME_UNITS[u])?; for (idx, e) in self.stacktrace.iter().enumerate() { if idx != 0 { write!(f, " =>")?; } write!(f, " {}", e)?; } Ok(()) } } const STACK_START_FILTER: &[&str] = &[ "trace_event_raw_event_sched_blocked", "__schedule", "schedule", ]; const STACK_END_FILTER: &[&str] = &[ "entry_SYSCALL_64_after_hwframe", "do_syscall_64", "__x86_sys_", "syscall_exit_to_user_mode", "ret_from_fork", "kthread", "worker_thread", ]; const STACK_FILTER: &[&[&str]] = &[ &["schedule_idle", "do_idle"], &["read_events", "do_io_getevents"], &["schedule_idle", "do_idle"], &["__cond_resched", "process_one_work"], &["schedule_timeout", "rcu_gp_fqs_loop", "rcu_gp_kthread"], &["ring_buffer_wait", "tracing_wait_pipe"], &["do_epoll_wait"], &["worker_thread"], &["smpboot_thread_fn"], ]; fn fn_matches(l: &str, r: &str) -> bool { let idx = r.find(|c: char| c == '.' || c == '/' || c == '+'); l == if let Some(idx) = idx { &r[..idx] } else { &r[..] } } fn should_skip_event(ev: &BlockedEvent) -> bool { if ev.stacktrace.is_empty() { return true; } STACK_FILTER.iter() .any(|i| i.iter() .zip(ev.stacktrace.iter()) .all(|(l, r)| fn_matches(l, r))) } fn event_trim(ev: &mut BlockedEvent) { while ev.stacktrace.len() > 1 { let l = ev.stacktrace.last().unwrap(); if STACK_END_FILTER.iter().any(|i| fn_matches(i, &l[..])) { ev.stacktrace.pop(); } else { break; } } } fn read_trace(mut child: std::process::Child) -> io::Result> { let tracepoint_name = "sched_blocked: "; let trace_pipe = File::open("/sys/kernel/debug/tracing/trace_pipe") .map_err(|e| {eprintln!("error opening trace: {}", e); e})?; let mut lines = BufReader::new(trace_pipe) .lines() .filter_map(|i| i.ok()) .peekable(); let mut evs: Vec = Vec::new(); while let Some(line) = lines.next() { let idx = line.rfind(tracepoint_name); if idx.is_none() { continue; } let idx = idx.unwrap() + tracepoint_name.len(); let mut ev = BlockedEvent { ns: line[idx..].parse().unwrap(), stacktrace: Vec::new(), }; let line = lines.next_if(|i| i.find("").is_some()); if line.is_none() { continue; } let mut skip = STACK_START_FILTER.iter(); while let Some(line) = lines.next_if(|i| i.starts_with(" => ")) { let func = &line[4..]; let s = skip.next(); if s.is_none() || !fn_matches(s.unwrap(), func) { ev.stacktrace.push(func.to_string()); skip.all(|_i| true); } } event_trim(&mut ev); if !should_skip_event(&ev) { evs.push(ev); } match child.try_wait() { Err(_e) => break, Ok(Some(_status)) => break, Ok(None) => continue, } } Ok(evs) } fn run_trace(args: Vec) -> io::Result<()> { std::fs::write("/sys/kernel/debug/tracing/trace", "") .map_err(|e| {eprintln!("error clearing trace: {}", e); e})?; std::fs::write("/sys/kernel/debug/tracing/buffer_size_kb", "8192") .map_err(|e| {eprintln!("error writing buffer size: {}", e); e})?; std::fs::write("/sys/kernel/debug/tracing/set_event", "sched:sched_blocked") .map_err(|e| {eprintln!("error setting event: {}", e); e})?; std::fs::write("/sys/kernel/debug/tracing/events/sched/sched_blocked/trigger", "stacktrace").ok(); std::fs::write("/sys/kernel/debug/tracing/trace_options", "sym-offset") .map_err(|e| {eprintln!("error setting sym-offset: {}", e); e})?; std::fs::write("/sys/kernel/debug/tracing/tracing_on", "1") .map_err(|e| {eprintln!("error enabling tracing: {}", e); e})?; let child = Command::new(&args[0]) .args(&args[1..]) .spawn() .expect(&format!("Failed to start child process {}", args[0])); let mut evs = read_trace(child)?; evs.sort_by(|l, r| l.ns.cmp(&r.ns)); println!("\nIndividual events:"); for i in evs.iter().rev().take(30) { println!("{}", i); } evs.sort_by(|l, r| l.stacktrace.cmp(&r.stacktrace)); let mut evs_folded: Vec = Vec::new(); for i in evs.iter() { if let Some(l) = evs_folded.last_mut() { if l.stacktrace == i.stacktrace { l.ns += i.ns; } else { evs_folded.push(i.clone()); } } else { evs_folded.push(i.clone()); } } evs_folded.sort_by(|l, r| l.ns.cmp(&r.ns)); println!("\nAggregate blocked time:"); for i in evs_folded.iter().rev().take(30) { println!("{}", i); } Ok(()) } fn main() { let args = env::args(); let args: Vec = args.skip(1).collect(); run_trace(args).ok(); }