diff options
| field | value | date |
|---|---|---|
| author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-06-26 13:42:41 -0400 |
| committer | Kent Overstreet <kent.overstreet@gmail.com> | 2022-06-26 13:42:41 -0400 |
| commit | 7020f85079b8ea7b4ccf1b9074f536fa55554de7 (patch) | |
| tree | 69baaa2715425d6bb1a455ca28d8fba55d4fb50f /lib | |
| parent | 22d55e28c528665298bf4c1f069e61e8ea2bbbdd (diff) | |
Improve timeout/watchdog functionality
This reworks watchdogs in the qemu-wrapper, so that now we correctly
print TEST FAILED in each logfile when a test times out.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Diffstat (limited to 'lib')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | lib/libktest.sh | 2 |
| -rw-r--r-- | lib/qemu-wrapper.c | 112 |
| -rwxr-xr-x | lib/testrunner | 17 |

3 files changed, 75 insertions, 56 deletions
diff --git a/lib/libktest.sh b/lib/libktest.sh index 542af43..d60f768 100644 --- a/lib/libktest.sh +++ b/lib/libktest.sh @@ -261,7 +261,7 @@ start_vm() else # Inside the VM, we set a timer and on timeout trigger a crash dump. The # timeout here is a backup: - qemu_cmd+=(-S -F -T $((60 + ktest_timeout))) + qemu_cmd+=(-S -F -T $ktest_timeout) fi local test_basename=$(basename -s .ktest "$ktest_test") diff --git a/lib/qemu-wrapper.c b/lib/qemu-wrapper.c index 94fe681..7025716 100644 --- a/lib/qemu-wrapper.c +++ b/lib/qemu-wrapper.c @@ -22,15 +22,14 @@ do { \ } while (0) static pid_t child; -static bool print_timeout = true; +static int childfd; static void alarm_handler(int sig) { - char msg[] = "TEST TIMEOUT\n"; - kill(child, SIGKILL); - if (print_timeout) - write(STDOUT_FILENO, msg, strlen(msg)); - _exit(EXIT_FAILURE); + char msg[] = "TEST FAILED (timed out)\n"; + + if (write(childfd, msg, strlen(msg)) != strlen(msg)) + die("write error in alarm handler"); } static void usage(void) @@ -111,8 +110,9 @@ static const char *test_starts(const char *line) static bool test_ends(char *line) { - return str_starts_with(line, "========= FAILED ") || - str_starts_with(line, "========= PASSED "); + return str_starts_with(line, "========= PASSED ") || + str_starts_with(line, "========= FAILED "); +} static FILE *popen_with_pid(char *argv[], pid_t *child) { @@ -142,6 +142,8 @@ static FILE *popen_with_pid(char *argv[], pid_t *child) die("error execing %s: %m", argv[0]); } + childfd = pipefd[1]; + FILE *childf = fdopen(pipefd[0], "r"); if (!childf) die("fdopen error: %m"); @@ -149,13 +151,32 @@ static FILE *popen_with_pid(char *argv[], pid_t *child) return childf; } +static void update_watchdog(const char *line) +{ + const char *new_watchdog = str_starts_with(line, "WATCHDOG "); + if (new_watchdog) + alarm(atoi(new_watchdog)); +} + +static char *output_line(const char *line, struct timespec start) +{ + struct timespec ts; + + if (clock_gettime(CLOCK_MONOTONIC, &ts)) + 
die("clock_gettime error: %m"); + + unsigned long elapsed = ts.tv_sec - start.tv_sec; + + return mprintf("%.5lu %s\n", elapsed, line); +} + int main(int argc, char *argv[]) { bool exit_on_success = false; bool exit_on_failure = false; unsigned long timeout = 0; int opt, ret = EXIT_FAILURE; - struct timespec start, ts; + struct timespec start; char *logdir = NULL; char *basename = NULL; @@ -197,74 +218,69 @@ int main(int argc, char *argv[]) if (!logdir) die("Required option -o missing"); - struct sigaction alarm_action = { .sa_handler = alarm_handler }; - if (sigaction(SIGALRM, &alarm_action, NULL)) - die("sigaction error: %m"); - - if (timeout) - alarm(timeout); - FILE *childf = popen_with_pid(argv + optind, &child); FILE *logfile = log_open(logdir, basename, NULL); FILE *test_logfile = NULL; - size_t n = 0, output_len = 0; + size_t n = 0; ssize_t len; char *line = NULL; - char *output_line = NULL; + + struct sigaction alarm_action = { + .sa_handler = alarm_handler, + .sa_flags = SA_RESTART, + }; + if (sigaction(SIGALRM, &alarm_action, NULL)) + die("sigaction error: %m"); + + if (timeout) + alarm(timeout); while ((len = getline(&line, &n, childf)) >= 0) { strim(line); + char *output = output_line(line, start); + + update_watchdog(line); + const char *testname = test_starts(line); - if (test_logfile && - (testname || test_ends(line))) { - fputc('\n', test_logfile); + if (test_logfile && testname) { fclose(test_logfile); test_logfile = NULL; } - if (clock_gettime(CLOCK_MONOTONIC, &ts)) { - fprintf(stderr, "clock_gettime error: %m\n"); - break; - } + if (test_logfile) + fputs(output, test_logfile); + fputs(output, logfile); + fputs(output, stdout); - if (output_len < n + 20) { - output_len = n + 20; - output_line = realloc(output_line, output_len); + if (test_logfile && test_ends(line)) { + fclose(test_logfile); + test_logfile = NULL; } - unsigned long elapsed = ts.tv_sec - start.tv_sec; - - strim(line); - sprintf(output_line, "%.5lu %s\n", elapsed, line); - - if 
(test_logfile) - fputs(output_line, test_logfile); - fputs(output_line, logfile); - fputs(output_line, stdout); - if (testname) test_logfile = log_open(logdir, basename, testname); - if (exit_on_success && - strstr(line, "TEST SUCCESS")) { - ret = 0; + if (exit_on_failure && str_starts_with(line, "TEST FAILED")) break; - } - if (exit_on_failure && strstr(line, "TEST FAILED")) + if (exit_on_success && str_starts_with(line, "TEST SUCCESS")) { + ret = 0; break; + } - if (exit_on_failure && strstr(line, "Kernel panic")) { - /* Read output for five more seconds, then exit */ - print_timeout = false; + if (exit_on_failure && + (strstr(line, "Kernel panic") || + strstr(line, "BUG"))) alarm(5); - } + + free(output); } -out: + + fputs("done", stdout); kill(child, SIGKILL); exit(ret); } diff --git a/lib/testrunner b/lib/testrunner index 6d9e1ea..d0114d8 100755 --- a/lib/testrunner +++ b/lib/testrunner @@ -250,13 +250,16 @@ if [[ $tests = "none" ]]; then exit 0 fi -if [[ $ktest_exit_on_success = 0 && $ktest_interactive = 0 ]]; then - ( - sleep $ktest_timeout - echo "TEST TIMEOUT - triggering crash" - echo c > /proc/sysrq-trigger - ) & -fi + +# For now, timeouts/watchdogs are only handled in lib/qemu-wrapper.c: + +#if [[ $ktest_exit_on_success = 0 && $ktest_interactive = 0 ]]; then +# ( +# sleep $ktest_timeout +# echo "TEST TIMEOUT - triggering crash" +# echo c > /proc/sysrq-trigger +# ) & +#fi trap 'pkill -P $$ >/dev/null' EXIT cd /root |