diff options
Diffstat (limited to 'tools/testing/selftests')
27 files changed, 1406 insertions, 165 deletions
diff --git a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c index a1b139888048..511ac634eef0 100644 --- a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c +++ b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c @@ -15,7 +15,7 @@ long total_entries = 0; #define ENTRY_CNT 32 struct perf_branch_entry entries[ENTRY_CNT] = {}; -static inline bool in_range(__u64 val) +static inline bool gbs_in_range(__u64 val) { return (val >= address_low) && (val < address_high); } @@ -31,7 +31,7 @@ int BPF_PROG(test1, int n, int ret) for (i = 0; i < ENTRY_CNT; i++) { if (i >= total_entries) break; - if (in_range(entries[i].from) && in_range(entries[i].to)) + if (gbs_in_range(entries[i].from) && gbs_in_range(entries[i].to)) test1_hits++; else if (!test1_hits) wasted_entries++; diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index c4a57e69f749..4d556df4f77b 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -5,4 +5,5 @@ test_freezer test_kmem test_kill test_cpu +test_zswap wait_inotify diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 3d263747d2ad..27dbdd7bb4bb 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -12,6 +12,7 @@ TEST_GEN_PROGS += test_core TEST_GEN_PROGS += test_freezer TEST_GEN_PROGS += test_kill TEST_GEN_PROGS += test_cpu +TEST_GEN_PROGS += test_zswap LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h @@ -23,3 +24,4 @@ $(OUTPUT)/test_core: cgroup_util.c $(OUTPUT)/test_freezer: cgroup_util.c $(OUTPUT)/test_kill: cgroup_util.c $(OUTPUT)/test_cpu: cgroup_util.c +$(OUTPUT)/test_zswap: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index ed2e50bb1e76..c82f974b85c9 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -162,11 +162,11 @@ static int cg_run_in_subcgroups(const char *parent, * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS * threads. Then it checks the sanity of numbers on the parent level: * the total size of the cgroups should be roughly equal to - * anon + file + slab + kernel_stack. + * anon + file + kernel + sock. */ static int test_kmem_memcg_deletion(const char *root) { - long current, slab, anon, file, kernel_stack, pagetables, percpu, sock, sum; + long current, anon, file, kernel, sock, sum; int ret = KSFT_FAIL; char *parent; @@ -184,29 +184,22 @@ static int test_kmem_memcg_deletion(const char *root) goto cleanup; current = cg_read_long(parent, "memory.current"); - slab = cg_read_key_long(parent, "memory.stat", "slab "); anon = cg_read_key_long(parent, "memory.stat", "anon "); file = cg_read_key_long(parent, "memory.stat", "file "); - kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack "); - pagetables = cg_read_key_long(parent, "memory.stat", "pagetables "); - percpu = cg_read_key_long(parent, "memory.stat", "percpu "); + kernel = cg_read_key_long(parent, "memory.stat", "kernel "); sock = cg_read_key_long(parent, "memory.stat", "sock "); - if (current < 0 || slab < 0 || anon < 0 || file < 0 || - kernel_stack < 0 || pagetables < 0 || percpu < 0 || sock < 0) + if (current < 0 || anon < 0 || file < 0 || kernel < 0 || sock < 0) goto cleanup; - sum = slab + anon + file + kernel_stack + pagetables + percpu + sock; + sum = anon + file + kernel + sock; if (abs(sum - current) < MAX_VMSTAT_ERROR) { ret = KSFT_PASS; } else { printf("memory.current = %ld\n", current); - printf("slab + anon + file + kernel_stack = %ld\n", sum); - printf("slab = %ld\n", slab); + printf("anon + file + kernel + sock = %ld\n", sum); printf("anon = %ld\n", anon); printf("file = %ld\n", file); - printf("kernel_stack = %ld\n", kernel_stack); - printf("pagetables = %ld\n", pagetables); - printf("percpu = %ld\n", percpu); + printf("kernel = %ld\n", kernel); printf("sock = %ld\n", sock); } diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c new file mode 100644 index 000000000000..49def87a909b --- /dev/null +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <linux/limits.h> +#include <unistd.h> +#include <stdio.h> +#include <signal.h> +#include <sys/sysinfo.h> +#include <string.h> +#include <sys/wait.h> +#include <sys/mman.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + +static int read_int(const char *path, size_t *value) +{ + FILE *file; + int ret = 0; + + file = fopen(path, "r"); + if (!file) + return -1; + if (fscanf(file, "%ld", value) != 1) + ret = -1; + fclose(file); + return ret; +} + +static int set_min_free_kb(size_t value) +{ + FILE *file; + int ret; + + file = fopen("/proc/sys/vm/min_free_kbytes", "w"); + if (!file) + return -1; + ret = fprintf(file, "%ld\n", value); + fclose(file); + return ret; +} + +static int read_min_free_kb(size_t *value) +{ + return read_int("/proc/sys/vm/min_free_kbytes", value); +} + +static int get_zswap_stored_pages(size_t *value) +{ + return read_int("/sys/kernel/debug/zswap/stored_pages", value); +} + +static int get_zswap_written_back_pages(size_t *value) +{ + return read_int("/sys/kernel/debug/zswap/written_back_pages", value); +} + +static int allocate_bytes(const char *cgroup, void *arg) +{ + size_t size = (size_t)arg; + char *mem = (char *)malloc(size); + + if (!mem) + return -1; + for (int i = 0; i < size; i += 4095) + mem[i] = 'a'; + free(mem); + return 0; +} + +/* + * When trying to store a memcg page in zswap, if the memcg hits its memory + * limit in zswap, writeback should not be triggered. + * + * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may + * not zswap"). Needs to be revised when a per memcg writeback mechanism is + * implemented. + */ +static int test_no_invasive_cgroup_shrink(const char *root) +{ + size_t written_back_before, written_back_after; + int ret = KSFT_FAIL; + char *test_group; + + /* Set up */ + test_group = cg_name(root, "no_shrink_test"); + if (!test_group) + goto out; + if (cg_create(test_group)) + goto out; + if (cg_write(test_group, "memory.max", "1M")) + goto out; + if (cg_write(test_group, "memory.zswap.max", "10K")) + goto out; + if (get_zswap_written_back_pages(&written_back_before)) + goto out; + + /* Allocate 10x memory.max to push memory into zswap */ + if (cg_run(test_group, allocate_bytes, (void *)MB(10))) + goto out; + + /* Verify that no writeback happened because of the memcg allocation */ + if (get_zswap_written_back_pages(&written_back_after)) + goto out; + if (written_back_after == written_back_before) + ret = KSFT_PASS; +out: + cg_destroy(test_group); + free(test_group); + return ret; +} + +struct no_kmem_bypass_child_args { + size_t target_alloc_bytes; + size_t child_allocated; +}; + +static int no_kmem_bypass_child(const char *cgroup, void *arg) +{ + struct no_kmem_bypass_child_args *values = arg; + void *allocation; + + allocation = malloc(values->target_alloc_bytes); + if (!allocation) { + values->child_allocated = true; + return -1; + } + for (long i = 0; i < values->target_alloc_bytes; i += 4095) + ((char *)allocation)[i] = 'a'; + values->child_allocated = true; + pause(); + free(allocation); + return 0; +} + +/* + * When pages owned by a memcg are pushed to zswap by kswapd, they should be + * charged to that cgroup. This wasn't the case before commit + * cd08d80ecdac("mm: correctly charge compressed memory to its memcg"). + * + * The test first allocates memory in a memcg, then raises min_free_kbytes to + * a very high value so that the allocation falls below low wm, then makes + * another allocation to trigger kswapd that should push the memcg-owned pages + * to zswap and verifies that the zswap pages are correctly charged. + * + * To be run on a VM with at most 4G of memory. + */ +static int test_no_kmem_bypass(const char *root) +{ + size_t min_free_kb_high, min_free_kb_low, min_free_kb_original; + struct no_kmem_bypass_child_args *values; + size_t trigger_allocation_size; + int wait_child_iteration = 0; + long stored_pages_threshold; + struct sysinfo sys_info; + int ret = KSFT_FAIL; + int child_status; + char *test_group; + pid_t child_pid; + + /* Read sys info and compute test values accordingly */ + if (sysinfo(&sys_info) != 0) + return KSFT_FAIL; + if (sys_info.totalram > 5000000000) + return KSFT_SKIP; + values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ | + PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (values == MAP_FAILED) + return KSFT_FAIL; + if (read_min_free_kb(&min_free_kb_original)) + return KSFT_FAIL; + min_free_kb_high = sys_info.totalram / 2000; + min_free_kb_low = sys_info.totalram / 500000; + values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) + + sys_info.totalram * 5 / 100; + stored_pages_threshold = sys_info.totalram / 5 / 4096; + trigger_allocation_size = sys_info.totalram / 20; + + /* Set up test memcg */ + if (cg_write(root, "cgroup.subtree_control", "+memory")) + goto out; + test_group = cg_name(root, "kmem_bypass_test"); + if (!test_group) + goto out; + + /* Spawn memcg child and wait for it to allocate */ + set_min_free_kb(min_free_kb_low); + if (cg_create(test_group)) + goto out; + values->child_allocated = false; + child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values); + if (child_pid < 0) + goto out; + while (!values->child_allocated && wait_child_iteration++ < 10000) + usleep(1000); + + /* Try to wakeup kswapd and let it push child memory to zswap */ + set_min_free_kb(min_free_kb_high); + for (int i = 0; i < 20; i++) { + size_t stored_pages; + char *trigger_allocation = malloc(trigger_allocation_size); + + if (!trigger_allocation) + break; + for (int i = 0; i < trigger_allocation_size; i += 4095) + trigger_allocation[i] = 'b'; + usleep(100000); + free(trigger_allocation); + if (get_zswap_stored_pages(&stored_pages)) + break; + if (stored_pages < 0) + break; + /* If memory was pushed to zswap, verify it belongs to memcg */ + if (stored_pages > stored_pages_threshold) { + int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped "); + int delta = stored_pages * 4096 - zswapped; + int result_ok = delta < stored_pages * 4096 / 4; + + ret = result_ok ? KSFT_PASS : KSFT_FAIL; + break; + } + } + + kill(child_pid, SIGTERM); + waitpid(child_pid, &child_status, 0); +out: + set_min_free_kb(min_free_kb_original); + cg_destroy(test_group); + free(test_group); + return ret; +} + +#define T(x) { x, #x } +struct zswap_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_no_kmem_bypass), + T(test_no_invasive_cgroup_shrink), +}; +#undef T + +static bool zswap_configured(void) +{ + return access("/sys/module/zswap", F_OK) == 0; +} + +int main(int argc, char **argv) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + if (!zswap_configured()) + ksft_exit_skip("zswap isn't configured\n"); + + /* + * Check that memory controller is available: + * memory is listed in cgroup.controllers + */ + if (cg_read_strstr(root, "cgroup.controllers", "memory")) + ksft_exit_skip("memory controller isn't available\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index bcd4734ca094..60a9a305aef0 100644 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -84,6 +84,7 @@ test_tried_regions() { tried_regions_dir=$1 ensure_dir "$tried_regions_dir" "exist" + ensure_file "$tried_regions_dir/total_bytes" "exist" "400" } test_stats() @@ -102,9 +103,14 @@ test_filter() ensure_file "$filter_dir/type" "exist" "600" ensure_write_succ "$filter_dir/type" "anon" "valid input" ensure_write_succ "$filter_dir/type" "memcg" "valid input" + ensure_write_succ "$filter_dir/type" "addr" "valid input" + ensure_write_succ "$filter_dir/type" "target" "valid input" ensure_write_fail "$filter_dir/type" "foo" "invalid input" ensure_file "$filter_dir/matching" "exist" "600" ensure_file "$filter_dir/memcg_path" "exist" "600" + ensure_file "$filter_dir/addr_start" "exist" "600" + ensure_file "$filter_dir/addr_end" "exist" "600" + ensure_file "$filter_dir/damon_target_idx" "exist" "600" } test_filters() diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 829be379545a..529d29a35900 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -113,6 +113,15 @@ static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; } static inline void ksft_print_header(void) { + /* + * Force line buffering; If stdout is not connected to a terminal, it + * will otherwise default to fully buffered, which can cause output + * duplication if there is content in the buffer when fork()ing. If + * there is a crash, line buffering also means the most recent output + * line will be visible. + */ + setvbuf(stdout, NULL, _IOLBF, 0); + if (!(getenv("KSFT_TAP_LEVEL"))) printf("TAP version 13\n"); } diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 1c952d1401d4..261c73cab41b 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -105,15 +105,18 @@ run_one() echo "# Warning: file $TEST is missing!" echo "not ok $test_num $TEST_HDR_MSG" else + if [ -x /usr/bin/stdbuf ]; then + stdbuf="/usr/bin/stdbuf --output=L " + fi eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}" - cmd="./$BASENAME_TEST $kselftest_cmd_args" + cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args" if [ ! -x "$TEST" ]; then echo "# Warning: file $TEST is not executable" if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ] then interpreter=$(head -n 1 "$TEST" | cut -c 3-) - cmd="$interpreter ./$BASENAME_TEST" + cmd="$stdbuf $interpreter ./$BASENAME_TEST" else echo "not ok $test_num $TEST_HDR_MSG" return diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index dba0e8ba002f..3df008677239 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -18,6 +18,7 @@ #include <sys/syscall.h> #include <sys/wait.h> #include <unistd.h> +#include <ctype.h> #include "common.h" @@ -43,7 +44,6 @@ */ static size_t mfd_def_size = MFD_DEF_SIZE; static const char *memfd_str = MEMFD_STR; -static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *)); static int newpid_thread_fn2(void *arg); static void join_newpid_thread(pid_t pid); @@ -96,12 +96,12 @@ static void sysctl_assert_write(const char *val) int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC); if (fd < 0) { - printf("open sysctl failed\n"); + printf("open sysctl failed: %m\n"); abort(); } if (write(fd, val, strlen(val)) < 0) { - printf("write sysctl failed\n"); + printf("write sysctl %s failed: %m\n", val); abort(); } } @@ -111,7 +111,7 @@ static void sysctl_fail_write(const char *val) int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC); if (fd < 0) { - printf("open sysctl failed\n"); + printf("open sysctl failed: %m\n"); abort(); } @@ -122,6 +122,33 @@ static void sysctl_fail_write(const char *val) } } +static void sysctl_assert_equal(const char *val) +{ + char *p, buf[128] = {}; + int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC); + + if (fd < 0) { + printf("open sysctl failed: %m\n"); + abort(); + } + + if (read(fd, buf, sizeof(buf)) < 0) { + printf("read sysctl failed: %m\n"); + abort(); + } + + /* Strip trailing whitespace. */ + p = buf; + while (!isspace(*p)) + p++; + *p = '\0'; + + if (strcmp(buf, val) != 0) { + printf("unexpected sysctl value: expected %s, got %s\n", val, buf); + abort(); + } +} + static int mfd_assert_reopen_fd(int fd_in) { int fd; @@ -736,7 +763,7 @@ static int idle_thread_fn(void *arg) return 0; } -static pid_t spawn_idle_thread(unsigned int flags) +static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg) { uint8_t *stack; pid_t pid; @@ -747,10 +774,7 @@ static pid_t spawn_idle_thread(unsigned int flags) abort(); } - pid = clone(idle_thread_fn, - stack + STACK_SIZE, - SIGCHLD | flags, - NULL); + pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg); if (pid < 0) { printf("clone() failed: %m\n"); abort(); @@ -759,6 +783,33 @@ static pid_t spawn_idle_thread(unsigned int flags) return pid; } +static void join_thread(pid_t pid) +{ + int wstatus; + + if (waitpid(pid, &wstatus, 0) < 0) { + printf("newpid thread: waitpid() failed: %m\n"); + abort(); + } + + if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) { + printf("newpid thread: exited with non-zero error code %d\n", + WEXITSTATUS(wstatus)); + abort(); + } + + if (WIFSIGNALED(wstatus)) { + printf("newpid thread: killed by signal %d\n", + WTERMSIG(wstatus)); + abort(); + } +} + +static pid_t spawn_idle_thread(unsigned int flags) +{ + return spawn_thread(flags, idle_thread_fn, NULL); +} + static void join_idle_thread(pid_t pid) { kill(pid, SIGTERM); @@ -1111,109 +1162,260 @@ static void test_noexec_seal(void) close(fd); } -static void test_sysctl_child(void) +static void test_sysctl_sysctl0(void) { int fd; - int pid; - printf("%s sysctl 0\n", memfd_str); - sysctl_assert_write("0"); - fd = mfd_assert_new("kern_memfd_sysctl_0", + sysctl_assert_equal("0"); + + fd = mfd_assert_new("kern_memfd_sysctl_0_dfl", mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); - mfd_assert_mode(fd, 0777); mfd_assert_has_seals(fd, 0); mfd_assert_chmod(fd, 0644); close(fd); +} - printf("%s sysctl 1\n", memfd_str); - sysctl_assert_write("1"); - fd = mfd_assert_new("kern_memfd_sysctl_1", +static void test_sysctl_set_sysctl0(void) +{ + sysctl_assert_write("0"); + test_sysctl_sysctl0(); +} + +static void test_sysctl_sysctl1(void) +{ + int fd; + + sysctl_assert_equal("1"); + + fd = mfd_assert_new("kern_memfd_sysctl_1_dfl", mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); + mfd_assert_mode(fd, 0666); + mfd_assert_has_seals(fd, F_SEAL_EXEC); + mfd_fail_chmod(fd, 0777); + close(fd); - printf("%s child ns\n", memfd_str); - pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn2); - join_newpid_thread(pid); + fd = mfd_assert_new("kern_memfd_sysctl_1_exec", + mfd_def_size, + MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING); + mfd_assert_mode(fd, 0777); + mfd_assert_has_seals(fd, 0); + mfd_assert_chmod(fd, 0644); + close(fd); + fd = mfd_assert_new("kern_memfd_sysctl_1_noexec", + mfd_def_size, + MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING); mfd_assert_mode(fd, 0666); mfd_assert_has_seals(fd, F_SEAL_EXEC); mfd_fail_chmod(fd, 0777); - sysctl_fail_write("0"); close(fd); - - printf("%s sysctl 2\n", memfd_str); - sysctl_assert_write("2"); - mfd_fail_new("kern_memfd_sysctl_2", - MFD_CLOEXEC | MFD_ALLOW_SEALING); - sysctl_fail_write("0"); - sysctl_fail_write("1"); } -static int newpid_thread_fn(void *arg) +static void test_sysctl_set_sysctl1(void) { - test_sysctl_child(); - return 0; + sysctl_assert_write("1"); + test_sysctl_sysctl1(); } -static void test_sysctl_child2(void) +static void test_sysctl_sysctl2(void) { int fd; - sysctl_fail_write("0"); - fd = mfd_assert_new("kern_memfd_sysctl_1", + sysctl_assert_equal("2"); + + fd = mfd_assert_new("kern_memfd_sysctl_2_dfl", mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); + mfd_assert_mode(fd, 0666); + mfd_assert_has_seals(fd, F_SEAL_EXEC); + mfd_fail_chmod(fd, 0777); + close(fd); + + mfd_fail_new("kern_memfd_sysctl_2_exec", + MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING); + fd = mfd_assert_new("kern_memfd_sysctl_2_noexec", + mfd_def_size, + MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING); mfd_assert_mode(fd, 0666); mfd_assert_has_seals(fd, F_SEAL_EXEC); mfd_fail_chmod(fd, 0777); close(fd); } -static int newpid_thread_fn2(void *arg) +static void test_sysctl_set_sysctl2(void) +{ + sysctl_assert_write("2"); + test_sysctl_sysctl2(); +} + +static int sysctl_simple_child(void *arg) +{ + int fd; + int pid; + + printf("%s sysctl 0\n", memfd_str); + test_sysctl_set_sysctl0(); + + printf("%s sysctl 1\n", memfd_str); + test_sysctl_set_sysctl1(); + + printf("%s sysctl 0\n", memfd_str); + test_sysctl_set_sysctl0(); + + printf("%s sysctl 2\n", memfd_str); + test_sysctl_set_sysctl2(); + + printf("%s sysctl 1\n", memfd_str); + test_sysctl_set_sysctl1(); + + printf("%s sysctl 0\n", memfd_str); + test_sysctl_set_sysctl0(); + + return 0; +} + +/* + * Test sysctl + * A very basic test to make sure the core sysctl semantics work. + */ +static void test_sysctl_simple(void) +{ + int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL); + + join_thread(pid); +} + +static int sysctl_nested(void *arg) { - test_sysctl_child2(); + void (*fn)(void) = arg; + + fn(); return 0; } -static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *)) + +static int sysctl_nested_wait(void *arg) { - uint8_t *stack; - pid_t pid; + /* Wait for a SIGCONT. */ + kill(getpid(), SIGSTOP); + return sysctl_nested(arg); +} - stack = malloc(STACK_SIZE); - if (!stack) { - printf("malloc(STACK_SIZE) failed: %m\n"); - abort(); - } +static void test_sysctl_sysctl1_failset(void) +{ + sysctl_fail_write("0"); + test_sysctl_sysctl1(); +} - pid = clone(fn, - stack + STACK_SIZE, - SIGCHLD | flags, - NULL); - if (pid < 0) { - printf("clone() failed: %m\n"); - abort(); - } +static void test_sysctl_sysctl2_failset(void) +{ + sysctl_fail_write("1"); + test_sysctl_sysctl2(); - return pid; + sysctl_fail_write("0"); + test_sysctl_sysctl2(); } -static void join_newpid_thread(pid_t pid) +static int sysctl_nested_child(void *arg) { - waitpid(pid, NULL, 0); + int fd; + int pid; + + printf("%s nested sysctl 0\n", memfd_str); + sysctl_assert_write("0"); + /* A further nested pidns works the same. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL); + join_thread(pid); + + printf("%s nested sysctl 1\n", memfd_str); + sysctl_assert_write("1"); + /* Child inherits our setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1); + join_thread(pid); + /* Child cannot raise the setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, + test_sysctl_sysctl1_failset); + join_thread(pid); + /* Child can lower the setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, + test_sysctl_set_sysctl2); + join_thread(pid); + /* Child lowering the setting has no effect on our setting. */ + test_sysctl_sysctl1(); + + printf("%s nested sysctl 2\n", memfd_str); + sysctl_assert_write("2"); + /* Child inherits our setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2); + join_thread(pid); + /* Child cannot raise the setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, + test_sysctl_sysctl2_failset); + join_thread(pid); + + /* Verify that the rules are actually inherited after fork. */ + printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str); + sysctl_assert_write("0"); + + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl1_failset); + sysctl_assert_write("1"); + kill(pid, SIGCONT); + join_thread(pid); + + printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str); + sysctl_assert_write("0"); + + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl2_failset); + sysctl_assert_write("2"); + kill(pid, SIGCONT); + join_thread(pid); + + /* + * Verify that the current effective setting is saved on fork, meaning + * that the parent lowering the sysctl doesn't affect already-forked + * children. + */ + printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str); + sysctl_assert_write("2"); + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl2); + sysctl_assert_write("1"); + kill(pid, SIGCONT); + join_thread(pid); + + printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str); + sysctl_assert_write("2"); + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl2); + sysctl_assert_write("0"); + kill(pid, SIGCONT); + join_thread(pid); + + printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str); + sysctl_assert_write("1"); + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl1); + sysctl_assert_write("0"); + kill(pid, SIGCONT); + join_thread(pid); + + return 0; } /* - * Test sysctl - * A very basic sealing test to see whether setting/retrieving seals works. + * Test sysctl with nested pid namespaces + * Make sure that the sysctl nesting semantics work correctly. */ -static void test_sysctl(void) +static void test_sysctl_nested(void) { - int pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn); + int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL); - join_newpid_thread(pid); + join_thread(pid); } /* @@ -1399,6 +1601,9 @@ int main(int argc, char **argv) test_seal_grow(); test_seal_resize(); + test_sysctl_simple(); + test_sysctl_nested(); + test_share_dup("SHARE-DUP", ""); test_share_mmap("SHARE-MMAP", ""); test_share_open("SHARE-OPEN", ""); @@ -1413,8 +1618,6 @@ int main(int argc, char **argv) test_share_fork("SHARE-FORK", SHARED_FT_STR); join_idle_thread(pid); - test_sysctl(); - printf("memfd: DONE\n"); return 0; diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 7e2a982383c0..cdc9ce4426b9 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -5,6 +5,7 @@ hugepage-mremap hugepage-shm hugepage-vmemmap hugetlb-madvise +hugetlb-read-hwpoison khugepaged map_hugetlb map_populate diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 66d7c07dc177..6a9fc5693145 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -35,39 +35,43 @@ MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) LDLIBS = -lrt -lpthread -TEST_GEN_PROGS = cow -TEST_GEN_PROGS += compaction_test -TEST_GEN_PROGS += gup_longterm -TEST_GEN_PROGS += gup_test -TEST_GEN_PROGS += hmm-tests -TEST_GEN_PROGS += hugetlb-madvise -TEST_GEN_PROGS += hugepage-mmap -TEST_GEN_PROGS += hugepage-mremap -TEST_GEN_PROGS += hugepage-shm -TEST_GEN_PROGS += hugepage-vmemmap -TEST_GEN_PROGS += khugepaged -TEST_GEN_PROGS += madv_populate -TEST_GEN_PROGS += map_fixed_noreplace -TEST_GEN_PROGS += map_hugetlb -TEST_GEN_PROGS += map_populate -TEST_GEN_PROGS += memfd_secret -TEST_GEN_PROGS += migration -TEST_GEN_PROGS += mkdirty -TEST_GEN_PROGS += mlock-random-test -TEST_GEN_PROGS += mlock2-tests -TEST_GEN_PROGS += mrelease_test -TEST_GEN_PROGS += mremap_dontunmap -TEST_GEN_PROGS += mremap_test -TEST_GEN_PROGS += on-fault-limit -TEST_GEN_PROGS += thuge-gen -TEST_GEN_PROGS += transhuge-stress -TEST_GEN_PROGS += uffd-stress -TEST_GEN_PROGS += uffd-unit-tests +TEST_GEN_FILES = cow +TEST_GEN_FILES += compaction_test +TEST_GEN_FILES += gup_longterm +TEST_GEN_FILES += gup_test +TEST_GEN_FILES += hmm-tests +TEST_GEN_FILES += hugetlb-madvise +TEST_GEN_FILES += hugetlb-read-hwpoison +TEST_GEN_FILES += hugepage-mmap +TEST_GEN_FILES += hugepage-mremap +TEST_GEN_FILES += hugepage-shm +TEST_GEN_FILES += hugepage-vmemmap +TEST_GEN_FILES += khugepaged +TEST_GEN_FILES += madv_populate +TEST_GEN_FILES += map_fixed_noreplace +TEST_GEN_FILES += map_hugetlb +TEST_GEN_FILES += map_populate +TEST_GEN_FILES += memfd_secret +TEST_GEN_FILES += migration +TEST_GEN_FILES += mkdirty +TEST_GEN_FILES += mlock-random-test +TEST_GEN_FILES += mlock2-tests +TEST_GEN_FILES += mrelease_test +TEST_GEN_FILES += mremap_dontunmap +TEST_GEN_FILES += mremap_test +TEST_GEN_FILES += on-fault-limit +TEST_GEN_FILES += thuge-gen +TEST_GEN_FILES += transhuge-stress +TEST_GEN_FILES += uffd-stress +TEST_GEN_FILES += uffd-unit-tests +TEST_GEN_FILES += split_huge_page_test +TEST_GEN_FILES += ksm_tests +TEST_GEN_FILES += ksm_functional_tests +TEST_GEN_FILES += mdwe_test + +ifneq ($(ARCH),arm64) TEST_GEN_PROGS += soft-dirty -TEST_GEN_PROGS += split_huge_page_test -TEST_GEN_PROGS += ksm_tests -TEST_GEN_PROGS += ksm_functional_tests -TEST_GEN_PROGS += mdwe_test +endif ifeq ($(ARCH),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32) @@ -83,24 +87,24 @@ CFLAGS += -no-pie endif ifeq ($(CAN_BUILD_I386),1) -TEST_GEN_PROGS += $(BINARIES_32) +TEST_GEN_FILES += $(BINARIES_32) endif ifeq ($(CAN_BUILD_X86_64),1) -TEST_GEN_PROGS += $(BINARIES_64) +TEST_GEN_FILES += $(BINARIES_64) endif else ifneq (,$(findstring $(ARCH),ppc64)) -TEST_GEN_PROGS += protection_keys +TEST_GEN_FILES += protection_keys endif endif ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64)) -TEST_GEN_PROGS += va_high_addr_switch -TEST_GEN_PROGS += virtual_address_range -TEST_GEN_PROGS += write_to_hugetlbfs +TEST_GEN_FILES += va_high_addr_switch +TEST_GEN_FILES += virtual_address_range +TEST_GEN_FILES += write_to_hugetlbfs endif TEST_PROGS := run_vmtests.sh @@ -112,6 +116,7 @@ TEST_FILES += va_high_addr_switch.sh include ../lib.mk $(TEST_GEN_PROGS): vm_util.c +$(TEST_GEN_FILES): vm_util.c $(OUTPUT)/uffd-stress: uffd-common.c $(OUTPUT)/uffd-unit-tests: uffd-common.c diff --git a/tools/testing/selftests/mm/hugetlb-read-hwpoison.c b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c new file mode 100644 index 000000000000..ba6cc6f9cabc --- /dev/null +++ b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <linux/magic.h> +#include <sys/mman.h> +#include <sys/statfs.h> +#include <errno.h> +#include <stdbool.h> + +#include "../kselftest.h" + +#define PREFIX " ... " +#define ERROR_PREFIX " !!! " + +#define MAX_WRITE_READ_CHUNK_SIZE (getpagesize() * 16) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +enum test_status { + TEST_PASSED = 0, + TEST_FAILED = 1, + TEST_SKIPPED = 2, +}; + +static char *status_to_str(enum test_status status) +{ + switch (status) { + case TEST_PASSED: + return "TEST_PASSED"; + case TEST_FAILED: + return "TEST_FAILED"; + case TEST_SKIPPED: + return "TEST_SKIPPED"; + default: + return "TEST_???"; + } +} + +static int setup_filemap(char *filemap, size_t len, size_t wr_chunk_size) +{ + char iter = 0; + + for (size_t offset = 0; offset < len; + offset += wr_chunk_size) { + iter++; + memset(filemap + offset, iter, wr_chunk_size); + } + + return 0; +} + +static bool verify_chunk(char *buf, size_t len, char val) +{ + size_t i; + + for (i = 0; i < len; ++i) { + if (buf[i] != val) { + printf(PREFIX ERROR_PREFIX "check fail: buf[%lu] = %u != %u\n", + i, buf[i], val); + return false; + } + } + + return true; +} + +static bool seek_read_hugepage_filemap(int fd, size_t len, size_t wr_chunk_size, + off_t offset, size_t expected) +{ + char buf[MAX_WRITE_READ_CHUNK_SIZE]; + ssize_t ret_count = 0; + ssize_t total_ret_count = 0; + char val = offset / wr_chunk_size + offset % wr_chunk_size; + + printf(PREFIX PREFIX "init val=%u with offset=0x%lx\n", val, offset); + printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n", + expected); + if (lseek(fd, offset, SEEK_SET) < 0) { + perror(PREFIX ERROR_PREFIX "seek failed"); + return false; + } + + while (offset + total_ret_count < len) { + ret_count = read(fd, buf, wr_chunk_size); + if (ret_count == 0) { + printf(PREFIX PREFIX "read reach end of the file\n"); + break; + } else if (ret_count < 0) { + perror(PREFIX ERROR_PREFIX "read failed"); + break; + } + ++val; + if (!verify_chunk(buf, ret_count, val)) + return false; + + total_ret_count += ret_count; + } + printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n", + total_ret_count); + + return total_ret_count == expected; +} + +static bool read_hugepage_filemap(int fd, size_t len, + size_t wr_chunk_size, size_t expected) +{ + char buf[MAX_WRITE_READ_CHUNK_SIZE]; + ssize_t ret_count = 0; + ssize_t total_ret_count = 0; + char val = 0; + + printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n", + expected); + while (total_ret_count < len) { + ret_count = read(fd, buf, wr_chunk_size); + if (ret_count == 0) { + printf(PREFIX PREFIX "read reach end of the file\n"); + break; + } else if (ret_count < 0) { + perror(PREFIX ERROR_PREFIX "read failed"); + break; + } + ++val; + if (!verify_chunk(buf, ret_count, val)) + return false; + + total_ret_count += ret_count; + } + printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n", + total_ret_count); + + return total_ret_count == expected; +} + +static enum test_status +test_hugetlb_read(int fd, size_t len, size_t wr_chunk_size) +{ + enum test_status status = TEST_SKIPPED; + char *filemap = NULL; + + if (ftruncate(fd, len) < 0) { + perror(PREFIX ERROR_PREFIX "ftruncate failed"); + return status; + } + + filemap = mmap(NULL, len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (filemap == MAP_FAILED) { + perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed"); + goto done; + } + + setup_filemap(filemap, len, wr_chunk_size); + status = TEST_FAILED; + + if (read_hugepage_filemap(fd, len, wr_chunk_size, len)) + status = TEST_PASSED; + + munmap(filemap, len); +done: + if (ftruncate(fd, 0) < 0) { + perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed"); + status = TEST_FAILED; + } + + return status; +} + +static enum test_status +test_hugetlb_read_hwpoison(int fd, size_t len, size_t wr_chunk_size, + bool skip_hwpoison_page) +{ + enum test_status status = TEST_SKIPPED; + char *filemap = NULL; + char *hwp_addr = NULL; + const unsigned long pagesize = getpagesize(); + + if (ftruncate(fd, len) < 0) { + perror(PREFIX ERROR_PREFIX "ftruncate failed"); + return status; + } + + filemap = mmap(NULL, len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (filemap == MAP_FAILED) { + perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed"); + goto done; + } + + setup_filemap(filemap, len, wr_chunk_size); + status = TEST_FAILED; + + /* + * Poisoned hugetlb page layout (assume hugepagesize=2MB): + * |<---------------------- 1MB ---------------------->| + * |<---- healthy page ---->|<---- HWPOISON page ----->| + * |<------------------- (1MB - 8KB) ----------------->| + */ + hwp_addr = filemap + len / 2 + pagesize; + if (madvise(hwp_addr, pagesize, MADV_HWPOISON) < 0) { + perror(PREFIX ERROR_PREFIX "MADV_HWPOISON failed"); + goto unmap; + } + + if (!skip_hwpoison_page) { + /* + * Userspace should be able to read (1MB + 1 page) from + * the beginning of the HWPOISONed hugepage. + */ + if (read_hugepage_filemap(fd, len, wr_chunk_size, + len / 2 + pagesize)) + status = TEST_PASSED; + } else { + /* + * Userspace should be able to read (1MB - 2 pages) from + * HWPOISONed hugepage. + */ + if (seek_read_hugepage_filemap(fd, len, wr_chunk_size, + len / 2 + MAX(2 * pagesize, wr_chunk_size), + len / 2 - MAX(2 * pagesize, wr_chunk_size))) + status = TEST_PASSED; + } + +unmap: + munmap(filemap, len); +done: + if (ftruncate(fd, 0) < 0) { + perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed"); + status = TEST_FAILED; + } + + return status; +} + +static int create_hugetlbfs_file(struct statfs *file_stat) +{ + int fd; + + fd = memfd_create("hugetlb_tmp", MFD_HUGETLB); + if (fd < 0) { + perror(PREFIX ERROR_PREFIX "could not open hugetlbfs file"); + return -1; + } + + memset(file_stat, 0, sizeof(*file_stat)); + if (fstatfs(fd, file_stat)) { + perror(PREFIX ERROR_PREFIX "fstatfs failed"); + goto close; + } + if (file_stat->f_type != HUGETLBFS_MAGIC) { + printf(PREFIX ERROR_PREFIX "not hugetlbfs file\n"); + goto close; + } + + return fd; +close: + close(fd); + return -1; +} + +int main(void) +{ + int fd; + struct statfs file_stat; + enum test_status status; + /* Test read() in different granularity. */ + size_t wr_chunk_sizes[] = { + getpagesize() / 2, getpagesize(), + getpagesize() * 2, getpagesize() * 4 + }; + size_t i; + + for (i = 0; i < ARRAY_SIZE(wr_chunk_sizes); ++i) { + printf("Write/read chunk size=0x%lx\n", + wr_chunk_sizes[i]); + + fd = create_hugetlbfs_file(&file_stat); + if (fd < 0) + goto create_failure; + printf(PREFIX "HugeTLB read regression test...\n"); + status = test_hugetlb_read(fd, file_stat.f_bsize, + wr_chunk_sizes[i]); + printf(PREFIX "HugeTLB read regression test...%s\n", + status_to_str(status)); + close(fd); + if (status == TEST_FAILED) + return -1; + + fd = create_hugetlbfs_file(&file_stat); + if (fd < 0) + goto create_failure; + printf(PREFIX "HugeTLB read HWPOISON test...\n"); + status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize, + wr_chunk_sizes[i], false); + printf(PREFIX "HugeTLB read HWPOISON test...%s\n", + status_to_str(status)); + close(fd); + if (status == TEST_FAILED) + return -1; + + fd = create_hugetlbfs_file(&file_stat); + if (fd < 0) + goto create_failure; + printf(PREFIX "HugeTLB seek then read HWPOISON test...\n"); + status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize, + wr_chunk_sizes[i], true); + printf(PREFIX "HugeTLB seek then read HWPOISON test...%s\n", + status_to_str(status)); + close(fd); + if (status == TEST_FAILED) + return -1; + } + + return 0; + +create_failure: + printf(ERROR_PREFIX "Abort test: failed to create hugetlbfs file\n"); + return -1; +} diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 26853badae70..901e950f9138 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -27,8 +27,12 @@ #define KiB 1024u #define MiB (1024 * KiB) +static int mem_fd; static int ksm_fd; static int ksm_full_scans_fd; +static int proc_self_ksm_stat_fd; +static int proc_self_ksm_merging_pages_fd; +static int ksm_use_zero_pages_fd; static int pagemap_fd; static size_t pagesize; @@ -59,6 +63,49 @@ static bool range_maps_duplicates(char *addr, unsigned long size) return false; } +static long get_my_ksm_zero_pages(void) +{ + char buf[200]; + char *substr_ksm_zero; + size_t value_pos; + ssize_t read_size; + unsigned long my_ksm_zero_pages; + + if (!proc_self_ksm_stat_fd) + return 0; + + read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0); + if (read_size < 0) + return -errno; + + buf[read_size] = 0; + + substr_ksm_zero = strstr(buf, "ksm_zero_pages"); + if (!substr_ksm_zero) + return 0; + + value_pos = strcspn(substr_ksm_zero, "0123456789"); + my_ksm_zero_pages = strtol(substr_ksm_zero + value_pos, NULL, 10); + + return my_ksm_zero_pages; +} + +static long get_my_merging_pages(void) +{ + char buf[10]; + ssize_t ret; + + if (proc_self_ksm_merging_pages_fd < 0) + return proc_self_ksm_merging_pages_fd; + + ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0); + if (ret <= 0) + return -errno; + buf[ret] = 0; + + return strtol(buf, NULL, 10); +} + static long ksm_get_full_scans(void) { char buf[10]; @@ -91,11 +138,30 @@ static int ksm_merge(void) return 0; } -static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl) +static int ksm_unmerge(void) +{ + if (write(ksm_fd, "2", 1) != 1) + return -errno; + return 0; +} + +static char *mmap_and_merge_range(char val, unsigned long size, int prot, + bool use_prctl) { char *map; int ret; + /* Stabilize accounting by disabling KSM completely. */ + if (ksm_unmerge()) { + ksft_test_result_fail("Disabling (unmerging) KSM failed\n"); + goto unmap; + } + + if (get_my_merging_pages() > 0) { + ksft_test_result_fail("Still pages merged\n"); + goto unmap; + } + map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); if (map == MAP_FAILED) { @@ -112,6 +178,11 @@ static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl) /* Make sure each page contains the same values to merge them. */ memset(map, val, size); + if (mprotect(map, size, prot)) { + ksft_test_result_skip("mprotect() failed\n"); + goto unmap; + } + if (use_prctl) { ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); if (ret < 0 && errno == EINVAL) { @@ -131,6 +202,16 @@ static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl) ksft_test_result_fail("Running KSM failed\n"); goto unmap; } + + /* + * Check if anything was merged at all. Ignore the zero page that is + * accounted differently (depending on kernel support). + */ + if (val && !get_my_merging_pages()) { + ksft_test_result_fail("No pages got merged\n"); + goto unmap; + } + return map; unmap: munmap(map, size); @@ -144,7 +225,7 @@ static void test_unmerge(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); if (map == MAP_FAILED) return; @@ -159,6 +240,70 @@ unmap: munmap(map, size); } +static void test_unmerge_zero_pages(void) +{ + const unsigned int size = 2 * MiB; + char *map; + unsigned int offs; + unsigned long pages_expected; + + ksft_print_msg("[RUN] %s\n", __func__); + + if (proc_self_ksm_stat_fd < 0) { + ksft_test_result_skip("open(\"/proc/self/ksm_stat\") failed\n"); + return; + } + if (ksm_use_zero_pages_fd < 0) { + ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); + return; + } + if (write(ksm_use_zero_pages_fd, "1", 1) != 1) { + ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); + return; + } + + /* Let KSM deduplicate zero pages. */ + map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, false); + if (map == MAP_FAILED) + return; + + /* Check if ksm_zero_pages is updated correctly after KSM merging */ + pages_expected = size / pagesize; + if (pages_expected != get_my_ksm_zero_pages()) { + ksft_test_result_fail("'ksm_zero_pages' updated after merging\n"); + goto unmap; + } + + /* Try to unmerge half of the region */ + if (madvise(map, size / 2, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto unmap; + } + + /* Check if ksm_zero_pages is updated correctly after unmerging */ + pages_expected /= 2; + if (pages_expected != get_my_ksm_zero_pages()) { + ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n"); + goto unmap; + } + + /* Trigger unmerging of the other half by writing to the pages. */ + for (offs = size / 2; offs < size; offs += pagesize) + *((unsigned int *)&map[offs]) = offs; + + /* Now we should have no zeropages remaining. */ + if (get_my_ksm_zero_pages()) { + ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n"); + goto unmap; + } + + /* Check if ksm zero pages are really unmerged */ + ksft_test_result(!range_maps_duplicates(map, size), + "KSM zero pages were unmerged\n"); +unmap: + munmap(map, size); +} + static void test_unmerge_discarded(void) { const unsigned int size = 2 * MiB; @@ -166,7 +311,7 @@ static void test_unmerge_discarded(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); if (map == MAP_FAILED) return; @@ -198,7 +343,7 @@ static void test_unmerge_uffd_wp(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); if (map == MAP_FAILED) return; @@ -341,7 +486,7 @@ static void test_prctl_unmerge(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, true); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, true); if (map == MAP_FAILED) return; @@ -356,9 +501,42 @@ unmap: munmap(map, size); } +static void test_prot_none(void) +{ + const unsigned int size = 2 * MiB; + char *map; + int i; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0x11, size, PROT_NONE, false); + if (map == MAP_FAILED) + goto unmap; + + /* Store a unique value in each page on one half using ptrace */ + for (i = 0; i < size / 2; i += pagesize) { + lseek(mem_fd, (uintptr_t) map + i, SEEK_SET); + if (write(mem_fd, &i, sizeof(i)) != sizeof(i)) { + ksft_test_result_fail("ptrace write failed\n"); + goto unmap; + } + } + + /* Trigger unsharing on the other half. */ + if (madvise(map + size / 2, size / 2, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto unmap; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +unmap: + munmap(map, size); +} + int main(int argc, char **argv) { - unsigned int tests = 5; + unsigned int tests = 7; int err; #ifdef __NR_userfaultfd @@ -370,6 +548,9 @@ int main(int argc, char **argv) pagesize = getpagesize(); + mem_fd = open("/proc/self/mem", O_RDWR); + if (mem_fd < 0) + ksft_exit_fail_msg("opening /proc/self/mem failed\n"); ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); if (ksm_fd < 0) ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); @@ -379,13 +560,20 @@ int main(int argc, char **argv) pagemap_fd = open("/proc/self/pagemap", O_RDONLY); if (pagemap_fd < 0) ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); + proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); + proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", + O_RDONLY); + ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); test_unmerge(); + test_unmerge_zero_pages(); test_unmerge_discarded(); #ifdef __NR_userfaultfd test_unmerge_uffd_wp(); #endif + test_prot_none(); + test_prctl(); test_prctl_fork(); test_prctl_unmerge(); diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c index 60547245e479..17bcb07f19f3 100644 --- a/tools/testing/selftests/mm/madv_populate.c +++ b/tools/testing/selftests/mm/madv_populate.c @@ -264,14 +264,35 @@ static void test_softdirty(void) munmap(addr, SIZE); } +static int system_has_softdirty(void) +{ + /* + * There is no way to check if the kernel supports soft-dirty, other + * than by writing to a page and seeing if the bit was set. But the + * tests are intended to check that the bit gets set when it should, so + * doing that check would turn a potentially legitimate fail into a + * skip. Fortunately, we know for sure that arm64 does not support + * soft-dirty. So for now, let's just use the arch as a corse guide. + */ +#if defined(__aarch64__) + return 0; +#else + return 1; +#endif +} + int main(int argc, char **argv) { + int nr_tests = 16; int err; pagesize = getpagesize(); + if (system_has_softdirty()) + nr_tests += 5; + ksft_print_header(); - ksft_set_plan(21); + ksft_set_plan(nr_tests); sense_support(); test_prot_read(); @@ -279,7 +300,8 @@ int main(int argc, char **argv) test_holes(); test_populate_read(); test_populate_write(); - test_softdirty(); + if (system_has_softdirty()) + test_softdirty(); err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c index 240f2d9dae7a..7945d0754875 100644 --- a/tools/testing/selftests/mm/map_populate.c +++ b/tools/testing/selftests/mm/map_populate.c @@ -77,7 +77,7 @@ int main(int argc, char **argv) unsigned long *smap; ftmp = tmpfile(); - BUG_ON(ftmp == 0, "tmpfile()"); + BUG_ON(!ftmp, "tmpfile()"); ret = ftruncate(fileno(ftmp), MMAP_SZ); BUG_ON(ret, "ftruncate()"); diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index 379581567f27..6908569ef406 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -10,12 +10,13 @@ #include <numa.h> #include <numaif.h> #include <sys/mman.h> +#include <sys/prctl.h> #include <sys/types.h> #include <signal.h> #include <time.h> #define TWOMEG (2<<20) -#define RUNTIME (60) +#define RUNTIME (20) #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) @@ -155,10 +156,15 @@ TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME) memset(ptr, 0xde, TWOMEG); for (i = 0; i < self->nthreads - 1; i++) { pid = fork(); - if (!pid) + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGHUP); + /* Parent may have died before prctl so check now. */ + if (getppid() == 1) + kill(getpid(), SIGHUP); access_mem(ptr); - else + } else { self->pids[i] = pid; + } } ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); diff --git a/tools/testing/selftests/mm/mrelease_test.c b/tools/testing/selftests/mm/mrelease_test.c index dca21042b679..d822004a374e 100644 --- a/tools/testing/selftests/mm/mrelease_test.c +++ b/tools/testing/selftests/mm/mrelease_test.c @@ -7,6 +7,7 @@ #include <stdbool.h> #include <stdio.h> #include <stdlib.h> +#include <sys/syscall.h> #include <sys/wait.h> #include <unistd.h> #include <asm-generic/unistd.h> diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 3f26f6e15b2a..3e2bc818d566 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -12,11 +12,14 @@ exitcode=0 usage() { cat <<EOF -usage: ${BASH_SOURCE[0]:-$0} [ -h | -t "<categories>"] +usage: ${BASH_SOURCE[0]:-$0} [ options ] + + -a: run all tests, including extra ones -t: specify specific categories to tests to run -h: display this message -The default behavior is to run all tests. +The default behavior is to run required tests only. If -a is specified, +will run all tests. Alternatively, specific groups tests can be run by passing a string to the -t argument containing one or more of the following categories @@ -55,14 +58,27 @@ separated by spaces: test soft dirty page bit semantics - cow test copy-on-write semantics +- thp + test transparent huge pages +- migration + invoke move_pages(2) to exercise the migration entry code + paths in the kernel +- mkdirty + test handling of code that might set PTE/PMD dirty in + read-only VMAs +- mdwe + test prctl(PR_SET_MDWE, ...) + example: ./run_vmtests.sh -t "hmm mmap ksm" EOF exit 0 } +RUN_ALL=false -while getopts "ht:" OPT; do +while getopts "aht:" OPT; do case ${OPT} in + "a") RUN_ALL=true ;; "h") usage ;; "t") VM_SELFTEST_ITEMS=${OPTARG} ;; esac @@ -85,6 +101,30 @@ test_selected() { fi } +run_gup_matrix() { + # -t: thp=on, -T: thp=off, -H: hugetlb=on + local hugetlb_mb=$(( needmem_KB / 1024 )) + + for huge in -t -T "-H -m $hugetlb_mb"; do + # -u: gup-fast, -U: gup-basic, -a: pin-fast, -b: pin-basic, -L: pin-longterm + for test_cmd in -u -U -a -b -L; do + # -w: write=1, -W: write=0 + for write in -w -W; do + # -S: shared + for share in -S " "; do + # -n: How many pages to fetch together? 512 is special + # because it's default thp size (or 2M on x86), 123 to + # just test partial gup when hit a huge in whatever form + for num in "-n 1" "-n 512" "-n 123"; do + CATEGORY="gup_test" run_test ./gup_test \ + $huge $test_cmd $write $share $num + done + done + done + done + done +} + # get huge pagesize and freepages from /proc/meminfo while read -r name size unit; do if [ "$name" = "HugePages_Free:" ]; then @@ -189,13 +229,16 @@ fi CATEGORY="mmap" run_test ./map_fixed_noreplace -# get_user_pages_fast() benchmark -CATEGORY="gup_test" run_test ./gup_test -u -# pin_user_pages_fast() benchmark -CATEGORY="gup_test" run_test ./gup_test -a +if $RUN_ALL; then + run_gup_matrix +else + # get_user_pages_fast() benchmark + CATEGORY="gup_test" run_test ./gup_test -u + # pin_user_pages_fast() benchmark + CATEGORY="gup_test" run_test ./gup_test -a +fi # Dump pages 0, 19, and 4096, using pin_user_pages: CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000 - CATEGORY="gup_test" run_test ./gup_longterm CATEGORY="userfaultfd" run_test ./uffd-unit-tests @@ -262,6 +305,10 @@ CATEGORY="madv_populate" run_test ./madv_populate CATEGORY="memfd_secret" run_test ./memfd_secret +# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 +CATEGORY="ksm" run_test ./ksm_tests -H -s 100 +# KSM KSM_MERGE_TIME test with size of 100 +CATEGORY="ksm" run_test ./ksm_tests -P -s 100 # KSM MADV_MERGEABLE test with 10 identical pages CATEGORY="ksm" run_test ./ksm_tests -M -p 10 # KSM unmerge test @@ -290,11 +337,26 @@ then CATEGORY="pkey" run_test ./protection_keys_64 fi -CATEGORY="soft_dirty" run_test ./soft-dirty +if [ -x ./soft-dirty ] +then + CATEGORY="soft_dirty" run_test ./soft-dirty +fi # COW tests CATEGORY="cow" run_test ./cow +CATEGORY="thp" run_test ./khugepaged + +CATEGORY="thp" run_test ./transhuge-stress -d 20 + +CATEGORY="thp" run_test ./split_huge_page_test + +CATEGORY="migration" run_test ./migration + +CATEGORY="mkdirty" run_test ./mkdirty + +CATEGORY="mdwe" run_test ./mdwe_test + echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" exit $exitcode diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings index 9abfc60e9e6f..a953c96aa16e 100644 --- a/tools/testing/selftests/mm/settings +++ b/tools/testing/selftests/mm/settings @@ -1 +1 @@ -timeout=45 +timeout=180 diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 380ab5f0a534..16ed4dfa7359 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -139,7 +139,7 @@ void test_mmap(unsigned long size, unsigned flags) before, after, before - after, size); assert(size == getpagesize() || (before - after) == NUM_PAGES); show(size); - err = munmap(map, size); + err = munmap(map, size * NUM_PAGES); assert(!err); } @@ -222,7 +222,7 @@ int main(void) test_mmap(ps, MAP_HUGETLB | arg); } printf("Testing default huge mmap\n"); - test_mmap(default_hps, SHM_HUGETLB); + test_mmap(default_hps, MAP_HUGETLB); puts("Testing non-huge shmget"); test_shmget(getpagesize(), 0); diff --git a/tools/testing/selftests/mm/transhuge-stress.c b/tools/testing/selftests/mm/transhuge-stress.c index ba9d37ad3a89..c61fb9350b8c 100644 --- a/tools/testing/selftests/mm/transhuge-stress.c +++ b/tools/testing/selftests/mm/transhuge-stress.c @@ -25,13 +25,14 @@ int main(int argc, char **argv) { size_t ram, len; void *ptr, *p; - struct timespec a, b; + struct timespec start, a, b; int i = 0; char *name = NULL; double s; uint8_t *map; size_t map_len; int pagemap_fd; + int duration = 0; ram = sysconf(_SC_PHYS_PAGES); if (ram > SIZE_MAX / psize() / 4) @@ -42,9 +43,11 @@ int main(int argc, char **argv) while (++i < argc) { if (!strcmp(argv[i], "-h")) - errx(1, "usage: %s [size in MiB]", argv[0]); + errx(1, "usage: %s [-f <filename>] [-d <duration>] [size in MiB]", argv[0]); else if (!strcmp(argv[i], "-f")) name = argv[++i]; + else if (!strcmp(argv[i], "-d")) + duration = atoi(argv[++i]); else len = atoll(argv[i]) << 20; } @@ -78,6 +81,8 @@ int main(int argc, char **argv) if (!map) errx(2, "map malloc"); + clock_gettime(CLOCK_MONOTONIC, &start); + while (1) { int nr_succeed = 0, nr_failed = 0, nr_pages = 0; @@ -118,5 +123,8 @@ int main(int argc, char **argv) "%4d succeed, %4d failed, %4d different pages", s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20), nr_succeed, nr_failed, nr_pages); + + if (duration > 0 && b.tv_sec - start.tv_sec >= duration) + return 0; } } diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index ba20d7504022..02b89860e193 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -499,6 +499,9 @@ void *uffd_poll_thread(void *arg) int ret; char tmp_chr; + if (!args->handle_fault) + args->handle_fault = uffd_handle_page_fault; + pollfd[0].fd = uffd; pollfd[0].events = POLLIN; pollfd[1].fd = pipefd[cpu*2]; @@ -527,7 +530,7 @@ void *uffd_poll_thread(void *arg) err("unexpected msg event %u\n", msg.event); break; case UFFD_EVENT_PAGEFAULT: - uffd_handle_page_fault(&msg, args); + args->handle_fault(&msg, args); break; case UFFD_EVENT_FORK: close(uffd); diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index 197f5262fe0d..7c4fa964c3b0 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -77,6 +77,9 @@ struct uffd_args { unsigned long missing_faults; unsigned long wp_faults; unsigned long minor_faults; + + /* A custom fault handler; defaults to uffd_handle_page_fault. */ + void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args); }; struct uffd_test_ops { diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 995ff13e74c7..469e0476af26 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -53,21 +53,21 @@ pthread_attr_t attr; do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) const char *examples = - "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" - "./userfaultfd anon 100 99999\n\n" - "# Run share memory test on 1GiB region with 99 bounces:\n" - "./userfaultfd shmem 1000 99\n\n" - "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" - "./userfaultfd hugetlb 256 50\n\n" - "# Run the same hugetlb test but using private file:\n" - "./userfaultfd hugetlb-private 256 50\n\n" - "# 10MiB-~6GiB 999 bounces anonymous test, " - "continue forever unless an error triggers\n" - "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; + "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" + "./uffd-stress anon 100 99999\n\n" + "# Run share memory test on 1GiB region with 99 bounces:\n" + "./uffd-stress shmem 1000 99\n\n" + "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" + "./uffd-stress hugetlb 256 50\n\n" + "# Run the same hugetlb test but using private file:\n" + "./uffd-stress hugetlb-private 256 50\n\n" + "# 10MiB-~6GiB 999 bounces anonymous test, " + "continue forever unless an error triggers\n" + "while ./uffd-stress anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; static void usage(void) { - fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces>\n\n"); + fprintf(stderr, "\nUsage: ./uffd-stress <test type> <MiB> <bounces>\n\n"); fprintf(stderr, "Supported <test type>: anon, hugetlb, " "hugetlb-private, shmem, shmem-private\n\n"); fprintf(stderr, "Examples:\n\n"); @@ -189,10 +189,8 @@ static int stress(struct uffd_args *args) locking_thread, (void *)cpu)) return 1; if (bounces & BOUNCE_POLL) { - if (pthread_create(&uffd_threads[cpu], &attr, - uffd_poll_thread, - (void *)&args[cpu])) - return 1; + if (pthread_create(&uffd_threads[cpu], &attr, uffd_poll_thread, &args[cpu])) + err("uffd_poll_thread create"); } else { if (pthread_create(&uffd_threads[cpu], &attr, uffd_read_thread, @@ -250,6 +248,8 @@ static int userfaultfd_stress(void) struct uffd_args args[nr_cpus]; uint64_t mem_size = nr_pages * page_size; + memset(args, 0, sizeof(struct uffd_args) * nr_cpus); + if (uffd_test_ctx_init(UFFD_FEATURE_WP_UNPOPULATED, NULL)) err("context init failed"); diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 04d91f144d1c..2709a34a39c5 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -951,6 +951,117 @@ static void uffd_zeropage_test(uffd_test_args_t *args) uffd_test_pass(); } +static void uffd_register_poison(int uffd, void *addr, uint64_t len) +{ + uint64_t ioctls = 0; + uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON); + + if (uffd_register_with_ioctls(uffd, addr, len, true, + false, false, &ioctls)) + err("poison register fail"); + + if ((ioctls & expected) != expected) + err("registered area doesn't support COPY and POISON ioctls"); +} + +static void do_uffdio_poison(int uffd, unsigned long offset) +{ + struct uffdio_poison uffdio_poison = { 0 }; + int ret; + __s64 res; + + uffdio_poison.range.start = (unsigned long) area_dst + offset; + uffdio_poison.range.len = page_size; + uffdio_poison.mode = 0; + ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison); + res = uffdio_poison.updated; + + if (ret) + err("UFFDIO_POISON error: %"PRId64, (int64_t)res); + else if (res != page_size) + err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res); +} + +static void uffd_poison_handle_fault( + struct uffd_msg *msg, struct uffd_args *args) +{ + unsigned long offset; + + if (msg->event != UFFD_EVENT_PAGEFAULT) + err("unexpected msg event %u", msg->event); + + if (msg->arg.pagefault.flags & + (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR)) + err("unexpected fault type %llu", msg->arg.pagefault.flags); + + offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset &= ~(page_size-1); + + /* Odd pages -> copy zeroed page; even pages -> poison. */ + if (offset & page_size) + copy_page(uffd, offset, false); + else + do_uffdio_poison(uffd, offset); +} + +static void uffd_poison_test(uffd_test_args_t *targs) +{ + pthread_t uffd_mon; + char c; + struct uffd_args args = { 0 }; + struct sigaction act = { 0 }; + unsigned long nr_sigbus = 0; + unsigned long nr; + + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + + uffd_register_poison(uffd, area_dst, nr_pages * page_size); + memset(area_src, 0, nr_pages * page_size); + + args.handle_fault = uffd_poison_handle_fault; + if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) + err("uffd_poll_thread create"); + + sigbuf = &jbuf; + act.sa_sigaction = sighndl; + act.sa_flags = SA_SIGINFO; + if (sigaction(SIGBUS, &act, 0)) + err("sigaction"); + + for (nr = 0; nr < nr_pages; ++nr) { + unsigned long offset = nr * page_size; + const char *bytes = (const char *) area_dst + offset; + const char *i; + + if (sigsetjmp(*sigbuf, 1)) { + /* + * Access below triggered a SIGBUS, which was caught by + * sighndl, which then jumped here. Count this SIGBUS, + * and move on to next page. + */ + ++nr_sigbus; + continue; + } + + for (i = bytes; i < bytes + page_size; ++i) { + if (*i) + err("nonzero byte in area_dst (%p) at %p: %u", + area_dst, i, *i); + } + } + + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + err("pipe write"); + if (pthread_join(uffd_mon, NULL)) + err("pthread_join()"); + + if (nr_sigbus != nr_pages / 2) + err("expected to receive %lu SIGBUS, actually received %lu", + nr_pages / 2, nr_sigbus); + + uffd_test_pass(); +} + /* * Test the returned uffdio_register.ioctls with different register modes. * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. @@ -1126,6 +1237,12 @@ uffd_test_case_t uffd_tests[] = { UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_HUGETLBFS_SHMEM, }, + { + .name = "poison", + .uffd_fn = uffd_poison_test, + .mem_targets = MEM_ALL, + .uffd_feature_required = UFFD_FEATURE_POISON, + }, }; static void usage(const char *prog) diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c index 7cfaf4a74c57..cfbc501290d3 100644 --- a/tools/testing/selftests/mm/va_high_addr_switch.c +++ b/tools/testing/selftests/mm/va_high_addr_switch.c @@ -292,7 +292,7 @@ static int supported_arch(void) #elif defined(__x86_64__) return 1; #elif defined(__aarch64__) - return 1; + return getpagesize() == PAGE_SIZE; #else return 0; #endif diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index 7588428b8fcd..e1052443d070 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -77,7 +77,7 @@ static const char proc_pid_smaps_vsyscall_1[] = "Swap: 0 kB\n" "SwapPss: 0 kB\n" "Locked: 0 kB\n" -"THPeligible: 0\n" +"THPeligible: 0\n" /* * "ProtectionKey:" field is conditional. It is possible to check it as well, * but I don't have such machine. @@ -107,7 +107,7 @@ static const char proc_pid_smaps_vsyscall_2[] = "Swap: 0 kB\n" "SwapPss: 0 kB\n" "Locked: 0 kB\n" -"THPeligible: 0\n" +"THPeligible: 0\n" /* * "ProtectionKey:" field is conditional. It is possible to check it as well, * but I'm too tired. |