From ac88ee7d2b87c1f93b89fd9ce5911c2ab2bda816 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Dec 2023 08:24:23 +0100 Subject: module: Use set_memory_rox() A couple of architectures seem concerned about calling set_memory_ro() and set_memory_x() too frequently and have implemented a version of set_memory_rox(), see commit 60463628c9e0 ("x86/mm: Implement native set_memory_rox()") and commit 22e99fa56443 ("s390/mm: implement set_memory_rox()") Use set_memory_rox() in modules when STRICT_MODULES_RWX is set. Signed-off-by: Christophe Leroy Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/module/main.c') diff --git a/kernel/module/main.c b/kernel/module/main.c index 36681911c05a..2e0187e16669 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2738,7 +2738,7 @@ static int complete_formation(struct module *mod, struct load_info *info) module_enable_ro(mod, false); module_enable_nx(mod); - module_enable_x(mod); + module_enable_rox(mod); /* * Mark state as coming so strong_try_module_get() ignores us, -- cgit v1.2.3 From 3559ad395bf02f3dee576dc9acab4ce330ce57b5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Dec 2023 08:24:24 +0100 Subject: module: Change module_enable_{nx/x/ro}() to more explicit names It's a bit puzzling to see a call to module_enable_nx() followed by a call to module_enable_x(). This is because one applies on text while the other applies on data. Change name to make that more clear. Signed-off-by: Christophe Leroy Signed-off-by: Luis Chamberlain --- kernel/module/internal.h | 6 +++--- kernel/module/main.c | 8 ++++---- kernel/module/strict_rwx.c | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'kernel/module/main.c') diff --git a/kernel/module/internal.h b/kernel/module/internal.h index a647ab17193d..4f1b98f011da 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -322,9 +322,9 @@ static inline struct module *mod_find(unsigned long addr, struct mod_tree_root * } #endif /* CONFIG_MODULES_TREE_LOOKUP */ -void module_enable_ro(const struct module *mod, bool after_init); -void module_enable_nx(const struct module *mod); -void module_enable_rox(const struct module *mod); +void module_enable_rodata_ro(const struct module *mod, bool after_init); +void module_enable_data_nx(const struct module *mod); +void module_enable_text_rox(const struct module *mod); int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod); diff --git a/kernel/module/main.c b/kernel/module/main.c index 2e0187e16669..a9a4a4885102 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2571,7 +2571,7 @@ static noinline int do_init_module(struct module *mod) /* Switch to core kallsyms now init is done: kallsyms may be walking! */ rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms); #endif - module_enable_ro(mod, true); + module_enable_rodata_ro(mod, true); mod_tree_remove_init(mod); module_arch_freeing_init(mod); for_class_mod_mem_type(type, init) { @@ -2736,9 +2736,9 @@ static int complete_formation(struct module *mod, struct load_info *info) module_bug_finalize(info->hdr, info->sechdrs, mod); module_cfi_finalize(info->hdr, info->sechdrs, mod); - module_enable_ro(mod, false); - module_enable_nx(mod); - module_enable_rox(mod); + module_enable_rodata_ro(mod, false); + module_enable_data_nx(mod); + module_enable_text_rox(mod); /* * Mark state as coming so strong_try_module_get() ignores us, diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c index 9345b09f28a5..9b2d58a8d59d 100644 --- a/kernel/module/strict_rwx.c +++ b/kernel/module/strict_rwx.c @@ -26,7 +26,7 @@ static void module_set_memory(const struct module *mod, enum mod_mem_type type, * CONFIG_STRICT_MODULE_RWX because they are needed regardless of whether we * are strict. */ -void module_enable_rox(const struct module *mod) +void module_enable_text_rox(const struct module *mod) { for_class_mod_mem_type(type, text) { if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) @@ -36,7 +36,7 @@ void module_enable_rox(const struct module *mod) } } -void module_enable_ro(const struct module *mod, bool after_init) +void module_enable_rodata_ro(const struct module *mod, bool after_init) { if (!IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) return; @@ -52,7 +52,7 @@ void module_enable_ro(const struct module *mod, bool after_init) module_set_memory(mod, MOD_RO_AFTER_INIT, set_memory_ro); } -void module_enable_nx(const struct module *mod) +void module_enable_data_nx(const struct module *mod) { if (!IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) return; -- cgit v1.2.3 From d1909c0221739356f31c721de4743e7d219a56cc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Feb 2024 09:14:27 +0100 Subject: module: Don't ignore errors from set_memory_XX() set_memory_ro(), set_memory_nx(), set_memory_x() and other helpers can fail and return an error. In that case the memory might not be protected as expected and the module loading has to be aborted to avoid security issues. Check return value of all calls to set_memory_XX() and handle error if any. Add a check to not call set_memory_XX() on NULL pointers as some architectures may not like it allthough numpages is always 0 in that case. This also avoid a useless call to set_vm_flush_reset_perms(). Link: https://github.com/KSPP/linux/issues/7 Signed-off-by: Christophe Leroy Tested-by: Marek Szyprowski Reviewed-by: Kees Cook Signed-off-by: Luis Chamberlain --- kernel/module/internal.h | 6 +++--- kernel/module/main.c | 20 ++++++++++++++---- kernel/module/strict_rwx.c | 51 ++++++++++++++++++++++++++++++++-------------- 3 files changed, 55 insertions(+), 22 deletions(-) (limited to 'kernel/module/main.c') diff --git a/kernel/module/internal.h b/kernel/module/internal.h index 4f1b98f011da..2ebece8a789f 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -322,9 +322,9 @@ static inline struct module *mod_find(unsigned long addr, struct mod_tree_root * } #endif /* CONFIG_MODULES_TREE_LOOKUP */ -void module_enable_rodata_ro(const struct module *mod, bool after_init); -void module_enable_data_nx(const struct module *mod); -void module_enable_text_rox(const struct module *mod); +int module_enable_rodata_ro(const struct module *mod, bool after_init); +int module_enable_data_nx(const struct module *mod); +int module_enable_text_rox(const struct module *mod); int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod); diff --git a/kernel/module/main.c b/kernel/module/main.c index a9a4a4885102..689def7676c4 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2571,7 +2571,9 @@ static noinline int do_init_module(struct module *mod) /* Switch to core kallsyms now init is done: kallsyms may be walking! */ rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms); #endif - module_enable_rodata_ro(mod, true); + ret = module_enable_rodata_ro(mod, true); + if (ret) + goto fail_mutex_unlock; mod_tree_remove_init(mod); module_arch_freeing_init(mod); for_class_mod_mem_type(type, init) { @@ -2609,6 +2611,8 @@ static noinline int do_init_module(struct module *mod) return 0; +fail_mutex_unlock: + mutex_unlock(&module_mutex); fail_free_freeinit: kfree(freeinit); fail: @@ -2736,9 +2740,15 @@ static int complete_formation(struct module *mod, struct load_info *info) module_bug_finalize(info->hdr, info->sechdrs, mod); module_cfi_finalize(info->hdr, info->sechdrs, mod); - module_enable_rodata_ro(mod, false); - module_enable_data_nx(mod); - module_enable_text_rox(mod); + err = module_enable_rodata_ro(mod, false); + if (err) + goto out_strict_rwx; + err = module_enable_data_nx(mod); + if (err) + goto out_strict_rwx; + err = module_enable_text_rox(mod); + if (err) + goto out_strict_rwx; /* * Mark state as coming so strong_try_module_get() ignores us, @@ -2749,6 +2759,8 @@ static int complete_formation(struct module *mod, struct load_info *info) return 0; +out_strict_rwx: + module_bug_cleanup(mod); out: mutex_unlock(&module_mutex); return err; diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c index b36d93983465..c45caa4690e5 100644 --- a/kernel/module/strict_rwx.c +++ b/kernel/module/strict_rwx.c @@ -11,13 +11,16 @@ #include #include "internal.h" -static void module_set_memory(const struct module *mod, enum mod_mem_type type, - int (*set_memory)(unsigned long start, int num_pages)) +static int module_set_memory(const struct module *mod, enum mod_mem_type type, + int (*set_memory)(unsigned long start, int num_pages)) { const struct module_memory *mod_mem = &mod->mem[type]; + if (!mod_mem->base) + return 0; + set_vm_flush_reset_perms(mod_mem->base); - set_memory((unsigned long)mod_mem->base, mod_mem->size >> PAGE_SHIFT); + return set_memory((unsigned long)mod_mem->base, mod_mem->size >> PAGE_SHIFT); } /* @@ -26,35 +29,53 @@ static void module_set_memory(const struct module *mod, enum mod_mem_type type, * CONFIG_STRICT_MODULE_RWX because they are needed regardless of whether we * are strict. */ -void module_enable_text_rox(const struct module *mod) +int module_enable_text_rox(const struct module *mod) { for_class_mod_mem_type(type, text) { + int ret; + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) - module_set_memory(mod, type, set_memory_rox); + ret = module_set_memory(mod, type, set_memory_rox); else - module_set_memory(mod, type, set_memory_x); + ret = module_set_memory(mod, type, set_memory_x); + if (ret) + return ret; } + return 0; } -void module_enable_rodata_ro(const struct module *mod, bool after_init) +int module_enable_rodata_ro(const struct module *mod, bool after_init) { + int ret; + if (!IS_ENABLED(CONFIG_STRICT_MODULE_RWX) || !rodata_enabled) - return; + return 0; - module_set_memory(mod, MOD_RODATA, set_memory_ro); - module_set_memory(mod, MOD_INIT_RODATA, set_memory_ro); + ret = module_set_memory(mod, MOD_RODATA, set_memory_ro); + if (ret) + return ret; + ret = module_set_memory(mod, MOD_INIT_RODATA, set_memory_ro); + if (ret) + return ret; if (after_init) - module_set_memory(mod, MOD_RO_AFTER_INIT, set_memory_ro); + return module_set_memory(mod, MOD_RO_AFTER_INIT, set_memory_ro); + + return 0; } -void module_enable_data_nx(const struct module *mod) +int module_enable_data_nx(const struct module *mod) { if (!IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) - return; + return 0; - for_class_mod_mem_type(type, data) - module_set_memory(mod, type, set_memory_nx); + for_class_mod_mem_type(type, data) { + int ret = module_set_memory(mod, type, set_memory_nx); + + if (ret) + return ret; + } + return 0; } int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, -- cgit v1.2.3 From 8f8cd6c0a43ed637e620bbe45a8d0e0c2f4d5130 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 27 Feb 2024 10:35:46 +0800 Subject: modules: wait do_free_init correctly The synchronization here is to ensure the ordering of freeing of a module init so that it happens before W+X checking. It is worth noting it is not that the freeing was not happening, it is just that our sanity checkers raced against the permission checkers which assume init memory is already gone. Commit 1a7b7d922081 ("modules: Use vmalloc special flag") moved calling do_free_init() into a global workqueue instead of relying on it being called through call_rcu(..., do_free_init), which used to allowed us call do_free_init() asynchronously after the end of a subsequent grace period. The move to a global workqueue broke the gaurantees for code which needed to be sure the do_free_init() would complete with rcu_barrier(). To fix this callers which used to rely on rcu_barrier() must now instead use flush_work(&init_free_wq). Without this fix, we still could encounter false positive reports in W+X checking since the rcu_barrier() here can not ensure the ordering now. Even worse, the rcu_barrier() can introduce significant delay. Eric Chanudet reported that the rcu_barrier introduces ~0.1s delay on a PREEMPT_RT kernel. [ 0.291444] Freeing unused kernel memory: 5568K [ 0.402442] Run /sbin/init as init process With this fix, the above delay can be eliminated. Link: https://lkml.kernel.org/r/20240227023546.2490667-1-changbin.du@huawei.com Fixes: 1a7b7d922081 ("modules: Use vmalloc special flag") Signed-off-by: Changbin Du Tested-by: Eric Chanudet Acked-by: Luis Chamberlain Cc: Xiaoyi Su Signed-off-by: Andrew Morton --- include/linux/moduleloader.h | 8 ++++++++ init/main.c | 5 +++-- kernel/module/main.c | 9 +++++++-- 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'kernel/module/main.c') diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 001b2ce83832..89b1e0ed9811 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -115,6 +115,14 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); +#ifdef CONFIG_MODULES +void flush_module_init_free_work(void); +#else +static inline void flush_module_init_free_work(void) +{ +} +#endif + /* Any cleanup needed when module leaves. */ void module_arch_cleanup(struct module *mod); diff --git a/init/main.c b/init/main.c index 749a9f8d2c9b..5ecd4e8cf6d3 100644 --- a/init/main.c +++ b/init/main.c @@ -88,6 +88,7 @@ #include #include #include +#include #include #include #include @@ -1403,11 +1404,11 @@ static void mark_readonly(void) if (rodata_enabled) { /* * load_module() results in W+X mappings, which are cleaned - * up with call_rcu(). Let's make sure that queued work is + * up with init_free_wq. Let's make sure that queued work is * flushed so that we don't hit false positives looking for * insecure pages which are W+X. */ - rcu_barrier(); + flush_module_init_free_work(); mark_rodata_ro(); debug_checkwx(); rodata_test(); diff --git a/kernel/module/main.c b/kernel/module/main.c index 36681911c05a..b0b99348e1a8 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2489,6 +2489,11 @@ static void do_free_init(struct work_struct *w) } } +void flush_module_init_free_work(void) +{ + flush_work(&init_free_wq); +} + #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "module." /* Default value for module->async_probe_requested */ @@ -2593,8 +2598,8 @@ static noinline int do_init_module(struct module *mod) * Note that module_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to - * be cleaned up needs to sync with the queued work - ie - * rcu_barrier() + * be cleaned up needs to sync with the queued work by invoking + * flush_module_init_free_work(). */ if (llist_add(&freeinit->node, &init_free_list)) schedule_work(&init_free_wq); -- cgit v1.2.3