summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 16:52:33 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 16:52:33 -0700
commit888d3c9f7f3ae44101a3fd76528d3dd6f96e9fd0 (patch)
tree833fa91e015ea12e4eb4e0aa1111bcc08832fa91 /mm
parentb6a7828502dc769e1a5329027bc5048222fa210a (diff)
parente3184de9d46c2eebdb776face2e2662c6733331d (diff)
Merge tag 'sysctl-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux
Pull sysctl updates from Luis Chamberlain: "This only does a few sysctl moves from the kernel/sysctl.c file, the rest of the work has been put towards deprecating two API calls which incur recursion and prevent us from simplifying the registration process / saving memory per move. Most of the changes have been soaking on linux-next since v6.3-rc3. I've slowed down the kernel/sysctl.c moves due to Matthew Wilcox's feedback that we should see if we could *save* memory with these moves instead of incurring more memory. We currently incur more memory since when we move a syctl from kernel/sysclt.c out to its own file we end up having to add a new empty sysctl used to register it. To achieve saving memory we want to allow syctls to be passed without requiring the end element being empty, and just have our registration process rely on ARRAY_SIZE(). Without this, supporting both styles of sysctls would make the sysctl registration pretty brittle, hard to read and maintain as can be seen from Meng Tang's efforts to do just this [0]. Fortunately, in order to use ARRAY_SIZE() for all sysctl registrations also implies doing the work to deprecate two API calls which use recursion in order to support sysctl declarations with subdirectories. And so during this development cycle quite a bit of effort went into this deprecation effort. I've annotated the following two APIs are deprecated and in few kernel releases we should be good to remove them: - register_sysctl_table() - register_sysctl_paths() During this merge window we should be able to deprecate and unexport register_sysctl_paths(), we can probably do that towards the end of this merge window. Deprecating register_sysctl_table() will take a bit more time but this pull request goes with a few example of how to do this. As it turns out each of the conversions to move away from either of these two API calls *also* saves memory. And so long term, all these changes *will* prove to have saved a bit of memory on boot. The way I see it then is if remove a user of one deprecated call, it gives us enough savings to move one kernel/sysctl.c out from the generic arrays as we end up with about the same amount of bytes. Since deprecating register_sysctl_table() and register_sysctl_paths() does not require maintainer coordination except the final unexport you'll see quite a bit of these changes from other pull requests, I've just kept the stragglers after rc3" Link: https://lkml.kernel.org/r/ZAD+cpbrqlc5vmry@bombadil.infradead.org [0] * tag 'sysctl-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux: (29 commits) fs: fix sysctls.c built mm: compaction: remove incorrect #ifdef checks mm: compaction: move compaction sysctl to its own file mm: memory-failure: Move memory failure sysctls to its own file arm: simplify two-level sysctl registration for ctl_isa_vars ia64: simplify one-level sysctl registration for kdump_ctl_table utsname: simplify one-level sysctl registration for uts_kern_table ntfs: simplfy one-level sysctl registration for ntfs_sysctls coda: simplify one-level sysctl registration for coda_table fs/cachefiles: simplify one-level sysctl registration for cachefiles_sysctls xfs: simplify two-level sysctl registration for xfs_table nfs: simplify two-level sysctl registration for nfs_cb_sysctls nfs: simplify two-level sysctl registration for nfs4_cb_sysctls lockd: simplify two-level sysctl registration for nlm_sysctls proc_sysctl: enhance documentation xen: simplify sysctl registration for balloon md: simplify sysctl registration hv: simplify sysctl registration scsi: simplify sysctl registration with register_sysctl() csky: simplify alignment sysctl registration ...
Diffstat (limited to 'mm')
-rw-r--r--mm/compaction.c80
-rw-r--r--mm/hugetlb.c51
-rw-r--r--mm/memory-failure.c36
3 files changed, 150 insertions, 17 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 5a9501e0ae01..9ff71239b1fc 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1716,7 +1716,14 @@ typedef enum {
* Allow userspace to control policy on scanning the unevictable LRU for
* compactable pages.
*/
-int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNEVICTABLE_DEFAULT;
+static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNEVICTABLE_DEFAULT;
+/*
+ * Tunable for proactive compaction. It determines how
+ * aggressively the kernel should compact memory in the
+ * background. It takes values in the range [0, 100].
+ */
+static unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
+static int sysctl_extfrag_threshold = 500;
static inline void
update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
@@ -2572,8 +2579,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
return ret;
}
-int sysctl_extfrag_threshold = 500;
-
/**
* try_to_compact_pages - Direct compact to satisfy a high-order allocation
* @gfp_mask: The GFP mask of the current allocation
@@ -2730,14 +2735,7 @@ static void compact_nodes(void)
compact_node(nid);
}
-/*
- * Tunable for proactive compaction. It determines how
- * aggressively the kernel should compact memory in the
- * background. It takes values in the range [0, 100].
- */
-unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
-
-int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
+static int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc, nid;
@@ -2767,7 +2765,7 @@ int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
* This is the entry point for compacting all nodes via
* /proc/sys/vm/compact_memory
*/
-int sysctl_compaction_handler(struct ctl_table *table, int write,
+static int sysctl_compaction_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
if (write)
@@ -3063,6 +3061,63 @@ static int kcompactd_cpu_online(unsigned int cpu)
return 0;
}
+static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
+ int write, void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret, old;
+
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ old = *(int *)table->data;
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (ret)
+ return ret;
+ if (old != *(int *)table->data)
+ pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
+ table->procname, current->comm,
+ task_pid_nr(current));
+ return ret;
+}
+
+static struct ctl_table vm_compaction[] = {
+ {
+ .procname = "compact_memory",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0200,
+ .proc_handler = sysctl_compaction_handler,
+ },
+ {
+ .procname = "compaction_proactiveness",
+ .data = &sysctl_compaction_proactiveness,
+ .maxlen = sizeof(sysctl_compaction_proactiveness),
+ .mode = 0644,
+ .proc_handler = compaction_proactiveness_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+ {
+ .procname = "extfrag_threshold",
+ .data = &sysctl_extfrag_threshold,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE_THOUSAND,
+ },
+ {
+ .procname = "compact_unevictable_allowed",
+ .data = &sysctl_compact_unevictable_allowed,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_warn_RT_change,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ { }
+};
+
static int __init kcompactd_init(void)
{
int nid;
@@ -3078,6 +3133,7 @@ static int __init kcompactd_init(void)
for_each_node_state(nid, N_MEMORY)
kcompactd_run(nid);
+ register_sysctl_init("vm", vm_compaction);
return 0;
}
subsys_initcall(kcompactd_init)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 245038a9fe4e..a93e070ab175 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4202,6 +4202,12 @@ static void __init hugetlb_sysfs_init(void)
hugetlb_register_all_nodes();
}
+#ifdef CONFIG_SYSCTL
+static void hugetlb_sysctl_init(void);
+#else
+static inline void hugetlb_sysctl_init(void) { }
+#endif
+
static int __init hugetlb_init(void)
{
int i;
@@ -4257,6 +4263,7 @@ static int __init hugetlb_init(void)
hugetlb_sysfs_init();
hugetlb_cgroup_file_init();
+ hugetlb_sysctl_init();
#ifdef CONFIG_SMP
num_fault_mutexes = roundup_pow_of_two(8 * num_possible_cpus());
@@ -4588,7 +4595,7 @@ out:
return ret;
}
-int hugetlb_sysctl_handler(struct ctl_table *table, int write,
+static int hugetlb_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
@@ -4597,7 +4604,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
}
#ifdef CONFIG_NUMA
-int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
+static int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
return hugetlb_sysctl_handler_common(true, table, write,
@@ -4605,7 +4612,7 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
}
#endif /* CONFIG_NUMA */
-int hugetlb_overcommit_handler(struct ctl_table *table, int write,
+static int hugetlb_overcommit_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
@@ -4634,6 +4641,44 @@ out:
return ret;
}
+static struct ctl_table hugetlb_table[] = {
+ {
+ .procname = "nr_hugepages",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = hugetlb_sysctl_handler,
+ },
+#ifdef CONFIG_NUMA
+ {
+ .procname = "nr_hugepages_mempolicy",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &hugetlb_mempolicy_sysctl_handler,
+ },
+#endif
+ {
+ .procname = "hugetlb_shm_group",
+ .data = &sysctl_hugetlb_shm_group,
+ .maxlen = sizeof(gid_t),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "nr_overcommit_hugepages",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = hugetlb_overcommit_handler,
+ },
+ { }
+};
+
+static void hugetlb_sysctl_init(void)
+{
+ register_sysctl_init("vm", hugetlb_table);
+}
#endif /* CONFIG_SYSCTL */
void hugetlb_report_meminfo(struct seq_file *m)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index fae9baf3be16..10e60b6b2447 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -62,13 +62,14 @@
#include <linux/page-isolation.h>
#include <linux/pagewalk.h>
#include <linux/shmem_fs.h>
+#include <linux/sysctl.h>
#include "swap.h"
#include "internal.h"
#include "ras/ras_event.h"
-int sysctl_memory_failure_early_kill __read_mostly = 0;
+static int sysctl_memory_failure_early_kill __read_mostly;
-int sysctl_memory_failure_recovery __read_mostly = 1;
+static int sysctl_memory_failure_recovery __read_mostly = 1;
atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
@@ -122,6 +123,37 @@ const struct attribute_group memory_failure_attr_group = {
.attrs = memory_failure_attr,
};
+#ifdef CONFIG_SYSCTL
+static struct ctl_table memory_failure_table[] = {
+ {
+ .procname = "memory_failure_early_kill",
+ .data = &sysctl_memory_failure_early_kill,
+ .maxlen = sizeof(sysctl_memory_failure_early_kill),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "memory_failure_recovery",
+ .data = &sysctl_memory_failure_recovery,
+ .maxlen = sizeof(sysctl_memory_failure_recovery),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ { }
+};
+
+static int __init memory_failure_sysctl_init(void)
+{
+ register_sysctl_init("vm", memory_failure_table);
+ return 0;
+}
+late_initcall(memory_failure_sysctl_init);
+#endif /* CONFIG_SYSCTL */
+
/*
* Return values:
* 1: the page is dissolved (if needed) and taken off from buddy,