From b6fca72536fb94098acb23c00b4e65d3bc4bb252 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 9 Jan 2013 20:06:28 +0530 Subject: sysctl: Enable IA64 "ignore-unaligned-usertrap" to be used cross-arch IA64 defines /proc/sys/kernel/ignore-unaligned-usertrap to control verbose warnings on unaligned access emulation. Although the exact mechanics of what to do with sysctl (ignore/shout) are arch specific, this change enables the sysctl to be usable cross-arch. Signed-off-by: Vineet Gupta Cc: Fenghua Yu Cc: "Eric W. Biederman" Cc: Serge Hallyn Signed-off-by: Tony Luck --- kernel/sysctl.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c88878db491e..840fd5eb2309 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -161,10 +161,13 @@ extern int unaligned_enabled; #endif #ifdef CONFIG_IA64 -extern int no_unaligned_warning; extern int unaligned_dump_stack; #endif +#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN +extern int no_unaligned_warning; +#endif + #ifdef CONFIG_PROC_SYSCTL static int proc_do_cad_pid(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -911,7 +914,7 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_doulongvec_minmax, }, #endif -#ifdef CONFIG_IA64 +#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN { .procname = "ignore-unaligned-usertrap", .data = &no_unaligned_warning, @@ -919,6 +922,8 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#endif +#ifdef CONFIG_IA64 { .procname = "unaligned-dump-stack", .data = &unaligned_dump_stack, -- cgit v1.2.3 From 373d4d099761cb1f637bed488ab3871945882273 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 21 Jan 2013 17:17:39 +1030 Subject: taint: add explicit flag to show whether lock dep is still OK. Fix up all callers as they were before, with make one change: an unsigned module taints the kernel, but doesn't turn off lockdep. Signed-off-by: Rusty Russell --- arch/alpha/kernel/traps.c | 2 +- arch/arm/kernel/traps.c | 2 +- arch/arm64/kernel/traps.c | 2 +- arch/avr32/kernel/traps.c | 2 +- arch/hexagon/kernel/traps.c | 2 +- arch/ia64/kernel/traps.c | 2 +- arch/m68k/kernel/traps.c | 2 +- arch/mips/kernel/traps.c | 2 +- arch/parisc/kernel/traps.c | 2 +- arch/powerpc/kernel/traps.c | 2 +- arch/s390/kernel/traps.c | 2 +- arch/sh/kernel/traps.c | 2 +- arch/sparc/kernel/setup_64.c | 2 +- arch/sparc/kernel/traps_32.c | 2 +- arch/sparc/kernel/traps_64.c | 2 +- arch/unicore32/kernel/traps.c | 2 +- arch/x86/kernel/cpu/amd.c | 3 +-- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- arch/x86/kernel/cpu/mcheck/p5.c | 2 +- arch/x86/kernel/cpu/mcheck/winchip.c | 2 +- arch/x86/kernel/cpu/mtrr/generic.c | 2 +- arch/x86/kernel/dumpstack.c | 2 +- arch/xtensa/kernel/traps.c | 2 +- drivers/acpi/custom_method.c | 2 +- drivers/acpi/osl.c | 2 +- drivers/base/regmap/regmap-debugfs.c | 2 +- include/linux/kernel.h | 6 +++++- kernel/module.c | 26 +++++++++++++++----------- kernel/panic.c | 34 ++++++++++++++-------------------- kernel/sched/core.c | 2 +- kernel/sysctl.c | 2 +- lib/bug.c | 3 ++- mm/memory.c | 2 +- mm/page_alloc.c | 2 +- mm/slab.c | 2 +- mm/slub.c | 2 +- sound/soc/soc-core.c | 2 +- 37 files changed, 69 insertions(+), 67 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 272666d006df..4037461a6493 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -186,7 +186,7 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) #endif printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); dik_show_regs(regs, r9_15); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); dik_show_trace((unsigned long *)(regs+1)); dik_show_code((unsigned int *)regs->pc); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index b0179b89a04c..1c089119b2d7 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -296,7 +296,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) bust_spinlocks(0); die_owner = -1; - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); die_nest_count--; if (!die_nest_count) /* Nest count reaches zero, release the lock. */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 3883f842434f..b3c5f628bdb4 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -242,7 +242,7 @@ void die(const char *str, struct pt_regs *regs, int err) crash_kexec(regs); bust_spinlocks(0); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); raw_spin_unlock_irq(&die_lock); oops_exit(); diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c index 3d760c06f024..682b2478691a 100644 --- a/arch/avr32/kernel/traps.c +++ b/arch/avr32/kernel/traps.c @@ -61,7 +61,7 @@ void die(const char *str, struct pt_regs *regs, long err) show_regs_log_lvl(regs, KERN_EMERG); show_stack_log_lvl(current, regs->sp, regs, KERN_EMERG); bust_spinlocks(0); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irq(&die_lock); if (in_interrupt()) diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index a41eeb8eeaa1..be5e2dd9c9d3 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -225,7 +225,7 @@ int die(const char *str, struct pt_regs *regs, long err) do_show_stack(current, ®s->r30, pt_elr(regs)); bust_spinlocks(0); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irq(&die.lock); diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index bd42b76000d1..f7f9f9c6caf0 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -72,7 +72,7 @@ die (const char *str, struct pt_regs *regs, long err) bust_spinlocks(0); die.lock_owner = -1; - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irq(&die.lock); if (!regs) diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index cbc624af4494..f32ab22e7ed3 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -1176,7 +1176,7 @@ void die_if_kernel (char *str, struct pt_regs *fp, int nr) console_verbose(); printk("%s: %08x\n",str,nr); show_registers(fp); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); do_exit(SIGSEGV); } diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index cf7ac5483f53..9007966d56d4 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -396,7 +396,7 @@ void __noreturn die(const char *str, struct pt_regs *regs) printk("%s[#%d]:\n", str, ++die_counter); show_registers(regs); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); raw_spin_unlock_irq(&die_lock); oops_exit(); diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 45ba99f5080b..aeb8f8f2c07a 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -282,7 +282,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) show_regs(regs); dump_stack(); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); if (in_interrupt()) panic("Fatal exception in interrupt"); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 32518401af68..c579db859388 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -138,7 +138,7 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, { bust_spinlocks(0); die_owner = -1; - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); die_nest_count--; oops_exit(); printk("\n"); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 70ecfc5fe8f0..13dd63fba367 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -271,7 +271,7 @@ void die(struct pt_regs *regs, const char *str) print_modules(); show_regs(regs); bust_spinlocks(0); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irq(&die_lock); if (in_interrupt()) panic("Fatal exception in interrupt"); diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 72246bc06884..dfdad72c61ca 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -38,7 +38,7 @@ void die(const char *str, struct pt_regs *regs, long err) notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV); bust_spinlocks(0); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irq(&die_lock); oops_exit(); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 0eaf0059aaef..88a127b9c69e 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -115,7 +115,7 @@ static void __init process_switch(char c) break; } cheetah_pcache_forced_on = 1; - add_taint(TAINT_MACHINE_CHECK); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); cheetah_enable_pcache(); break; diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index a5785ea2a85d..662982946a89 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -58,7 +58,7 @@ void die_if_kernel(char *str, struct pt_regs *regs) printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter); show_regs(regs); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); __SAVE; __SAVE; __SAVE; __SAVE; __SAVE; __SAVE; __SAVE; __SAVE; diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index e7ecf1507d90..8d38ca97aa23 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2383,7 +2383,7 @@ void die_if_kernel(char *str, struct pt_regs *regs) notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV); __asm__ __volatile__("flushw"); show_regs(regs); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); if (regs->tstate & TSTATE_PRIV) { struct thread_info *tp = current_thread_info(); struct reg_window *rw = (struct reg_window *) diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 2054f0d4db13..0870b68d2ad9 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -231,7 +231,7 @@ void die(const char *str, struct pt_regs *regs, int err) ret = __die(str, err, thread, regs); bust_spinlocks(0); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irq(&die_lock); oops_exit(); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 15239fffd6fe..5853e57523e5 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -220,8 +220,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) */ WARN_ONCE(1, "WARNING: This combination of AMD" " processors is not suitable for SMP.\n"); - if (!test_taint(TAINT_UNSAFE_SMP)) - add_taint(TAINT_UNSAFE_SMP); + add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE); valid_k7: ; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 80dbda84f1c3..6bc15edbc8cd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1085,7 +1085,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) /* * Set taint even when machine check was not enabled. */ - add_taint(TAINT_MACHINE_CHECK); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); severity = mce_severity(&m, cfg->tolerant, NULL); diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 2d5454cd2c4f..1c044b1ccc59 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -33,7 +33,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) smp_processor_id()); } - add_taint(TAINT_MACHINE_CHECK); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); } /* Set up machine check reporting for processors with Intel style MCE: */ diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 2d7998fb628c..e9a701aecaa1 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -15,7 +15,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code) { printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); - add_taint(TAINT_MACHINE_CHECK); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); } /* Set up machine check reporting on the Winchip C6 series */ diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index e9fe907cd249..fa72a39e5d46 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -542,7 +542,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, if (tmp != mask_lo) { printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); - add_taint(TAINT_FIRMWARE_WORKAROUND); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); mask_lo = tmp; } } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ae42418bc50f..c8797d55b245 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -232,7 +232,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) bust_spinlocks(0); die_owner = -1; - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); die_nest_count--; if (!die_nest_count) /* Nest count reaches zero, release the lock. */ diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 01e0111bf787..ded955d45155 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -524,7 +524,7 @@ void die(const char * str, struct pt_regs * regs, long err) if (!user_mode(regs)) show_stack(NULL, (unsigned long*)regs->areg[1]); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irq(&die_lock); if (in_interrupt()) diff --git a/drivers/acpi/custom_method.c b/drivers/acpi/custom_method.c index 5d42c2414ae5..cd9a68e2fea9 100644 --- a/drivers/acpi/custom_method.c +++ b/drivers/acpi/custom_method.c @@ -66,7 +66,7 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf, buf = NULL; if (ACPI_FAILURE(status)) return -EINVAL; - add_taint(TAINT_OVERRIDDEN_ACPI_TABLE); + add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE); } return count; diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 3ff267861541..535e888bad78 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -661,7 +661,7 @@ static void acpi_table_taint(struct acpi_table_header *table) pr_warn(PREFIX "Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n", table->signature, table->oem_table_id); - add_taint(TAINT_OVERRIDDEN_ACPI_TABLE); + add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE); } diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index 46a213a596e2..f63c830083ec 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -267,7 +267,7 @@ static ssize_t regmap_map_write_file(struct file *file, return -EINVAL; /* Userspace has been fiddling around behind the kernel's back */ - add_taint(TAINT_USER); + add_taint(TAINT_USER, LOCKDEP_NOW_UNRELIABLE); regmap_write(map, reg, value); return buf_size; diff --git a/include/linux/kernel.h b/include/linux/kernel.h index c566927efcbd..80d36874689b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -398,7 +398,11 @@ extern int panic_on_unrecovered_nmi; extern int panic_on_io_nmi; extern int sysctl_panic_on_stackoverflow; extern const char *print_tainted(void); -extern void add_taint(unsigned flag); +enum lockdep_ok { + LOCKDEP_STILL_OK, + LOCKDEP_NOW_UNRELIABLE +}; +extern void add_taint(unsigned flag, enum lockdep_ok); extern int test_taint(unsigned flag); extern unsigned long get_taint(void); extern int root_mountflags; diff --git a/kernel/module.c b/kernel/module.c index e69a5a68766f..cc000dd6e4a8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -197,9 +197,10 @@ static inline int strong_try_module_get(struct module *mod) return -ENOENT; } -static inline void add_taint_module(struct module *mod, unsigned flag) +static inline void add_taint_module(struct module *mod, unsigned flag, + enum lockdep_ok lockdep_ok) { - add_taint(flag); + add_taint(flag, lockdep_ok); mod->taints |= (1U << flag); } @@ -727,7 +728,7 @@ static inline int try_force_unload(unsigned int flags) { int ret = (flags & O_TRUNC); if (ret) - add_taint(TAINT_FORCED_RMMOD); + add_taint(TAINT_FORCED_RMMOD, LOCKDEP_NOW_UNRELIABLE); return ret; } #else @@ -1138,7 +1139,7 @@ static int try_to_force_load(struct module *mod, const char *reason) if (!test_taint(TAINT_FORCED_MODULE)) printk(KERN_WARNING "%s: %s: kernel tainted.\n", mod->name, reason); - add_taint_module(mod, TAINT_FORCED_MODULE); + add_taint_module(mod, TAINT_FORCED_MODULE, LOCKDEP_NOW_UNRELIABLE); return 0; #else return -ENOEXEC; @@ -2147,7 +2148,8 @@ static void set_license(struct module *mod, const char *license) if (!test_taint(TAINT_PROPRIETARY_MODULE)) printk(KERN_WARNING "%s: module license '%s' taints " "kernel.\n", mod->name, license); - add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + add_taint_module(mod, TAINT_PROPRIETARY_MODULE, + LOCKDEP_NOW_UNRELIABLE); } } @@ -2700,10 +2702,10 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) } if (!get_modinfo(info, "intree")) - add_taint_module(mod, TAINT_OOT_MODULE); + add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); if (get_modinfo(info, "staging")) { - add_taint_module(mod, TAINT_CRAP); + add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); printk(KERN_WARNING "%s: module is from the staging directory," " the quality is unknown, you have been warned.\n", mod->name); @@ -2869,15 +2871,17 @@ static int check_module_license_and_versions(struct module *mod) * using GPL-only symbols it needs. */ if (strcmp(mod->name, "ndiswrapper") == 0) - add_taint(TAINT_PROPRIETARY_MODULE); + add_taint(TAINT_PROPRIETARY_MODULE, LOCKDEP_NOW_UNRELIABLE); /* driverloader was caught wrongly pretending to be under GPL */ if (strcmp(mod->name, "driverloader") == 0) - add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + add_taint_module(mod, TAINT_PROPRIETARY_MODULE, + LOCKDEP_NOW_UNRELIABLE); /* lve claims to be GPL but upstream won't provide source */ if (strcmp(mod->name, "lve") == 0) - add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + add_taint_module(mod, TAINT_PROPRIETARY_MODULE, + LOCKDEP_NOW_UNRELIABLE); #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) @@ -3197,7 +3201,7 @@ again: "%s: module verification failed: signature and/or" " required key missing - tainting kernel\n", mod->name); - add_taint_module(mod, TAINT_FORCED_MODULE); + add_taint_module(mod, TAINT_FORCED_MODULE, LOCKDEP_STILL_OK); } #endif diff --git a/kernel/panic.c b/kernel/panic.c index e1b2822fff97..7c57cc9eee2c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -259,26 +259,19 @@ unsigned long get_taint(void) return tainted_mask; } -void add_taint(unsigned flag) +/** + * add_taint: add a taint flag if not already set. + * @flag: one of the TAINT_* constants. + * @lockdep_ok: whether lock debugging is still OK. + * + * If something bad has gone wrong, you'll want @lockdebug_ok = false, but for + * some notewortht-but-not-corrupting cases, it can be set to true. + */ +void add_taint(unsigned flag, enum lockdep_ok lockdep_ok) { - /* - * Can't trust the integrity of the kernel anymore. - * We don't call directly debug_locks_off() because the issue - * is not necessarily serious enough to set oops_in_progress to 1 - * Also we want to keep up lockdep for staging/out-of-tree - * development and post-warning case. - */ - switch (flag) { - case TAINT_CRAP: - case TAINT_OOT_MODULE: - case TAINT_WARN: - case TAINT_FIRMWARE_WORKAROUND: - break; - - default: - if (__debug_locks_off()) - printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n"); - } + if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off()) + printk(KERN_WARNING + "Disabling lock debugging due to kernel taint\n"); set_bit(flag, &tainted_mask); } @@ -421,7 +414,8 @@ static void warn_slowpath_common(const char *file, int line, void *caller, print_modules(); dump_stack(); print_oops_end_marker(); - add_taint(taint); + /* Just a warning, don't kill lockdep. */ + add_taint(taint, LOCKDEP_STILL_OK); } void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 257002c13bb0..662f3d512183 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2785,7 +2785,7 @@ static noinline void __schedule_bug(struct task_struct *prev) if (irqs_disabled()) print_irqtrace_events(prev); dump_stack(); - add_taint(TAINT_WARN); + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c88878db491e..f97f9d75cde8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2006,7 +2006,7 @@ static int proc_taint(struct ctl_table *table, int write, int i; for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) { if ((tmptaint >> i) & 1) - add_taint(i); + add_taint(i, LOCKDEP_STILL_OK); } } diff --git a/lib/bug.c b/lib/bug.c index d0cdf14c651a..168603477f02 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -166,7 +166,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) print_modules(); show_regs(regs); print_oops_end_marker(); - add_taint(BUG_GET_TAINT(bug)); + /* Just a warning, don't kill lockdep. */ + add_taint(BUG_GET_TAINT(bug), LOCKDEP_STILL_OK); return BUG_TRAP_TYPE_WARN; } diff --git a/mm/memory.c b/mm/memory.c index bb1369f7b9b4..bc8bec762db7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -716,7 +716,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, print_symbol(KERN_ALERT "vma->vm_file->f_op->mmap: %s\n", (unsigned long)vma->vm_file->f_op->mmap); dump_stack(); - add_taint(TAINT_BAD_PAGE); + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } static inline bool is_cow_mapping(vm_flags_t flags) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index df2022ff0c8a..4c99cb7e276a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -320,7 +320,7 @@ static void bad_page(struct page *page) out: /* Leave bad fields for debug, except PageBuddy could make trouble */ reset_page_mapcount(page); /* remove PageBuddy */ - add_taint(TAINT_BAD_PAGE); + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } /* diff --git a/mm/slab.c b/mm/slab.c index e7667a3584bc..856e4a192d25 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -812,7 +812,7 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", function, cachep->name, msg); dump_stack(); - add_taint(TAINT_BAD_PAGE); + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } #endif diff --git a/mm/slub.c b/mm/slub.c index ba2ca53f6c3a..7ec3041bdd0d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -562,7 +562,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...) printk(KERN_ERR "----------------------------------------" "-------------------------------------\n\n"); - add_taint(TAINT_BAD_PAGE); + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } static void slab_fix(struct kmem_cache *s, char *fmt, ...) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 2370063b5824..4c6526c0db03 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -251,7 +251,7 @@ static ssize_t codec_reg_write_file(struct file *file, return -EINVAL; /* Userspace has been fiddling around behind the kernel's back */ - add_taint(TAINT_USER); + add_taint(TAINT_USER, LOCKDEP_NOW_UNRELIABLE); snd_soc_write(codec, reg, value); return buf_size; -- cgit v1.2.3 From cf4aebc292fac7f34f8345664320e9d4a42ca76c Mon Sep 17 00:00:00 2001 From: Clark Williams Date: Thu, 7 Feb 2013 09:46:59 -0600 Subject: sched: Move sched.h sysctl bits into separate header Move the sysctl-related bits from include/linux/sched.h into a new file: include/linux/sched/sysctl.h. Then update source files requiring access to those bits by including the new header file. Signed-off-by: Clark Williams Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20130207094659.06dced96@riff.lan Signed-off-by: Ingo Molnar --- block/blk-exec.c | 1 + include/linux/sched.h | 91 ----------------------------------------- include/linux/sched/sysctl.h | 97 ++++++++++++++++++++++++++++++++++++++++++++ init/init_task.c | 1 + kernel/hrtimer.c | 1 + kernel/sched/sched.h | 1 + kernel/sysctl.c | 1 + kernel/timer.c | 1 + mm/mmap.c | 1 + mm/mremap.c | 1 + mm/nommu.c | 1 + 11 files changed, 106 insertions(+), 91 deletions(-) create mode 100644 include/linux/sched/sysctl.h (limited to 'kernel/sysctl.c') diff --git a/block/blk-exec.c b/block/blk-exec.c index 74638ec234c8..c88202f973d9 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "blk.h" diff --git a/include/linux/sched.h b/include/linux/sched.h index 719ee0815e3a..8fc9b2710a80 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -304,19 +304,6 @@ static inline void lockup_detector_init(void) } #endif -#ifdef CONFIG_DETECT_HUNG_TASK -extern unsigned int sysctl_hung_task_panic; -extern unsigned long sysctl_hung_task_check_count; -extern unsigned long sysctl_hung_task_timeout_secs; -extern unsigned long sysctl_hung_task_warnings; -extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -#else -/* Avoid need for ifdefs elsewhere in the code */ -enum { sysctl_hung_task_timeout_secs = 0 }; -#endif - /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) @@ -338,23 +325,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); struct nsproxy; struct user_namespace; -/* - * Default maximum number of active map areas, this limits the number of vmas - * per mm struct. Users can overwrite this number by sysctl but there is a - * problem. - * - * When a program's coredump is generated as ELF format, a section is created - * per a vma. In ELF, the number of sections is represented in unsigned short. - * This means the number of sections should be smaller than 65535 at coredump. - * Because the kernel adds some informative sections to a image of program at - * generating coredump, we need some margin. The number of extra sections is - * 1-3 now and depends on arch. We use "5" as safe margin, here. - */ -#define MAPCOUNT_ELF_CORE_MARGIN (5) -#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) - -extern int sysctl_max_map_count; - #include #ifdef CONFIG_MMU @@ -1221,12 +1191,6 @@ struct sched_rt_entity { #endif }; -/* - * default timeslice is 100 msecs (used only for SCHED_RR tasks). - * Timeslices get refilled after they expire. - */ -#define RR_TIMESLICE (100 * HZ / 1000) - struct rcu_node; enum perf_event_task_context { @@ -2074,58 +2038,7 @@ extern void wake_up_idle_cpu(int cpu); static inline void wake_up_idle_cpu(int cpu) { } #endif -extern unsigned int sysctl_sched_latency; -extern unsigned int sysctl_sched_min_granularity; -extern unsigned int sysctl_sched_wakeup_granularity; -extern unsigned int sysctl_sched_child_runs_first; - -enum sched_tunable_scaling { - SCHED_TUNABLESCALING_NONE, - SCHED_TUNABLESCALING_LOG, - SCHED_TUNABLESCALING_LINEAR, - SCHED_TUNABLESCALING_END, -}; -extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; - -extern unsigned int sysctl_numa_balancing_scan_delay; -extern unsigned int sysctl_numa_balancing_scan_period_min; -extern unsigned int sysctl_numa_balancing_scan_period_max; -extern unsigned int sysctl_numa_balancing_scan_period_reset; -extern unsigned int sysctl_numa_balancing_scan_size; -extern unsigned int sysctl_numa_balancing_settle_count; - -#ifdef CONFIG_SCHED_DEBUG -extern unsigned int sysctl_sched_migration_cost; -extern unsigned int sysctl_sched_nr_migrate; -extern unsigned int sysctl_sched_time_avg; -extern unsigned int sysctl_timer_migration; -extern unsigned int sysctl_sched_shares_window; - -int sched_proc_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, - loff_t *ppos); -#endif -#ifdef CONFIG_SCHED_DEBUG -static inline unsigned int get_sysctl_timer_migration(void) -{ - return sysctl_timer_migration; -} -#else -static inline unsigned int get_sysctl_timer_migration(void) -{ - return 1; -} -#endif -extern unsigned int sysctl_sched_rt_period; -extern int sysctl_sched_rt_runtime; - -int sched_rt_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - #ifdef CONFIG_SCHED_AUTOGROUP -extern unsigned int sysctl_sched_autogroup_enabled; - extern void sched_autogroup_create_attach(struct task_struct *p); extern void sched_autogroup_detach(struct task_struct *p); extern void sched_autogroup_fork(struct signal_struct *sig); @@ -2141,10 +2054,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { } static inline void sched_autogroup_exit(struct signal_struct *sig) { } #endif -#ifdef CONFIG_CFS_BANDWIDTH -extern unsigned int sysctl_sched_cfs_bandwidth_slice; -#endif - #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h new file mode 100644 index 000000000000..bac914e458ca --- /dev/null +++ b/include/linux/sched/sysctl.h @@ -0,0 +1,97 @@ +#ifndef _SCHED_SYSCTL_H +#define _SCHED_SYSCTL_H + +#ifdef CONFIG_DETECT_HUNG_TASK +extern unsigned int sysctl_hung_task_panic; +extern unsigned long sysctl_hung_task_check_count; +extern unsigned long sysctl_hung_task_timeout_secs; +extern unsigned long sysctl_hung_task_warnings; +extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +#else +/* Avoid need for ifdefs elsewhere in the code */ +enum { sysctl_hung_task_timeout_secs = 0 }; +#endif + +/* + * Default maximum number of active map areas, this limits the number of vmas + * per mm struct. Users can overwrite this number by sysctl but there is a + * problem. + * + * When a program's coredump is generated as ELF format, a section is created + * per a vma. In ELF, the number of sections is represented in unsigned short. + * This means the number of sections should be smaller than 65535 at coredump. + * Because the kernel adds some informative sections to a image of program at + * generating coredump, we need some margin. The number of extra sections is + * 1-3 now and depends on arch. We use "5" as safe margin, here. + */ +#define MAPCOUNT_ELF_CORE_MARGIN (5) +#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) + +extern int sysctl_max_map_count; + +extern unsigned int sysctl_sched_latency; +extern unsigned int sysctl_sched_min_granularity; +extern unsigned int sysctl_sched_wakeup_granularity; +extern unsigned int sysctl_sched_child_runs_first; + +enum sched_tunable_scaling { + SCHED_TUNABLESCALING_NONE, + SCHED_TUNABLESCALING_LOG, + SCHED_TUNABLESCALING_LINEAR, + SCHED_TUNABLESCALING_END, +}; +extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; + +extern unsigned int sysctl_numa_balancing_scan_delay; +extern unsigned int sysctl_numa_balancing_scan_period_min; +extern unsigned int sysctl_numa_balancing_scan_period_max; +extern unsigned int sysctl_numa_balancing_scan_period_reset; +extern unsigned int sysctl_numa_balancing_scan_size; +extern unsigned int sysctl_numa_balancing_settle_count; + +#ifdef CONFIG_SCHED_DEBUG +extern unsigned int sysctl_sched_migration_cost; +extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_sched_time_avg; +extern unsigned int sysctl_timer_migration; +extern unsigned int sysctl_sched_shares_window; + +int sched_proc_update_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, + loff_t *ppos); +#endif +#ifdef CONFIG_SCHED_DEBUG +static inline unsigned int get_sysctl_timer_migration(void) +{ + return sysctl_timer_migration; +} +#else +static inline unsigned int get_sysctl_timer_migration(void) +{ + return 1; +} +#endif +extern unsigned int sysctl_sched_rt_period; +extern int sysctl_sched_rt_runtime; + +#ifdef CONFIG_CFS_BANDWIDTH +extern unsigned int sysctl_sched_cfs_bandwidth_slice; +#endif + +#ifdef CONFIG_SCHED_AUTOGROUP +extern unsigned int sysctl_sched_autogroup_enabled; +#endif + +/* + * default timeslice is 100 msecs (used only for SCHED_RR tasks). + * Timeslices get refilled after they expire. + */ +#define RR_TIMESLICE (100 * HZ / 1000) + +int sched_rt_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + +#endif /* _SCHED_SYSCTL_H */ diff --git a/init/init_task.c b/init/init_task.c index 8b2f3996b035..a031ad14c950 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 6db7a5ed52b5..8a9aa59d0d61 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index fc886441436a..ed8de30a040e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c88878db491e..7357e23aaf68 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include diff --git a/kernel/timer.c b/kernel/timer.c index 367d00858482..3e13baf3f0ea 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include diff --git a/mm/mmap.c b/mm/mmap.c index 35730ee9d515..5dee4a0bb49f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff --git a/mm/mremap.c b/mm/mremap.c index e1031e1f6a61..f9766f460299 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/mm/nommu.c b/mm/nommu.c index 79c3cac87afa..b20db4e22263 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From ce0dbbbb30aee6a835511d5be446462388ba9eee Mon Sep 17 00:00:00 2001 From: Clark Williams Date: Thu, 7 Feb 2013 09:47:04 -0600 Subject: sched/rt: Add a tuning knob to allow changing SCHED_RR timeslice Add a /proc/sys/kernel scheduler knob named sched_rr_timeslice_ms that allows global changing of the SCHED_RR timeslice value. User visable value is in milliseconds but is stored as jiffies. Setting to 0 (zero) resets to the default (currently 100ms). Signed-off-by: Clark Williams Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20130207094704.13751796@riff.lan Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 15 ++++++++++++++- kernel/sched/core.c | 19 +++++++++++++++++++ kernel/sched/rt.c | 6 ++++-- kernel/sysctl.c | 7 +++++++ 4 files changed, 44 insertions(+), 3 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index bac914e458ca..d2bb0ae979d0 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -73,6 +73,13 @@ static inline unsigned int get_sysctl_timer_migration(void) return 1; } #endif + +/* + * control realtime throttling: + * + * /proc/sys/kernel/sched_rt_period_us + * /proc/sys/kernel/sched_rt_runtime_us + */ extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; @@ -90,7 +97,13 @@ extern unsigned int sysctl_sched_autogroup_enabled; */ #define RR_TIMESLICE (100 * HZ / 1000) -int sched_rt_handler(struct ctl_table *table, int write, +extern int sched_rr_timeslice; + +extern int sched_rr_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + +extern int sched_rt_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1dff78a9e2ab..4a88f1d51563 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7509,6 +7509,25 @@ static int sched_rt_global_constraints(void) } #endif /* CONFIG_RT_GROUP_SCHED */ +int sched_rr_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + static DEFINE_MUTEX(mutex); + + mutex_lock(&mutex); + ret = proc_dointvec(table, write, buffer, lenp, ppos); + /* make sure that internally we keep jiffies */ + /* also, writing zero resets timeslice to default */ + if (!ret && write) { + sched_rr_timeslice = sched_rr_timeslice <= 0 ? + RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice); + } + mutex_unlock(&mutex); + return ret; +} + int sched_rt_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index c25de141c576..fb0f77e402b6 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -7,6 +7,8 @@ #include +int sched_rr_timeslice = RR_TIMESLICE; + static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); struct rt_bandwidth def_rt_bandwidth; @@ -2016,7 +2018,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) if (--p->rt.time_slice) return; - p->rt.time_slice = RR_TIMESLICE; + p->rt.time_slice = sched_rr_timeslice; /* * Requeue to the end of queue if we (and all of our ancestors) are the @@ -2047,7 +2049,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) * Time slice is 0 for SCHED_FIFO tasks */ if (task->policy == SCHED_RR) - return RR_TIMESLICE; + return sched_rr_timeslice; else return 0; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7357e23aaf68..4fc9be955c71 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -404,6 +404,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = sched_rt_handler, }, + { + .procname = "sched_rr_timeslice_ms", + .data = &sched_rr_timeslice, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sched_rr_handler, + }, #ifdef CONFIG_SCHED_AUTOGROUP { .procname = "sched_autogroup_enabled", -- cgit v1.2.3 From bf14e3b979a01cd7298d631736f965fe83c6e2bc Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 18 Jan 2013 15:12:24 +0530 Subject: sysctl: Enable PARISC "unaligned-trap" to be used cross-arch PARISC defines /proc/sys/kernel/unaligned-trap to runtime toggle unaligned access emulation. The exact mechanics of enablig/disabling are still arch specific, we can make the sysctl usable by other arches. Signed-off-by: Vineet Gupta Acked-by: Helge Deller Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: "Eric W. Biederman" Cc: Serge Hallyn --- arch/parisc/Kconfig | 1 + init/Kconfig | 8 ++++++++ kernel/sysctl.c | 5 +++++ 3 files changed, 14 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b77feffbadea..8c7609539717 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -20,6 +20,7 @@ config PARISC select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER + select SYSCTL_ARCH_UNALIGN_ALLOW select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select CLONE_BACKWARDS diff --git a/init/Kconfig b/init/Kconfig index be8b7f55312d..01180da49add 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1232,6 +1232,14 @@ config SYSCTL_EXCEPTION_TRACE help Enable support for /proc/sys/debug/exception-trace. +config SYSCTL_ARCH_UNALIGN_ALLOW + bool + help + Enable support for /proc/sys/kernel/unaligned-trap + Allows arches to define/use @unaligned_enabled to runtime toggle + the unaligned access emulation. + see arch/parisc/kernel/unaligned.c for reference + config KALLSYMS bool "Load all symbols for debugging/ksymoops" if EXPERT default y diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c88878db491e..878b4c41cfb7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -157,6 +157,9 @@ extern int sysctl_tsb_ratio; #ifdef __hppa__ extern int pwrsw_enabled; +#endif + +#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW extern int unaligned_enabled; #endif @@ -545,6 +548,8 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#endif +#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW { .procname = "unaligned-trap", .data = &unaligned_enabled, -- cgit v1.2.3 From 75f7ad8e043d9383337d917584297f7737154bbf Mon Sep 17 00:00:00 2001 From: Paul Szabo Date: Fri, 22 Feb 2013 16:34:42 -0800 Subject: page-writeback.c: subtract min_free_kbytes from dirtyable memory When calculating amount of dirtyable memory, min_free_kbytes should be subtracted because it is not intended for dirty pages. Addresses http://bugs.debian.org/695182 [akpm@linux-foundation.org: fix up min_free_kbytes extern declarations] [akpm@linux-foundation.org: fix min() warning] Signed-off-by: Paul Szabo Acked-by: Rik van Riel Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ kernel/sysctl.c | 1 - mm/huge_memory.c | 1 - mm/page-writeback.c | 3 +++ 4 files changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1d4122bf6f27..437da0ce78c7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1393,6 +1393,9 @@ extern void setup_per_cpu_pageset(void); extern void zone_pcp_update(struct zone *zone); extern void zone_pcp_reset(struct zone *zone); +/* page_alloc.c */ +extern int min_free_kbytes; + /* nommu.c */ extern atomic_long_t mmap_pages_allocated; extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 467d8b923fcd..95e9e55602a8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -105,7 +105,6 @@ extern char core_pattern[]; extern unsigned int core_pipe_limit; #endif extern int pid_max; -extern int min_free_kbytes; extern int pid_max_min, pid_max_max; extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b1cc6591ed83..c63a21d0e991 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -105,7 +105,6 @@ static int set_recommended_min_free_kbytes(void) struct zone *zone; int nr_zones = 0; unsigned long recommended_min; - extern int min_free_kbytes; if (!khugepaged_enabled()) return 0; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7300c9d5e1d9..cdc377c456c0 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -241,6 +241,9 @@ static unsigned long global_dirtyable_memory(void) if (!vm_highmem_is_dirtyable) x -= highmem_dirtyable_memory(x); + /* Subtract min_free_kbytes */ + x -= min_t(unsigned long, x, min_free_kbytes >> (PAGE_SHIFT - 10)); + return x + 1; /* Ensure that we never return 0 */ } -- cgit v1.2.3 From e579d2c259be42b6f29458327e5153b22414b031 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 27 Feb 2013 17:03:15 -0800 Subject: coredump: remove redundant defines for dumpable states The existing SUID_DUMP_* defines duplicate the newer SUID_DUMPABLE_* defines introduced in 54b501992dd2 ("coredump: warn about unsafe suid_dumpable / core_pattern combo"). Remove the new ones, and use the prior values instead. Signed-off-by: Kees Cook Reported-by: Chen Gang Cc: Alexander Viro Cc: Alan Cox Cc: "Eric W. Biederman" Cc: Doug Ledford Cc: Serge Hallyn Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coredump.c | 2 +- fs/exec.c | 10 +++++----- fs/proc/internal.h | 3 ++- include/linux/sched.h | 5 ----- kernel/sysctl.c | 2 +- 5 files changed, 9 insertions(+), 13 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/fs/coredump.c b/fs/coredump.c index 69baf903d3bd..c6479658d487 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -501,7 +501,7 @@ void do_coredump(siginfo_t *siginfo) * so we dump it as root in mode 2, and only into a controlled * environment (pipe handler or fully qualified path). */ - if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { + if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ diff --git a/fs/exec.c b/fs/exec.c index 864c50df660a..a96a4885bbbf 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1111,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm) current->sas_ss_sp = current->sas_ss_size = 0; if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid())) - set_dumpable(current->mm, SUID_DUMPABLE_ENABLED); + set_dumpable(current->mm, SUID_DUMP_USER); else set_dumpable(current->mm, suid_dumpable); @@ -1639,17 +1639,17 @@ EXPORT_SYMBOL(set_binfmt); void set_dumpable(struct mm_struct *mm, int value) { switch (value) { - case SUID_DUMPABLE_DISABLED: + case SUID_DUMP_DISABLE: clear_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case SUID_DUMPABLE_ENABLED: + case SUID_DUMP_USER: set_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case SUID_DUMPABLE_SAFE: + case SUID_DUMP_ROOT: set_bit(MMF_DUMP_SECURELY, &mm->flags); smp_wmb(); set_bit(MMF_DUMPABLE, &mm->flags); @@ -1662,7 +1662,7 @@ int __get_dumpable(unsigned long mm_flags) int ret; ret = mm_flags & MMF_DUMPABLE_MASK; - return (ret > SUID_DUMPABLE_ENABLED) ? SUID_DUMPABLE_SAFE : ret; + return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret; } int get_dumpable(struct mm_struct *mm) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 252544c05207..85ff3a4598b3 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,6 +11,7 @@ #include #include +#include struct ctl_table_header; struct mempolicy; @@ -108,7 +109,7 @@ static inline int task_dumpable(struct task_struct *task) if (mm) dumpable = get_dumpable(mm); task_unlock(task); - if (dumpable == SUID_DUMPABLE_ENABLED) + if (dumpable == SUID_DUMP_USER) return 1; return 0; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 6853bf947fde..d35d2b6ddbfb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -346,11 +346,6 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); -/* get/set_dumpable() values */ -#define SUID_DUMPABLE_DISABLED 0 -#define SUID_DUMPABLE_ENABLED 1 -#define SUID_DUMPABLE_SAFE 2 - /* mm flags */ /* dumpable bits */ #define MMF_DUMPABLE 0 /* core dump is permitted */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d8df00e69c14..d1b4ee67d2df 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2095,7 +2095,7 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, static void validate_coredump_safety(void) { #ifdef CONFIG_COREDUMP - if (suid_dumpable == SUID_DUMPABLE_SAFE && + if (suid_dumpable == SUID_DUMP_ROOT && core_pattern[0] != '/' && core_pattern[0] != '|') { printk(KERN_WARNING "Unsafe core_pattern used with "\ "suid_dumpable=2. Pipe handler or fully qualified "\ -- cgit v1.2.3