diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2020-05-04 16:47:39 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2020-05-04 16:47:39 +1000 |
commit | 03dde276fc6019914699a9537ec2c00c2d2390c4 (patch) | |
tree | e2f62fc3c740841fcde17ef0608123e27faf5a58 | |
parent | 85a4d08e4f8b463723cc17d827e2c5e56e7647ce (diff) | |
parent | 474443f674a3abca9744cb27693aa3c5039b5bb3 (diff) |
Merge branch 'akpm/master'
144 files changed, 1176 insertions, 587 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bac19f6d6da8..5a44c1bf85e7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1505,7 +1505,7 @@ [KNL] Should the hung task detector generate panics. Format: <integer> - A nonzero value instructs the kernel to panic when a + A value of 1 instructs the kernel to panic when a hung task is detected. The default value is controlled by the CONFIG_BOOTPARAM_HUNG_TASK_PANIC build-time option. The value selected by this boot parameter can @@ -4947,6 +4947,15 @@ switches= [HW,M68k] + sysctl.*= [KNL] + Set a sysctl parameter, right before loading the init + process, as if the value was written to the respective + /proc/sys/... file. Both '.' and '/' are recognized as + separators. Unrecognized parameters and invalid values + are reported in the kernel log. Sysctls registered + later by a loaded module cannot be set this way. + Example: sysctl.vm.swappiness=40 + sysfs.deprecated=0|1 [KNL] Enable/disable old style sysfs layout for old udev on older distributions. When this option is enabled diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index a8e1feaec181..065e6d9d39ad 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -301,6 +301,20 @@ Path for the hotplug policy agent. Default value is "``/sbin/hotplug``". +hung_task_all_cpu_backtrace: +================ + +If this option is set, the kernel will send an NMI to all CPUs to dump +their backtraces when a hung task is detected. This file shows up if +CONFIG_DETECT_HUNG_TASK and CONFIG_SMP are enabled. + +0: Won't show all CPUs backtraces when a hung task is detected. +This is the default behavior. + +1: Will non-maskably interrupt all CPUs and dump their backtraces when +a hung task is detected. + + hung_task_panic =============== @@ -570,6 +584,22 @@ rate for each task. scanned for a given scan. +oops_all_cpu_backtrace: +================ + +If this option is set, the kernel will send an NMI to all CPUs to dump +their backtraces when an oops event occurs. It should be used as a last +resort in case a panic cannot be triggered (to protect VMs running, for +example) or kdump can't be collected. This file shows up if CONFIG_SMP +is enabled. + +0: Won't show all CPUs backtraces when an oops is detected. +This is the default behavior. + +1: Will non-maskably interrupt all CPUs and dump their backtraces when +an oops event is detected. + + osrelease, ostype & version =========================== diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 36d42da7466a..c82952e6fb80 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -477,3 +477,4 @@ # 545 reserved for clone3 547 common openat2 sys_openat2 548 common pidfd_getfd sys_pidfd_getfd +549 common process_madvise sys_process_madvise diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index f6b9664ac504..8383ccfaccdc 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -121,10 +121,10 @@ dik_show_code(unsigned int *pc) } static void -dik_show_trace(unsigned long *sp) +dik_show_trace(unsigned long *sp, const char *loglvl) { long i = 0; - printk("Trace:\n"); + printk("%sTrace:\n", loglvl); while (0x1ff8 & (unsigned long) sp) { extern char _stext[], _etext[]; unsigned long tmp = *sp; @@ -133,24 +133,24 @@ dik_show_trace(unsigned long *sp) continue; if (tmp >= (unsigned long) &_etext) continue; - printk("[<%lx>] %pSR\n", tmp, (void *)tmp); + printk("%s[<%lx>] %pSR\n", loglvl, tmp, (void *)tmp); if (i > 40) { - printk(" ..."); + printk("%s ...", loglvl); break; } } - printk("\n"); + printk("%s\n", loglvl); } static int kstack_depth_to_print = 24; -void show_stack(struct task_struct *task, unsigned long *sp) +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { unsigned long *stack; int i; /* - * debugging aid: "show_stack(NULL);" prints the + * debugging aid: "show_stack(NULL, NULL, KERN_EMERG);" prints the * back trace for this cpu. */ if(sp==NULL) @@ -163,14 +163,14 @@ void show_stack(struct task_struct *task, unsigned long *sp) if ((i % 4) == 0) { if (i) pr_cont("\n"); - printk(" "); + printk("%s ", loglvl); } else { pr_cont(" "); } pr_cont("%016lx", *stack++); } pr_cont("\n"); - dik_show_trace(sp); + dik_show_trace(sp, loglvl); } void @@ -184,7 +184,7 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); dik_show_regs(regs, r9_15); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - dik_show_trace((unsigned long *)(regs+1)); + dik_show_trace((unsigned long *)(regs+1), KERN_DEFAULT); dik_show_code((unsigned int *)regs->pc); if (test_and_set_thread_flag (TIF_DIE_IF_KERNEL)) { @@ -625,7 +625,7 @@ got_exception: printk("gp = %016lx sp = %p\n", regs->gp, regs+1); dik_show_code((unsigned int *)pc); - dik_show_trace((unsigned long *)(regs+1)); + dik_show_trace((unsigned long *)(regs+1), KERN_DEFAULT); if (test_and_set_thread_flag (TIF_DIE_IF_KERNEL)) { printk("die_if_kernel recursion detected.\n"); diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h index 0be19fd1a412..4c453ba96c51 100644 --- a/arch/arc/include/asm/bug.h +++ b/arch/arc/include/asm/bug.h @@ -13,7 +13,8 @@ struct task_struct; void show_regs(struct pt_regs *regs); -void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs); +void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs, + const char *loglvl); void show_kernel_fault_diag(const char *str, struct pt_regs *regs, unsigned long address); void die(const char *str, struct pt_regs *regs, unsigned long address); diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 1e440bbfa876..feba91c9d969 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -158,9 +158,11 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, /* Call-back which plugs into unwinding core to dump the stack in * case of panic/OOPs/BUG etc */ -static int __print_sym(unsigned int address, void *unused) +static int __print_sym(unsigned int address, void *arg) { - printk(" %pS\n", (void *)address); + const char *loglvl = arg; + + printk("%s %pS\n", loglvl, (void *)address); return 0; } @@ -217,17 +219,18 @@ static int __get_first_nonsched(unsigned int address, void *unused) *------------------------------------------------------------------------- */ -noinline void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs) +noinline void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs, + const char *loglvl) { - pr_info("\nStack Trace:\n"); - arc_unwind_core(tsk, regs, __print_sym, NULL); + printk("%s\nStack Trace:\n", loglvl); + arc_unwind_core(tsk, regs, __print_sym, (void *)loglvl); } EXPORT_SYMBOL(show_stacktrace); /* Expected by sched Code */ -void show_stack(struct task_struct *tsk, unsigned long *sp) +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { - show_stacktrace(tsk, NULL); + show_stacktrace(tsk, NULL, loglvl); } /* Another API expected by schedular, shows up in "ps" as Wait Channel diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index d2999503fb8a..660681101523 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -242,5 +242,5 @@ void show_kernel_fault_diag(const char *str, struct pt_regs *regs, /* Show stack trace if this Fatality happened in kernel mode */ if (!user_mode(regs)) - show_stacktrace(current, regs); + show_stacktrace(current, regs, KERN_DEFAULT); } diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index deef4d0cb3b5..673c7dd75ab9 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -82,7 +82,8 @@ void hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), int sig, int code, const char *name); -extern asmlinkage void c_backtrace(unsigned long fp, int pmode); +extern asmlinkage void c_backtrace(unsigned long fp, int pmode, + const char *loglvl); struct mm_struct; void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr); diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h index 172b08ff3760..987fefb0a4db 100644 --- a/arch/arm/include/asm/traps.h +++ b/arch/arm/include/asm/traps.h @@ -29,7 +29,8 @@ static inline int __in_irqentry_text(unsigned long ptr) } extern void __init early_trap_init(void *); -extern void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame); +extern void dump_backtrace_entry(unsigned long where, unsigned long from, + unsigned long frame, const char *loglvl); extern void ptrace_break(struct pt_regs *regs); extern void *vectors_page; diff --git a/arch/arm/include/asm/unwind.h b/arch/arm/include/asm/unwind.h index 6e282c33126b..0f8a3439902d 100644 --- a/arch/arm/include/asm/unwind.h +++ b/arch/arm/include/asm/unwind.h @@ -36,7 +36,8 @@ extern struct unwind_table *unwind_table_add(unsigned long start, unsigned long text_addr, unsigned long text_size); extern void unwind_table_del(struct unwind_table *tab); -extern void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk); +extern void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl); #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 1e70e7227f0f..09faa0efe47b 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -62,21 +62,24 @@ __setup("user_debug=", user_debug_setup); static void dump_mem(const char *, const char *, unsigned long, unsigned long); -void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame) +void dump_backtrace_entry(unsigned long where, unsigned long from, + unsigned long frame, const char *loglvl) { unsigned long end = frame + 4 + sizeof(struct pt_regs); #ifdef CONFIG_KALLSYMS - printk("[<%08lx>] (%ps) from [<%08lx>] (%pS)\n", where, (void *)where, from, (void *)from); + printk("%s[<%08lx>] (%ps) from [<%08lx>] (%pS)\n", + loglvl, where, (void *)where, from, (void *)from); #else - printk("Function entered at [<%08lx>] from [<%08lx>]\n", where, from); + printk("%sFunction entered at [<%08lx>] from [<%08lx>]\n", + loglvl, where, from); #endif if (in_entry_text(from) && end <= ALIGN(frame, THREAD_SIZE)) - dump_mem("", "Exception stack", frame + 4, end); + dump_mem(loglvl, "Exception stack", frame + 4, end); } -void dump_backtrace_stm(u32 *stack, u32 instruction) +void dump_backtrace_stm(u32 *stack, u32 instruction, const char *loglvl) { char str[80], *p; unsigned int x; @@ -88,12 +91,12 @@ void dump_backtrace_stm(u32 *stack, u32 instruction) if (++x == 6) { x = 0; p = str; - printk("%s\n", str); + printk("%s%s\n", loglvl, str); } } } if (p != str) - printk("%s\n", str); + printk("%s%s\n", loglvl, str); } #ifndef CONFIG_ARM_UNWIND @@ -201,17 +204,19 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } #ifdef CONFIG_ARM_UNWIND -static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) +static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl) { - unwind_backtrace(regs, tsk); + unwind_backtrace(regs, tsk, loglvl); } #else -static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) +static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl) { unsigned int fp, mode; int ok = 1; - printk("Backtrace: "); + printk("%sBacktrace: ", loglvl); if (!tsk) tsk = current; @@ -238,13 +243,13 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) pr_cont("\n"); if (ok) - c_backtrace(fp, mode); + c_backtrace(fp, mode, loglvl); } #endif -void show_stack(struct task_struct *tsk, unsigned long *sp) +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { - dump_backtrace(NULL, tsk); + dump_backtrace(NULL, tsk, loglvl); barrier(); } @@ -288,7 +293,7 @@ static int __die(const char *str, int err, struct pt_regs *regs) if (!user_mode(regs) || in_interrupt()) { dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp, THREAD_SIZE + (unsigned long)task_stack_page(tsk)); - dump_backtrace(regs, tsk); + dump_backtrace(regs, tsk, KERN_EMERG); dump_instr(KERN_EMERG, regs); } @@ -663,10 +668,10 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) if (user_debug & UDBG_SYSCALL) { pr_err("[%d] %s: arm syscall %d\n", task_pid_nr(current), current->comm, no); - dump_instr("", regs); + dump_instr(KERN_ERR, regs); if (user_mode(regs)) { __show_regs(regs); - c_backtrace(frame_pointer(regs), processor_mode(regs)); + c_backtrace(frame_pointer(regs), processor_mode(regs), KERN_ERR); } } #endif diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index 11a964fd66f4..d2bd0df2318d 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -455,7 +455,8 @@ int unwind_frame(struct stackframe *frame) return URC_OK; } -void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk) +void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl) { struct stackframe frame; @@ -493,7 +494,7 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk) urc = unwind_frame(&frame); if (urc < 0) break; - dump_backtrace_entry(where, frame.pc, frame.sp - 4); + dump_backtrace_entry(where, frame.pc, frame.sp - 4, loglvl); } } diff --git a/arch/arm/lib/backtrace-clang.S b/arch/arm/lib/backtrace-clang.S index 2ff375144b55..6174c45f53a5 100644 --- a/arch/arm/lib/backtrace-clang.S +++ b/arch/arm/lib/backtrace-clang.S @@ -17,6 +17,7 @@ #define sv_pc r6 #define mask r7 #define sv_lr r8 +#define loglvl r9 ENTRY(c_backtrace) @@ -99,6 +100,7 @@ ENDPROC(c_backtrace) @ to ensure 8 byte alignment movs frame, r0 @ if frame pointer is zero beq no_frame @ we have no stack frames + mov loglvl, r2 tst r1, #0x10 @ 26 or 32-bit mode? moveq mask, #0xfc000003 movne mask, #0 @ mask for 32-bit @@ -167,6 +169,7 @@ finished_setup: mov r1, sv_lr mov r2, frame bic r1, r1, mask @ mask PC/LR for the mode + mov r3, loglvl bl dump_backtrace_entry /* @@ -183,6 +186,7 @@ finished_setup: ldr r0, [frame] @ locals are stored in @ the preceding frame subeq r0, r0, #4 + mov r2, loglvl bleq dump_backtrace_stm @ dump saved registers /* @@ -196,7 +200,8 @@ finished_setup: bhi for_each_frame 1006: adr r0, .Lbad - mov r1, frame + mov r1, loglvl + mov r2, frame bl printk no_frame: ldmfd sp!, {r4 - r9, fp, pc} ENDPROC(c_backtrace) @@ -209,7 +214,7 @@ ENDPROC(c_backtrace) .long 1005b, 1006b .popsection -.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" +.Lbad: .asciz "%sBacktrace aborted due to bad frame pointer <%p>\n" .align .Lopcode: .word 0xe92d4800 >> 11 @ stmfd sp!, {... fp, lr} .word 0x0b000000 @ bl if these bits are set diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S index 582925238d65..872f658638d9 100644 --- a/arch/arm/lib/backtrace.S +++ b/arch/arm/lib/backtrace.S @@ -18,6 +18,7 @@ #define sv_pc r6 #define mask r7 #define offset r8 +#define loglvl r9 ENTRY(c_backtrace) @@ -25,9 +26,10 @@ ENTRY(c_backtrace) ret lr ENDPROC(c_backtrace) #else - stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location... + stmfd sp!, {r4 - r9, lr} @ Save an extra register so we have a location... movs frame, r0 @ if frame pointer is zero beq no_frame @ we have no stack frames + mov loglvl, r2 tst r1, #0x10 @ 26 or 32-bit mode? ARM( moveq mask, #0xfc000003 ) @@ -73,6 +75,7 @@ for_each_frame: tst frame, mask @ Check for address exceptions ldr r1, [frame, #-4] @ get saved lr mov r2, frame bic r1, r1, mask @ mask PC/LR for the mode + mov r3, loglvl bl dump_backtrace_entry ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists, @@ -80,12 +83,14 @@ for_each_frame: tst frame, mask @ Check for address exceptions teq r3, r1, lsr #11 ldreq r0, [frame, #-8] @ get sp subeq r0, r0, #4 @ point at the last arg + mov r2, loglvl bleq dump_backtrace_stm @ dump saved registers 1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc} ldr r3, .Ldsi @ instruction exists, teq r3, r1, lsr #11 subeq r0, frame, #16 + mov r2, loglvl bleq dump_backtrace_stm @ dump saved registers teq sv_fp, #0 @ zero saved fp means @@ -96,9 +101,10 @@ for_each_frame: tst frame, mask @ Check for address exceptions bhi for_each_frame 1006: adr r0, .Lbad - mov r1, frame + mov r1, loglvl + mov r2, frame bl printk -no_frame: ldmfd sp!, {r4 - r8, pc} +no_frame: ldmfd sp!, {r4 - r9, pc} ENDPROC(c_backtrace) .pushsection __ex_table,"a" @@ -109,7 +115,7 @@ ENDPROC(c_backtrace) .long 1004b, 1006b .popsection -.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" +.Lbad: .asciz "%sBacktrace aborted due to bad frame pointer <%p>\n" .align .Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... fp, ip, lr, pc} .word 0xe92d0000 >> 11 @ stmfd sp!, {} diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 4d1cf74a2caa..54c2719fec46 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -451,3 +451,4 @@ 435 common clone3 sys_clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common process_madvise sys_process_madvise diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 4d9b1f48dc39..fdb913cc0bcb 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -64,7 +64,8 @@ struct stackframe { extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); -extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); +extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl); DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 803039d504de..3b859596840d 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 439 +#define __NR_compat_syscalls 440 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index c1c61635f89c..4958633ea7c2 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -883,6 +883,8 @@ __SYSCALL(__NR_clone3, sys_clone3) __SYSCALL(__NR_openat2, sys_openat2) #define __NR_pidfd_getfd 438 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) +#define __NR_process_madvise 439 +__SYSCALL(__NR_process_madvise, compat_sys_process_madvise) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index eade7807e819..6089638c7d43 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -306,7 +306,7 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { __show_regs(regs); - dump_backtrace(regs, NULL); + dump_backtrace(regs, NULL, KERN_DEFAULT); } static void tls_thread_flush(void) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 10d6451b2776..a4b5292459f3 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -53,9 +53,9 @@ static const char *handler[]= { int show_unhandled_signals = 0; -static void dump_backtrace_entry(unsigned long where) +static void dump_backtrace_entry(unsigned long where, const char *loglvl) { - printk(" %pS\n", (void *)where); + printk("%s %pS\n", loglvl, (void *)where); } static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) @@ -83,7 +83,8 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) printk("%sCode: %s\n", lvl, str); } -void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) +void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl) { struct stackframe frame; int skip = 0; @@ -115,11 +116,11 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) thread_saved_pc(tsk)); } - printk("Call trace:\n"); + printk("%sCall trace:\n", loglvl); do { /* skip until specified stack frame */ if (!skip) { - dump_backtrace_entry(frame.pc); + dump_backtrace_entry(frame.pc, loglvl); } else if (frame.fp == regs->regs[29]) { skip = 0; /* @@ -129,16 +130,16 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) * at which an exception has taken place, use regs->pc * instead. */ - dump_backtrace_entry(regs->pc); + dump_backtrace_entry(regs->pc, loglvl); } } while (!unwind_frame(tsk, &frame)); put_task_stack(tsk); } -void show_stack(struct task_struct *tsk, unsigned long *sp) +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { - dump_backtrace(NULL, tsk); + dump_backtrace(NULL, tsk, loglvl); barrier(); } diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index ec61034fdf56..2b9121c755be 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -344,12 +344,13 @@ asmlinkage int process_exception(struct pt_regs *regs) static int kstack_depth_to_print = 48; -static void show_trace(unsigned long *stack, unsigned long *endstack) +static void show_trace(unsigned long *stack, unsigned long *endstack, + const char *loglvl) { unsigned long addr; int i; - pr_debug("Call trace:"); + printk("%sCall trace:", loglvl); i = 0; while (stack + 1 <= endstack) { addr = *stack++; @@ -364,16 +365,17 @@ static void show_trace(unsigned long *stack, unsigned long *endstack) if (__kernel_text_address(addr)) { #ifndef CONFIG_KALLSYMS if (i % 5 == 0) - pr_debug("\n "); + printk("%s\n ", loglvl); #endif - pr_debug(" [<%08lx>] %pS\n", addr, (void *)addr); + printk("%s [<%08lx>] %pS\n", loglvl, addr, (void *)addr); i++; } } - pr_debug("\n"); + printk("%s\n", loglvl); } -void show_stack(struct task_struct *task, unsigned long *stack) +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) { unsigned long *p, *endstack; int i; @@ -398,7 +400,7 @@ void show_stack(struct task_struct *task, unsigned long *stack) pr_cont(" %08lx", *p++); } pr_cont("\n"); - show_trace(stack, endstack); + show_trace(stack, endstack, loglvl); } int is_valid_bugaddr(unsigned long addr) diff --git a/arch/csky/kernel/dumpstack.c b/arch/csky/kernel/dumpstack.c index d67f9777cfd9..b9660b1944a1 100644 --- a/arch/csky/kernel/dumpstack.c +++ b/arch/csky/kernel/dumpstack.c @@ -5,7 +5,7 @@ int kstack_depth_to_print = 48; -void show_trace(unsigned long *stack) +static void show_trace(unsigned long *stack, const char *loglvl) { unsigned long *stack_end; unsigned long *stack_start; @@ -17,7 +17,7 @@ void show_trace(unsigned long *stack) stack_end = (unsigned long *) (addr + THREAD_SIZE); fp = stack; - pr_info("\nCall Trace:"); + printk("%s\nCall Trace:", loglvl); while (fp > stack_start && fp < stack_end) { #ifdef CONFIG_STACKTRACE @@ -32,7 +32,8 @@ void show_trace(unsigned long *stack) pr_cont("\n"); } -void show_stack(struct task_struct *task, unsigned long *stack) +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) { if (!stack) { if (task) @@ -45,5 +46,5 @@ void show_stack(struct task_struct *task, unsigned long *stack) #endif } - show_trace(stack); + show_trace(stack, loglvl); } diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c index 5a82230bddf9..bbd801f86eb5 100644 --- a/arch/csky/kernel/ptrace.c +++ b/arch/csky/kernel/ptrace.c @@ -344,7 +344,7 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) trace_sys_exit(regs, syscall_get_return_value(current, regs)); } -extern void show_stack(struct task_struct *task, unsigned long *stack); +extern void show_stack(struct task_struct *task, unsigned long *stack, const char *loglvl); void show_regs(struct pt_regs *fp) { unsigned long *sp; @@ -420,6 +420,6 @@ void show_regs(struct pt_regs *fp) } pr_cont("\n"); - show_stack(NULL, (unsigned long *)fp->regs[4]); + show_stack(NULL, (unsigned long *)fp->regs[4], KERN_INFO); return; } diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index e47a9e0dc278..5d8b969cd8f3 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -115,7 +115,7 @@ void die(const char *str, struct pt_regs *fp, unsigned long err) static int kstack_depth_to_print = 24; -void show_stack(struct task_struct *task, unsigned long *esp) +void show_stack(struct task_struct *task, unsigned long *esp, const char *loglvl) { unsigned long *stack, addr; int i; @@ -125,17 +125,17 @@ void show_stack(struct task_struct *task, unsigned long *esp) stack = esp; - pr_info("Stack from %08lx:", (unsigned long)stack); + printk("%sStack from %08lx:", loglvl, (unsigned long)stack); for (i = 0; i < kstack_depth_to_print; i++) { if (((unsigned long)stack & (THREAD_SIZE - 1)) >= THREAD_SIZE-4) break; if (i % 8 == 0) - pr_info(" "); + printk("%s ", loglvl); pr_cont(" %08lx", *stack++); } - pr_info("\nCall Trace:\n"); + printk("%s\nCall Trace:\n", loglvl); i = 0; stack = esp; while (((unsigned long)stack & (THREAD_SIZE - 1)) < THREAD_SIZE-4) { @@ -150,10 +150,10 @@ void show_stack(struct task_struct *task, unsigned long *esp) */ if (check_kernel_text(addr)) { if (i % 4 == 0) - pr_info(" "); + printk("%s ", loglvl); pr_cont(" [<%08lx>]", addr); i++; } } - pr_info("\n"); + printk("%s\n", loglvl); } diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 69c623b14ddd..904134b37232 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -79,7 +79,7 @@ static const char *ex_name(int ex) } static void do_show_stack(struct task_struct *task, unsigned long *fp, - unsigned long ip) + unsigned long ip, const char *loglvl) { int kstack_depth_to_print = 24; unsigned long offset, size; @@ -93,9 +93,8 @@ static void do_show_stack(struct task_struct *task, unsigned long *fp, if (task == NULL) task = current; - printk(KERN_INFO "CPU#%d, %s/%d, Call Trace:\n", - raw_smp_processor_id(), task->comm, - task_pid_nr(task)); + printk("%sCPU#%d, %s/%d, Call Trace:\n", loglvl, raw_smp_processor_id(), + task->comm, task_pid_nr(task)); if (fp == NULL) { if (task == current) { @@ -108,7 +107,7 @@ static void do_show_stack(struct task_struct *task, unsigned long *fp, } if ((((unsigned long) fp) & 0x3) || ((unsigned long) fp < 0x1000)) { - printk(KERN_INFO "-- Corrupt frame pointer %p\n", fp); + printk("%s-- Corrupt frame pointer %p\n", loglvl, fp); return; } @@ -125,8 +124,7 @@ static void do_show_stack(struct task_struct *task, unsigned long *fp, name = kallsyms_lookup(ip, &size, &offset, &modname, tmpstr); - printk(KERN_INFO "[%p] 0x%lx: %s + 0x%lx", fp, ip, name, - offset); + printk("%s[%p] 0x%lx: %s + 0x%lx", loglvl, fp, ip, name, offset); if (((unsigned long) fp < low) || (high < (unsigned long) fp)) printk(KERN_CONT " (FP out of bounds!)"); if (modname) @@ -136,8 +134,7 @@ static void do_show_stack(struct task_struct *task, unsigned long *fp, newfp = (unsigned long *) *fp; if (((unsigned long) newfp) & 0x3) { - printk(KERN_INFO "-- Corrupt frame pointer %p\n", - newfp); + printk("%s-- Corrupt frame pointer %p\n", loglvl, newfp); break; } @@ -147,7 +144,7 @@ static void do_show_stack(struct task_struct *task, unsigned long *fp, + 8); if (regs->syscall_nr != -1) { - printk(KERN_INFO "-- trap0 -- syscall_nr: %ld", + printk("%s-- trap0 -- syscall_nr: %ld", loglvl, regs->syscall_nr); printk(KERN_CONT " psp: %lx elr: %lx\n", pt_psp(regs), pt_elr(regs)); @@ -155,7 +152,7 @@ static void do_show_stack(struct task_struct *task, unsigned long *fp, } else { /* really want to see more ... */ kstack_depth_to_print += 6; - printk(KERN_INFO "-- %s (0x%lx) badva: %lx\n", + printk("%s-- %s (0x%lx) badva: %lx\n", loglvl, ex_name(pt_cause(regs)), pt_cause(regs), pt_badva(regs)); } @@ -178,10 +175,10 @@ static void do_show_stack(struct task_struct *task, unsigned long *fp, } } -void show_stack(struct task_struct *task, unsigned long *fp) +void show_stack(struct task_struct *task, unsigned long *fp, const char *loglvl) { /* Saved link reg is one word above FP */ - do_show_stack(task, fp, 0); + do_show_stack(task, fp, 0, loglvl); } int die(const char *str, struct pt_regs *regs, long err) @@ -207,7 +204,7 @@ int die(const char *str, struct pt_regs *regs, long err) print_modules(); show_regs(regs); - do_show_stack(current, ®s->r30, pt_elr(regs)); + do_show_stack(current, ®s->r30, pt_elr(regs), KERN_EMERG); bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h index 7ff574d56429..b3aa46090101 100644 --- a/arch/ia64/include/asm/ptrace.h +++ b/arch/ia64/include/asm/ptrace.h @@ -114,7 +114,6 @@ static inline long regs_return_value(struct pt_regs *regs) struct task_struct; /* forward decl */ struct unw_frame_info; /* forward decl */ - extern void ia64_do_show_stack (struct unw_frame_info *, void *); extern unsigned long ia64_get_user_rbs_end (struct task_struct *, struct pt_regs *, unsigned long *); extern long ia64_peek (struct task_struct *, struct switch_stack *, unsigned long, diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 6fb54dfa1350..2703f7795672 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1631,7 +1631,7 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi if (read_trylock(&tasklist_lock)) { do_each_thread (g, t) { printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); - show_stack(t, NULL); + show_stack(t, NULL, KERN_DEFAULT); } while_each_thread (g, t); read_unlock(&tasklist_lock); } diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 10cb9382ab76..96dfb9e4b16f 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -64,12 +64,13 @@ EXPORT_SYMBOL(boot_option_idle_override); void (*pm_power_off) (void); EXPORT_SYMBOL(pm_power_off); -void +static void ia64_do_show_stack (struct unw_frame_info *info, void *arg) { unsigned long ip, sp, bsp; + const char *loglvl = arg; - printk("\nCall Trace:\n"); + printk("%s\nCall Trace:\n", loglvl); do { unw_get_ip(info, &ip); if (ip == 0) @@ -77,22 +78,22 @@ ia64_do_show_stack (struct unw_frame_info *info, void *arg) unw_get_sp(info, &sp); unw_get_bsp(info, &bsp); - printk(" [<%016lx>] %pS\n" + printk("%s [<%016lx>] %pS\n" " sp=%016lx bsp=%016lx\n", - ip, (void *)ip, sp, bsp); + loglvl, ip, (void *)ip, sp, bsp); } while (unw_unwind(info) >= 0); } void -show_stack (struct task_struct *task, unsigned long *sp) +show_stack (struct task_struct *task, unsigned long *sp, const char *loglvl) { if (!task) - unw_init_running(ia64_do_show_stack, NULL); + unw_init_running(ia64_do_show_stack, (void *)loglvl); else { struct unw_frame_info info; unw_init_from_blocked_task(&info, task); - ia64_do_show_stack(&info, NULL); + ia64_do_show_stack(&info, (void *)loglvl); } } @@ -150,7 +151,7 @@ show_regs (struct pt_regs *regs) ((i == sof - 1) || (i % 3) == 2) ? "\n" : " "); } } else - show_stack(NULL, NULL); + show_stack(NULL, NULL, KERN_DEFAULT); } /* local support for deprecated console_print */ diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 042911e670b8..9524af1c318c 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -358,3 +358,4 @@ # 435 reserved for clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common process_madvise sys_process_madvise diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index f4f49fcb76d0..8197050c097c 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -437,3 +437,4 @@ 435 common clone3 __sys_clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common process_madvise sys_process_madvise diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 344f93d36a9a..df6fc782754f 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -811,13 +811,13 @@ asmlinkage void buserr_c(struct frame *fp) static int kstack_depth_to_print = 48; -void show_trace(unsigned long *stack) +static void show_trace(unsigned long *stack, const char *loglvl) { unsigned long *endstack; unsigned long addr; int i; - pr_info("Call Trace:"); + printk("%sCall Trace:", loglvl); addr = (unsigned long)stack + THREAD_SIZE - 1; endstack = (unsigned long *)(addr & -THREAD_SIZE); i = 0; @@ -916,7 +916,7 @@ void show_registers(struct pt_regs *regs) default: pr_cont("\n"); } - show_stack(NULL, (unsigned long *)addr); + show_stack(NULL, (unsigned long *)addr, KERN_INFO); pr_info("Code:"); set_fs(KERNEL_DS); @@ -935,7 +935,8 @@ void show_registers(struct pt_regs *regs) pr_cont("\n"); } -void show_stack(struct task_struct *task, unsigned long *stack) +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) { unsigned long *p; unsigned long *endstack; @@ -949,7 +950,7 @@ void show_stack(struct task_struct *task, unsigned long *stack) } endstack = (unsigned long *)(((unsigned long)stack + THREAD_SIZE - 1) & -THREAD_SIZE); - pr_info("Stack from %08lx:", (unsigned long)stack); + printk("%sStack from %08lx:", loglvl, (unsigned long)stack); p = stack; for (i = 0; i < kstack_depth_to_print; i++) { if (p + 1 > endstack) @@ -959,7 +960,7 @@ void show_stack(struct task_struct *task, unsigned long *stack) pr_cont(" %08lx", *p++); } pr_cont("\n"); - show_trace(stack); + show_trace(stack, loglvl); } /* diff --git a/arch/microblaze/include/asm/unwind.h b/arch/microblaze/include/asm/unwind.h index c327d673622a..3db81777a887 100644 --- a/arch/microblaze/include/asm/unwind.h +++ b/arch/microblaze/include/asm/unwind.h @@ -20,7 +20,8 @@ extern struct trap_handler_info microblaze_trap_handlers; extern const char _hw_exception_handler; extern const char ex_handler_unhandled; -void microblaze_unwind(struct task_struct *task, struct stack_trace *trace); +void microblaze_unwind(struct task_struct *task, struct stack_trace *trace, + const char *loglvl); #endif /* __MICROBLAZE_UNWIND_H */ diff --git a/arch/microblaze/kernel/stacktrace.c b/arch/microblaze/kernel/stacktrace.c index b4debe283a79..b266c4d6ed9d 100644 --- a/arch/microblaze/kernel/stacktrace.c +++ b/arch/microblaze/kernel/stacktrace.c @@ -20,12 +20,12 @@ void save_stack_trace(struct stack_trace *trace) { /* Exclude our helper functions from the trace*/ trace->skip += 2; - microblaze_unwind(NULL, trace); + microblaze_unwind(NULL, trace, ""); } EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - microblaze_unwind(tsk, trace); + microblaze_unwind(tsk, trace, ""); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 4c67b11f9c9e..c5b6c8afe445 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -443,3 +443,4 @@ 435 common clone3 sys_clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common process_madvise sys_process_madvise diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c index 45bbba9d919f..94b6fe93147d 100644 --- a/arch/microblaze/kernel/traps.c +++ b/arch/microblaze/kernel/traps.c @@ -31,7 +31,7 @@ static int __init kstack_setup(char *s) } __setup("kstack=", kstack_setup); -void show_stack(struct task_struct *task, unsigned long *sp) +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { unsigned long words_to_show; u32 fp = (u32) sp; @@ -50,7 +50,7 @@ void show_stack(struct task_struct *task, unsigned long *sp) if (kstack_depth_to_print && (words_to_show > kstack_depth_to_print)) words_to_show = kstack_depth_to_print; - pr_info("Kernel Stack:\n"); + printk("%sKernel Stack:\n", loglvl); /* * Make the first line an 'odd' size if necessary to get @@ -65,11 +65,11 @@ void show_stack(struct task_struct *task, unsigned long *sp) words_to_show -= line1_words; } } - print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 32, 4, (void *)fp, + print_hex_dump(loglvl, "", DUMP_PREFIX_ADDRESS, 32, 4, (void *)fp, words_to_show << 2, 0); - pr_info("\n\nCall Trace:\n"); - microblaze_unwind(task, NULL); - pr_info("\n"); + printk("%s\n\nCall Trace:\n", loglvl); + microblaze_unwind(task, NULL, loglvl); + printk("%s\n", loglvl); if (!task) task = current; diff --git a/arch/microblaze/kernel/unwind.c b/arch/microblaze/kernel/unwind.c index 34c270cb11fc..778a761af0a7 100644 --- a/arch/microblaze/kernel/unwind.c +++ b/arch/microblaze/kernel/unwind.c @@ -154,7 +154,8 @@ static int lookup_prev_stack_frame(unsigned long fp, unsigned long pc, static void microblaze_unwind_inner(struct task_struct *task, unsigned long pc, unsigned long fp, unsigned long leaf_return, - struct stack_trace *trace); + struct stack_trace *trace, + const char *loglvl); /** * unwind_trap - Unwind through a system trap, that stored previous state @@ -162,16 +163,18 @@ static void microblaze_unwind_inner(struct task_struct *task, */ #ifdef CONFIG_MMU static inline void unwind_trap(struct task_struct *task, unsigned long pc, - unsigned long fp, struct stack_trace *trace) + unsigned long fp, struct stack_trace *trace, + const char *loglvl) { /* To be implemented */ } #else static inline void unwind_trap(struct task_struct *task, unsigned long pc, - unsigned long fp, struct stack_trace *trace) + unsigned long fp, struct stack_trace *trace, + const char *loglvl) { const struct pt_regs *regs = (const struct pt_regs *) fp; - microblaze_unwind_inner(task, regs->pc, regs->r1, regs->r15, trace); + microblaze_unwind_inner(task, regs->pc, regs->r1, regs->r15, trace, loglvl); } #endif @@ -184,11 +187,13 @@ static inline void unwind_trap(struct task_struct *task, unsigned long pc, * the caller's return address. * @trace : Where to store stack backtrace (PC values). * NULL == print backtrace to kernel log + * @loglvl : Used for printk log level if (trace == NULL). */ static void microblaze_unwind_inner(struct task_struct *task, unsigned long pc, unsigned long fp, unsigned long leaf_return, - struct stack_trace *trace) + struct stack_trace *trace, + const char *loglvl) { int ofs = 0; @@ -214,11 +219,11 @@ static void microblaze_unwind_inner(struct task_struct *task, const struct pt_regs *regs = (const struct pt_regs *) fp; #endif - pr_info("HW EXCEPTION\n"); + printk("%sHW EXCEPTION\n", loglvl); #ifndef CONFIG_MMU microblaze_unwind_inner(task, regs->r17 - 4, fp + EX_HANDLER_STACK_SIZ, - regs->r15, trace); + regs->r15, trace, loglvl); #endif return; } @@ -228,8 +233,8 @@ static void microblaze_unwind_inner(struct task_struct *task, if ((return_to >= handler->start_addr) && (return_to <= handler->end_addr)) { if (!trace) - pr_info("%s\n", handler->trap_name); - unwind_trap(task, pc, fp, trace); + printk("%s%s\n", loglvl, handler->trap_name); + unwind_trap(task, pc, fp, trace, loglvl); return; } } @@ -248,13 +253,13 @@ static void microblaze_unwind_inner(struct task_struct *task, } else { /* Have we reached userland? */ if (unlikely(pc == task_pt_regs(task)->pc)) { - pr_info("[<%p>] PID %lu [%s]\n", - (void *) pc, + printk("%s[<%p>] PID %lu [%s]\n", + loglvl, (void *) pc, (unsigned long) task->pid, task->comm); break; } else - print_ip_sym(pc); + print_ip_sym(loglvl, pc); } /* Stop when we reach anything not part of the kernel */ @@ -282,14 +287,16 @@ static void microblaze_unwind_inner(struct task_struct *task, * @task : Task whose stack we are to unwind (NULL == current) * @trace : Where to store stack backtrace (PC values). * NULL == print backtrace to kernel log + * @loglvl : Used for printk log level if (trace == NULL). */ -void microblaze_unwind(struct task_struct *task, struct stack_trace *trace) +void microblaze_unwind(struct task_struct *task, struct stack_trace *trace, + const char *loglvl) { if (task) { if (task == current) { const struct pt_regs *regs = task_pt_regs(task); microblaze_unwind_inner(task, regs->pc, regs->r1, - regs->r15, trace); + regs->r15, trace, loglvl); } else { struct thread_info *thread_info = (struct thread_info *)(task->stack); @@ -299,7 +306,8 @@ void microblaze_unwind(struct task_struct *task, struct stack_trace *trace) microblaze_unwind_inner(task, (unsigned long) &_switch_to, cpu_context->r1, - cpu_context->r15, trace); + cpu_context->r15, + trace, loglvl); } } else { unsigned long pc, fp; @@ -314,7 +322,7 @@ void microblaze_unwind(struct task_struct *task, struct stack_trace *trace) ); /* Since we are not a leaf function, use leaf_return = 0 */ - microblaze_unwind_inner(current, pc, fp, 0, trace); + microblaze_unwind_inner(current, pc, fp, 0, trace, loglvl); } } diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 1f9e8ad636cc..1a20c9fcdcdb 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -376,3 +376,4 @@ 435 n32 clone3 __sys_clone3 437 n32 openat2 sys_openat2 438 n32 pidfd_getfd sys_pidfd_getfd +439 n32 process_madvise compat_sys_process_madvise diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index c0b9d802dbf6..0078f891bb92 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -352,3 +352,4 @@ 435 n64 clone3 __sys_clone3 437 n64 openat2 sys_openat2 438 n64 pidfd_getfd sys_pidfd_getfd +439 n64 process_madvise sys_process_madvise diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 31968cbd6464..4522902228b5 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -106,26 +106,26 @@ void (*board_bind_eic_interrupt)(int irq, int regset); void (*board_ebase_setup)(void); void(*board_cache_error_setup)(void); -static void show_raw_backtrace(unsigned long reg29) +static void show_raw_backtrace(unsigned long reg29, const char *loglvl) { unsigned long *sp = (unsigned long *)(reg29 & ~3); unsigned long addr; - printk("Call Trace:"); + printk("%sCall Trace:", loglvl); #ifdef CONFIG_KALLSYMS - printk("\n"); + printk("%s\n", loglvl); #endif while (!kstack_end(sp)) { unsigned long __user *p = (unsigned long __user *)(unsigned long)sp++; if (__get_user(addr, p)) { - printk(" (Bad stack address)"); + printk("%s (Bad stack address)", loglvl); break; } if (__kernel_text_address(addr)) - print_ip_sym(addr); + print_ip_sym(loglvl, addr); } - printk("\n"); + printk("%s\n", loglvl); } #ifdef CONFIG_KALLSYMS @@ -138,7 +138,8 @@ static int __init set_raw_show_trace(char *str) __setup("raw_show_trace", set_raw_show_trace); #endif -static void show_backtrace(struct task_struct *task, const struct pt_regs *regs) +static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, + const char *loglvl) { unsigned long sp = regs->regs[29]; unsigned long ra = regs->regs[31]; @@ -148,12 +149,12 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs) task = current; if (raw_show_trace || user_mode(regs) || !__kernel_text_address(pc)) { - show_raw_backtrace(sp); + show_raw_backtrace(sp, loglvl); return; } - printk("Call Trace:\n"); + printk("%sCall Trace:\n", loglvl); do { - print_ip_sym(pc); + print_ip_sym(loglvl, pc); pc = unwind_stack(task, &sp, pc, &ra); } while (pc); pr_cont("\n"); @@ -164,19 +165,19 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs) * with at least a bit of error checking ... */ static void show_stacktrace(struct task_struct *task, - const struct pt_regs *regs) + const struct pt_regs *regs, const char *loglvl) { const int field = 2 * sizeof(unsigned long); long stackdata; int i; unsigned long __user *sp = (unsigned long __user *)regs->regs[29]; - printk("Stack :"); + printk("%sStack :", loglvl); i = 0; while ((unsigned long) sp & (PAGE_SIZE - 1)) { if (i && ((i % (64 / field)) == 0)) { pr_cont("\n"); - printk(" "); + printk("%s ", loglvl); } if (i > 39) { pr_cont(" ..."); @@ -192,10 +193,10 @@ static void show_stacktrace(struct task_struct *task, i++; } pr_cont("\n"); - show_backtrace(task, regs); + show_backtrace(task, regs, loglvl); } -void show_stack(struct task_struct *task, unsigned long *sp) +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { struct pt_regs regs; mm_segment_t old_fs = get_fs(); @@ -219,7 +220,7 @@ void show_stack(struct task_struct *task, unsigned long *sp) * the stack in the kernel (not user) address space. */ set_fs(KERNEL_DS); - show_stacktrace(task, ®s); + show_stacktrace(task, ®s, loglvl); set_fs(old_fs); } @@ -371,7 +372,7 @@ void show_registers(struct pt_regs *regs) if (!user_mode(regs)) /* Necessary for getting the correct stack content */ set_fs(KERNEL_DS); - show_stacktrace(current, regs); + show_stacktrace(current, regs, KERN_DEFAULT); show_code((unsigned int __user *) regs->cp0_epc); printk("\n"); set_fs(old_fs); diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index f4d386b52622..6a9772ba7392 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -97,18 +97,19 @@ static void dump_instr(struct pt_regs *regs) } #define LOOP_TIMES (100) -static void __dump(struct task_struct *tsk, unsigned long *base_reg) +static void __dump(struct task_struct *tsk, unsigned long *base_reg, + const char *loglvl) { unsigned long ret_addr; int cnt = LOOP_TIMES, graph = 0; - pr_emerg("Call Trace:\n"); + printk("%sCall Trace:\n", loglvl); if (!IS_ENABLED(CONFIG_FRAME_POINTER)) { while (!kstack_end(base_reg)) { ret_addr = *base_reg++; if (__kernel_text_address(ret_addr)) { ret_addr = ftrace_graph_ret_addr( tsk, &graph, ret_addr, NULL); - print_ip_sym(ret_addr); + print_ip_sym(loglvl, ret_addr); } if (--cnt < 0) break; @@ -124,17 +125,17 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg) ret_addr = ftrace_graph_ret_addr( tsk, &graph, ret_addr, NULL); - print_ip_sym(ret_addr); + print_ip_sym(loglvl, ret_addr); } if (--cnt < 0) break; base_reg = (unsigned long *)next_fp; } } - pr_emerg("\n"); + printk("%s\n", loglvl); } -void show_stack(struct task_struct *tsk, unsigned long *sp) +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { unsigned long *base_reg; @@ -151,7 +152,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) else __asm__ __volatile__("\tori\t%0, $fp, #0\n":"=r"(base_reg)); } - __dump(tsk, base_reg); + __dump(tsk, base_reg, loglvl); barrier(); } diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index 486db793923c..b172da4eb1a9 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -52,12 +52,13 @@ void _exception(int signo, struct pt_regs *regs, int code, unsigned long addr) } /* - * The show_stack is an external API which we do not use ourselves. + * The show_stack() is external API which we do not use ourselves. */ int kstack_depth_to_print = 48; -void show_stack(struct task_struct *task, unsigned long *stack) +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) { unsigned long *endstack, addr; int i; @@ -72,16 +73,16 @@ void show_stack(struct task_struct *task, unsigned long *stack) addr = (unsigned long) stack; endstack = (unsigned long *) PAGE_ALIGN(addr); - pr_emerg("Stack from %08lx:", (unsigned long)stack); + printk("%sStack from %08lx:", loglvl, (unsigned long)stack); for (i = 0; i < kstack_depth_to_print; i++) { if (stack + 1 > endstack) break; if (i % 8 == 0) - pr_emerg("\n "); - pr_emerg(" %08lx", *stack++); + printk("%s\n ", loglvl); + printk("%s %08lx", loglvl, *stack++); } - pr_emerg("\nCall Trace:"); + printk("%s\nCall Trace:", loglvl); i = 0; while (stack + 1 <= endstack) { addr = *stack++; @@ -97,11 +98,11 @@ void show_stack(struct task_struct *task, unsigned long *stack) (addr <= (unsigned long) _etext))) { if (i % 4 == 0) pr_emerg("\n "); - pr_emerg(" [<%08lx>]", addr); + printk("%s [<%08lx>]", loglvl, addr); i++; } } - pr_emerg("\n"); + printk("%s\n", loglvl); } void __init trap_init(void) diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index c11aa2e17ce0..3022b0ad142c 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -41,18 +41,20 @@ unsigned long __user *lwa_addr; void print_trace(void *data, unsigned long addr, int reliable) { - pr_emerg("[<%p>] %s%pS\n", (void *) addr, reliable ? "" : "? ", + const char *loglvl = data; + + printk("%s[<%p>] %s%pS\n", loglvl, (void *) addr, reliable ? "" : "? ", (void *) addr); } /* displays a short stack trace */ -void show_stack(struct task_struct *task, unsigned long *esp) +void show_stack(struct task_struct *task, unsigned long *esp, const char *loglvl) { if (esp == NULL) esp = (unsigned long *)&esp; - pr_emerg("Call trace:\n"); - unwind_stack(NULL, esp, print_trace); + printk("%sCall trace:\n", loglvl); + unwind_stack((void *)loglvl, esp, print_trace); } void show_registers(struct pt_regs *regs) @@ -96,7 +98,7 @@ void show_registers(struct pt_regs *regs) if (in_kernel) { printk("\nStack: "); - show_stack(NULL, (unsigned long *)esp); + show_stack(NULL, (unsigned long *)esp, KERN_EMERG); printk("\nCode: "); if (regs->pc < PAGE_OFFSET) diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 52a15f5cd130..3cbf5f5edab5 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -435,3 +435,4 @@ 435 common clone3 sys_clone3_wrapper 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common process_madvise sys_process_madvise compat_sys_process_madvise diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 82fc01189488..0a89899f154a 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -49,7 +49,7 @@ #include "../math-emu/math-emu.h" /* for handle_fpe() */ static void parisc_show_stack(struct task_struct *task, - struct pt_regs *regs); + struct pt_regs *regs, const char *loglvl); static int printbinary(char *buf, unsigned long x, int nbits) { @@ -155,7 +155,7 @@ void show_regs(struct pt_regs *regs) printk("%s IAOQ[1]: %pS\n", level, (void *) regs->iaoq[1]); printk("%s RP(r2): %pS\n", level, (void *) regs->gr[2]); - parisc_show_stack(current, regs); + parisc_show_stack(current, regs, KERN_DEFAULT); } } @@ -170,37 +170,37 @@ static DEFINE_RATELIMIT_STATE(_hppa_rs, } -static void do_show_stack(struct unwind_frame_info *info) +static void do_show_stack(struct unwind_frame_info *info, const char *loglvl) { int i = 1; - printk(KERN_CRIT "Backtrace:\n"); + printk("%sBacktrace:\n", loglvl); while (i <= MAX_UNWIND_ENTRIES) { if (unwind_once(info) < 0 || info->ip == 0) break; if (__kernel_text_address(info->ip)) { - printk(KERN_CRIT " [<" RFMT ">] %pS\n", - info->ip, (void *) info->ip); + printk("%s [<" RFMT ">] %pS\n", + loglvl, info->ip, (void *) info->ip); i++; } } - printk(KERN_CRIT "\n"); + printk("%s\n", loglvl); } static void parisc_show_stack(struct task_struct *task, - struct pt_regs *regs) + struct pt_regs *regs, const char *loglvl) { struct unwind_frame_info info; unwind_frame_init_task(&info, task, regs); - do_show_stack(&info); + do_show_stack(&info, loglvl); } -void show_stack(struct task_struct *t, unsigned long *sp) +void show_stack(struct task_struct *t, unsigned long *sp, const char *loglvl) { - parisc_show_stack(t, NULL); + parisc_show_stack(t, NULL, loglvl); } int is_valid_bugaddr(unsigned long iaoq) @@ -446,7 +446,7 @@ void parisc_terminate(char *msg, struct pt_regs *regs, int code, unsigned long o /* show_stack(NULL, (unsigned long *)regs->gr[30]); */ struct unwind_frame_info info; unwind_frame_init(&info, current, regs); - do_show_stack(&info); + do_show_stack(&info, KERN_CRIT); } printk("\n"); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8479c762aef2..60ff743b2fcf 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1445,7 +1445,7 @@ void show_regs(struct pt_regs * regs) printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); #endif - show_stack(current, (unsigned long *) regs->gpr[1]); + show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT); if (!user_mode(regs)) show_instructions(regs); } @@ -2048,7 +2048,8 @@ unsigned long get_wchan(struct task_struct *p) static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; -void show_stack(struct task_struct *tsk, unsigned long *stack) +void show_stack(struct task_struct *tsk, unsigned long *stack, + const char *loglvl) { unsigned long sp, ip, lr, newsp; int count = 0; @@ -2073,7 +2074,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) } lr = 0; - printk("Call Trace:\n"); + printk("%sCall Trace:\n", loglvl); do { if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) break; @@ -2082,7 +2083,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) newsp = stack[0]; ip = stack[STACK_FRAME_LR_SAVE]; if (!firstframe || ip != lr) { - printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); + printk("%s["REG"] ["REG"] %pS", + loglvl, sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER ret_addr = ftrace_graph_ret_addr(current, &ftrace_idx, ip, stack); @@ -2104,8 +2106,9 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); lr = regs->link; - printk("--- interrupt: %lx at %pS\n LR = %pS\n", - regs->trap, (void *)regs->nip, (void *)lr); + printk("%s--- interrupt: %lx at %pS\n LR = %pS\n", + loglvl, regs->trap, + (void *)regs->nip, (void *)lr); firstframe = 1; } diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index c477b8585a29..b6440657ef92 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -260,7 +260,7 @@ static void raise_backtrace_ipi(cpumask_t *mask) pr_cont(" current pointer corrupt? (%px)\n", p->__current); pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1); - show_stack(p->__current, (unsigned long *)p->saved_r1); + show_stack(p->__current, (unsigned long *)p->saved_r1, KERN_WARNING); } } diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 220ae11555f2..03b8bc7707db 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -527,3 +527,4 @@ 435 spu clone3 sys_ni_syscall 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common process_madvise sys_process_madvise compat_sys_process_madvise diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 25db70be4c9c..266a6ca5e15e 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -127,7 +127,7 @@ static void update_csb(struct vas_window *window, return; } - use_mm(window->mm); + kthread_use_mm(window->mm); rc = copy_to_user(csb_addr, &csb, sizeof(csb)); /* * User space polls on csb.flags (first byte). So add barrier @@ -139,7 +139,7 @@ static void update_csb(struct vas_window *window, smp_mb(); rc = copy_to_user(csb_addr, &csb, sizeof(u8)); } - unuse_mm(window->mm); + kthread_unuse_mm(window->mm); put_task_struct(tsk); /* Success */ diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 6c854875ac74..38141a3c70f7 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -99,17 +99,18 @@ static void notrace walk_stackframe(struct task_struct *task, static bool print_trace_address(unsigned long pc, void *arg) { - print_ip_sym(pc); + const char *loglvl = arg; + + print_ip_sym(loglvl, pc); return false; } -void show_stack(struct task_struct *task, unsigned long *sp) +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { pr_cont("Call Trace:\n"); - walk_stackframe(task, NULL, print_trace_address, NULL); + walk_stackframe(task, NULL, print_trace_address, (void *)loglvl); } - static bool save_wchan(unsigned long pc, void *arg) { if (!in_sched_functions(pc)) { diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 2c122d8bab93..0dc4b258b98d 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -126,15 +126,16 @@ unknown: return -EINVAL; } -void show_stack(struct task_struct *task, unsigned long *stack) +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) { struct unwind_state state; - printk("Call Trace:\n"); + printk("%sCall Trace:\n", loglvl); unwind_for_each_frame(&state, task, NULL, (unsigned long) stack) - printk(state.reliable ? " [<%016lx>] %pSR \n" : - "([<%016lx>] %pSR)\n", - state.ip, (void *) state.ip); + printk(state.reliable ? "%s [<%016lx>] %pSR \n" : + "%s([<%016lx>] %pSR)\n", + loglvl, state.ip, (void *) state.ip); debug_show_held_locks(task ? : current); } @@ -175,7 +176,7 @@ void show_regs(struct pt_regs *regs) show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) - show_stack(NULL, (unsigned long *) regs->gprs[15]); + show_stack(NULL, (unsigned long *) regs->gprs[15], KERN_DEFAULT); show_last_breaking_event(regs); } diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index bd7bd3581a0f..69f4a5459f0e 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -440,3 +440,4 @@ 435 common clone3 sys_clone3 sys_clone3 437 common openat2 sys_openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd +439 common process_madvise sys_process_madvise compat_sys_process_madvise diff --git a/arch/sh/include/asm/kdebug.h b/arch/sh/include/asm/kdebug.h index 5212f5fcd752..960545306afa 100644 --- a/arch/sh/include/asm/kdebug.h +++ b/arch/sh/include/asm/kdebug.h @@ -12,7 +12,9 @@ enum die_val { }; /* arch/sh/kernel/dumpstack.c */ -extern void printk_address(unsigned long address, int reliable); -extern void dump_mem(const char *str, unsigned long bottom, unsigned long top); +extern void printk_address(unsigned long address, int reliable, + const char *loglvl); +extern void dump_mem(const char *str, const char *loglvl, + unsigned long bottom, unsigned long top); #endif /* __ASM_SH_KDEBUG_H */ diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h index 0e0ecc0132e3..d44409413418 100644 --- a/arch/sh/include/asm/processor_32.h +++ b/arch/sh/include/asm/processor_32.h @@ -171,7 +171,7 @@ static __inline__ void enable_fpu(void) #define thread_saved_pc(tsk) (tsk->thread.pc) void show_trace(struct task_struct *tsk, unsigned long *sp, - struct pt_regs *regs); + struct pt_regs *regs, const char *loglvl); #ifdef CONFIG_DUMP_CODE void show_code(struct pt_regs *regs); diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index 9f1c9c11d62d..a13c045804ed 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -16,36 +16,37 @@ #include <asm/unwinder.h> #include <asm/stacktrace.h> -void dump_mem(const char *str, unsigned long bottom, unsigned long top) +void dump_mem(const char *str, const char *loglvl, + unsigned long bottom, unsigned long top) { unsigned long p; int i; - printk("%s(0x%08lx to 0x%08lx)\n", str, bottom, top); + printk("%s%s(0x%08lx to 0x%08lx)\n", loglvl, str, bottom, top); for (p = bottom & ~31; p < top; ) { - printk("%04lx: ", p & 0xffff); + printk("%s%04lx: ", loglvl, p & 0xffff); for (i = 0; i < 8; i++, p += 4) { unsigned int val; if (p < bottom || p >= top) - printk(" "); + printk("%s ", loglvl); else { if (__get_user(val, (unsigned int __user *)p)) { - printk("\n"); + printk("%s\n", loglvl); return; } - printk("%08x ", val); + printk("%s%08x ", loglvl, val); } } - printk("\n"); + printk("%s\n", loglvl); } } -void printk_address(unsigned long address, int reliable) +void printk_address(unsigned long address, int reliable, const char *loglvl) { - printk(" [<%p>] %s%pS\n", (void *) address, + printk("%s [<%p>] %s%pS\n", loglvl, (void *) address, reliable ? "" : "? ", (void *) address); } @@ -117,8 +118,7 @@ static int print_trace_stack(void *data, char *name) */ static void print_trace_address(void *data, unsigned long addr, int reliable) { - printk("%s", (char *)data); - printk_address(addr, reliable); + printk_address(addr, reliable, (char *)data); } static const struct stacktrace_ops print_trace_ops = { @@ -127,16 +127,16 @@ static const struct stacktrace_ops print_trace_ops = { }; void show_trace(struct task_struct *tsk, unsigned long *sp, - struct pt_regs *regs) + struct pt_regs *regs, const char *loglvl) { if (regs && user_mode(regs)) return; - printk("\nCall trace:\n"); + printk("%s\nCall trace:\n", loglvl); - unwind_stack(tsk, regs, sp, &print_trace_ops, ""); + unwind_stack(tsk, regs, sp, &print_trace_ops, (void *)loglvl); - printk("\n"); + printk("%s\n", loglvl); if (!tsk) tsk = current; @@ -144,7 +144,7 @@ void show_trace(struct task_struct *tsk, unsigned long *sp, debug_show_held_locks(tsk); } -void show_stack(struct task_struct *tsk, unsigned long *sp) +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { unsigned long stack; @@ -156,7 +156,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) sp = (unsigned long *)tsk->thread.sp; stack = (unsigned long)sp; - dump_mem("Stack: ", stack, THREAD_SIZE + + dump_mem("Stack: ", loglvl, stack, THREAD_SIZE + (unsigned long)task_stack_page(tsk)); - show_trace(tsk, sp, NULL); + show_trace(tsk, sp, NULL, loglvl); } diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index a094633874c3..456cc8d171f7 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -59,7 +59,7 @@ void show_regs(struct pt_regs * regs) printk("MACH: %08lx MACL: %08lx GBR : %08lx PR : %08lx\n", regs->mach, regs->macl, regs->gbr, regs->pr); - show_trace(NULL, (unsigned long *)regs->regs[15], regs); + show_trace(NULL, (unsigned long *)regs->regs[15], regs, KERN_DEFAULT); show_code(regs); } diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index c2844a2e18cd..4c2144225c8e 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -275,13 +275,12 @@ void show_regs(struct pt_regs *regs) * If we're in kernel mode, dump the stack too.. */ if (!user_mode(regs)) { - void show_stack(struct task_struct *tsk, unsigned long *sp); unsigned long sp = regs->regs[15] & 0xffffffff; struct task_struct *tsk = get_current(); tsk->thread.kregs = regs; - show_stack(tsk, (unsigned long *)sp); + show_stack(tsk, (unsigned long *)sp, KERN_DEFAULT); } } diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index c7a30fcd135f..e69d98040777 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -440,3 +440,4 @@ # 435 reserved for clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common process_madvise sys_process_madvise diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 63cf17bc760d..faad65409075 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -38,8 +38,8 @@ void die(const char *str, struct pt_regs *regs, long err) task_pid_nr(current), task_stack_page(current) + 1); if (!user_mode(regs) || in_interrupt()) - dump_mem("Stack: ", regs->regs[15], THREAD_SIZE + - (unsigned long)task_stack_page(current)); + dump_mem("Stack: ", KERN_DEFAULT, regs->regs[15], + THREAD_SIZE + (unsigned long)task_stack_page(current)); notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV); diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 8b3ab65c81c4..1185954bc9d6 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -213,7 +213,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long address) : "paging request", address); pr_alert("PC:"); - printk_address(regs->pc, 1); + printk_address(regs->pc, 1, KERN_ALERT); show_pte(NULL, address); } diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 26cca65e9246..65c0d5207b0c 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -145,10 +145,10 @@ void show_regs(struct pt_regs *r) } /* - * The show_stack is an external API which we do not use ourselves. + * The show_stack() is external API which we do not use ourselves. * The oops is printed in die_if_kernel. */ -void show_stack(struct task_struct *tsk, unsigned long *_ksp) +void show_stack(struct task_struct *tsk, unsigned long *_ksp, const char *loglvl) { unsigned long pc, fp; unsigned long task_base; @@ -170,11 +170,11 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) break; rw = (struct reg_window32 *) fp; pc = rw->ins[7]; - printk("[%08lx : ", pc); - printk("%pS ] ", (void *) pc); + printk("%s[%08lx : ", loglvl, pc); + printk("%s%pS ] ", loglvl, (void *) pc); fp = rw->ins[6]; } while (++count < 16); - printk("\n"); + printk("%s\n", loglvl); } /* diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 4282116e28e7..b2e3fe729b7e 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -195,7 +195,7 @@ void show_regs(struct pt_regs *regs) regs->u_regs[15]); printk("RPC: <%pS>\n", (void *) regs->u_regs[15]); show_regwindow(regs); - show_stack(current, (unsigned long *) regs->u_regs[UREG_FP]); + show_stack(current, (unsigned long *)regs->u_regs[UREG_FP], KERN_DEFAULT); } union global_cpu_snapshot global_cpu_snapshot[NR_CPUS]; diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index f13615ecdecc..bff349fcba0d 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -483,3 +483,4 @@ # 435 reserved for clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common process_madvise sys_process_madvise compat_sys_process_madvise diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 27778b65a965..96d92f107551 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -18,6 +18,7 @@ #include <linux/smp.h> #include <linux/mm.h> #include <linux/init.h> +#include <linux/kallsyms.h> #include <linux/kdebug.h> #include <linux/ftrace.h> #include <linux/reboot.h> @@ -2452,7 +2453,7 @@ static void user_instruction_dump(unsigned int __user *pc) printk("\n"); } -void show_stack(struct task_struct *tsk, unsigned long *_ksp) +void show_stack(struct task_struct *tsk, unsigned long *_ksp, const char *loglvl) { unsigned long fp, ksp; struct thread_info *tp; @@ -2476,7 +2477,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) fp = ksp + STACK_BIAS; - printk("Call Trace:\n"); + printk("%sCall Trace:\n", loglvl); do { struct sparc_stackf *sf; struct pt_regs *regs; @@ -2497,14 +2498,14 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) fp = (unsigned long)sf->fp + STACK_BIAS; } - printk(" [%016lx] %pS\n", pc, (void *) pc); + print_ip_sym(loglvl, pc); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((pc + 8UL) == (unsigned long) &return_to_handler) { struct ftrace_ret_stack *ret_stack; ret_stack = ftrace_graph_get_ret_stack(tsk, graph); if (ret_stack) { pc = ret_stack->ret; - printk(" [%016lx] %pS\n", pc, (void *) pc); + print_ip_sym(loglvl, pc); graph++; } } diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 30575bd92975..a2e680f7d39f 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -648,7 +648,7 @@ static void stack_proc(void *arg) { struct task_struct *task = arg; - show_stack(task, NULL); + show_stack(task, NULL, KERN_INFO); } /* diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index c71b5ef7ea8c..acbc879d2773 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -17,7 +17,9 @@ static void _print_addr(void *data, unsigned long address, int reliable) { - pr_info(" [<%08lx>] %s%pS\n", address, reliable ? "" : "? ", + const char *loglvl = data; + + printk("%s [<%08lx>] %s%pS\n", loglvl, address, reliable ? "" : "? ", (void *)address); } @@ -25,9 +27,9 @@ static const struct stacktrace_ops stackops = { .address = _print_addr }; -void show_stack(struct task_struct *task, unsigned long *stack) +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) { - unsigned long *sp = stack; struct pt_regs *segv_regs = current->thread.segv_regs; int i; @@ -38,20 +40,19 @@ void show_stack(struct task_struct *task, unsigned long *stack) } if (!stack) - sp = get_stack_pointer(task, segv_regs); + stack = get_stack_pointer(task, segv_regs); - pr_info("Stack:\n"); - stack = sp; + printk("%sStack:\n", loglvl); for (i = 0; i < 3 * STACKSLOTS_PER_LINE; i++) { if (kstack_end(stack)) break; if (i && ((i % STACKSLOTS_PER_LINE) == 0)) - pr_cont("\n"); + printk("%s\n", loglvl); pr_cont(" %08lx", *stack++); } - pr_cont("\n"); + printk("%s\n", loglvl); - pr_info("Call Trace:\n"); - dump_trace(current, &stackops, NULL); - pr_info("\n"); + printk("%sCall Trace:\n", loglvl); + dump_trace(current, &stackops, (void *)loglvl); + printk("%s\n", loglvl); } diff --git a/arch/unicore32/kernel/setup.h b/arch/unicore32/kernel/setup.h index e40d3603c7e7..967352323185 100644 --- a/arch/unicore32/kernel/setup.h +++ b/arch/unicore32/kernel/setup.h @@ -29,7 +29,7 @@ extern void kernel_thread_helper(void); extern void __init early_signal_init(void); extern asmlinkage void __backtrace(void); -extern asmlinkage void c_backtrace(unsigned long fp, int pmode); +extern asmlinkage void c_backtrace(unsigned long fp, const char *loglvl); extern void __show_regs(struct pt_regs *); diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index e24f67283864..a3ac01df1a2e 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -135,44 +135,42 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) set_fs(fs); } -static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) +static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl) { - unsigned int fp, mode; + unsigned int fp; int ok = 1; - printk(KERN_DEFAULT "Backtrace: "); + printk("%sBacktrace: ", loglvl); if (!tsk) tsk = current; - if (regs) { + if (regs) fp = regs->UCreg_fp; - mode = processor_mode(regs); - } else if (tsk != current) { + else if (tsk != current) fp = thread_saved_fp(tsk); - mode = 0x10; - } else { + else asm("mov %0, fp" : "=r" (fp) : : "cc"); - mode = 0x10; - } if (!fp) { - printk("no frame pointer"); + printk("%sno frame pointer", loglvl); ok = 0; } else if (verify_stack(fp)) { - printk("invalid frame pointer 0x%08x", fp); + printk("%sinvalid frame pointer 0x%08x", loglvl, fp); ok = 0; } else if (fp < (unsigned long)end_of_stack(tsk)) - printk("frame pointer underflow"); - printk("\n"); + printk("%sframe pointer underflow", loglvl); + printk("%s\n", loglvl); if (ok) - c_backtrace(fp, mode); + c_backtrace(fp, loglvl); } -void show_stack(struct task_struct *tsk, unsigned long *sp) +void show_stack(struct task_struct *tsk, unsigned long *sp, + const char *loglvl) { - dump_backtrace(NULL, tsk); + dump_backtrace(NULL, tsk, loglvl); barrier(); } @@ -200,7 +198,7 @@ static int __die(const char *str, int err, struct thread_info *thread, if (!user_mode(regs) || in_interrupt()) { dump_mem(KERN_EMERG, "Stack: ", regs->UCreg_sp, THREAD_SIZE + (unsigned long)task_stack_page(tsk)); - dump_backtrace(regs, tsk); + dump_backtrace(regs, tsk, KERN_EMERG); dump_instr(KERN_EMERG, regs); } diff --git a/arch/unicore32/lib/backtrace.S b/arch/unicore32/lib/backtrace.S index f303671e2a4e..6221944b81f3 100644 --- a/arch/unicore32/lib/backtrace.S +++ b/arch/unicore32/lib/backtrace.S @@ -16,6 +16,7 @@ #define sv_fp v5 #define sv_pc v6 #define offset v8 +#define loglvl v9 ENTRY(__backtrace) mov r0, fp @@ -27,10 +28,11 @@ ENTRY(c_backtrace) ENDPROC(__backtrace) ENDPROC(c_backtrace) #else - stm.w (v4 - v8, lr), [sp-] @ Save an extra register + stm.w (v4 - v10, lr), [sp-] @ Save an extra register @ so we have a location... mov.a frame, r0 @ if frame pointer is zero beq no_frame @ we have no stack frames + mov loglvl, r1 1: stm.w (pc), [sp-] @ calculate offset of PC stored ldw.w r0, [sp]+, #4 @ by stmfd for this CPU @@ -95,9 +97,10 @@ for_each_frame: bua for_each_frame 1006: adr r0, .Lbad - mov r1, frame + mov r1, loglvl + mov r2, frame b.l printk -no_frame: ldm.w (v4 - v8, pc), [sp]+ +no_frame: ldm.w (v4 - v10, pc), [sp]+ ENDPROC(__backtrace) ENDPROC(c_backtrace) @@ -128,8 +131,11 @@ ENDPROC(c_backtrace) add v7, v7, #1 cxor.a v7, #6 cmoveq v7, #1 - cmoveq r1, #'\n' - cmovne r1, #' ' + bne 201f + adr r0, .Lcr + mov r1, loglvl + b.l printk +201: ldw.w r3, [stack]+, #-4 mov r2, reg csub.a r2, #8 @@ -141,18 +147,20 @@ ENDPROC(c_backtrace) add r2, r2, #0x10 @ so r2 need add 16 201: adr r0, .Lfp + mov r1, loglvl b.l printk 2: sub.a reg, reg, #1 bns 1b cxor.a v7, #0 beq 201f adr r0, .Lcr + mov r1, loglvl b.l printk 201: ldm.w (instr, reg, stack, v7, pc), [sp]+ -.Lfp: .asciz "%cr%d:%08x" -.Lcr: .asciz "\n" -.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" +.Lfp: .asciz "%sr%d:%08x " +.Lcr: .asciz "%s\n" +.Lbad: .asciz "%sBacktrace aborted due to bad frame pointer <%p>\n" .align .Ldsi: .word 0x92eec000 >> 14 @ stm.w sp, (... fp, ip, lr, pc) .word 0x92e10000 >> 14 @ stm.w sp, () diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 54581ac671b4..fd8032ddffb2 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -442,3 +442,4 @@ 435 i386 clone3 sys_clone3 437 i386 openat2 sys_openat2 438 i386 pidfd_getfd sys_pidfd_getfd +439 i386 process_madvise sys_process_madvise compat_sys_process_madvise diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 37b844f839bc..3ff4be446922 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -359,6 +359,7 @@ 435 common clone3 sys_clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 64 process_madvise sys_process_madvise # # x32-specific system call numbers start at 512 to avoid cache impact @@ -402,3 +403,4 @@ 545 x32 execveat compat_sys_execveat 546 x32 preadv2 compat_sys_preadv64v2 547 x32 pwritev2 compat_sys_pwritev64v2 +548 x32 process_madvise compat_sys_process_madvise diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 14db05086bbf..5ae5a68e469d 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -87,7 +87,7 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs) } void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, char *log_lvl); + unsigned long *stack, const char *log_lvl); /* The form of the top of the frame on the stack */ struct stack_frame { diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 16133819415c..5f816861f5d2 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -159,7 +159,7 @@ static void dump_leak(void) return; dump = 1; - show_stack(NULL, NULL); + show_stack(NULL, NULL, KERN_ERR); debug_dma_dump_mappings(NULL); } #endif diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ae64ec7f752f..456511b2284e 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -65,7 +65,7 @@ bool in_entry_stack(unsigned long *stack, struct stack_info *info) } static void printk_stack_address(unsigned long address, int reliable, - char *log_lvl) + const char *log_lvl) { touch_nmi_watchdog(); printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); @@ -160,7 +160,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, } void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, char *log_lvl) + unsigned long *stack, const char *log_lvl) { struct unwind_state state; struct stack_info stack_info = {0}; @@ -279,7 +279,8 @@ next: } } -void show_stack(struct task_struct *task, unsigned long *sp) +void show_stack(struct task_struct *task, unsigned long *sp, + const char *loglvl) { task = task ? : current; @@ -290,7 +291,7 @@ void show_stack(struct task_struct *task, unsigned long *sp) if (!sp && task == current) sp = get_stack_pointer(current, NULL); - show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT); + show_trace_log_lvl(task, NULL, sp, loglvl); } void show_stack_regs(struct pt_regs *regs) diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 85a9ab1bc04d..165cae047770 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -408,3 +408,4 @@ 435 common clone3 sys_clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common process_madvise sys_process_madvise diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 0976e27b8d5d..1671fbb816fa 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -479,25 +479,29 @@ void show_regs(struct pt_regs * regs) static int show_trace_cb(struct stackframe *frame, void *data) { + const char *loglvl = data; + if (kernel_text_address(frame->pc)) - pr_cont(" [<%08lx>] %pB\n", frame->pc, (void *)frame->pc); + printk("%s [<%08lx>] %pB\n", + loglvl, frame->pc, (void *)frame->pc); return 0; } -void show_trace(struct task_struct *task, unsigned long *sp) +static void show_trace(struct task_struct *task, unsigned long *sp, + const char *loglvl) { if (!sp) sp = stack_pointer(task); - pr_info("Call Trace:\n"); - walk_stackframe(sp, show_trace_cb, NULL); + printk("%sCall Trace:\n", loglvl); + walk_stackframe(sp, show_trace_cb, (void *)loglvl); } #define STACK_DUMP_ENTRY_SIZE 4 #define STACK_DUMP_LINE_SIZE 32 static size_t kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; -void show_stack(struct task_struct *task, unsigned long *sp) +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { size_t len; @@ -507,11 +511,11 @@ void show_stack(struct task_struct *task, unsigned long *sp) len = min((-(size_t)sp) & (THREAD_SIZE - STACK_DUMP_ENTRY_SIZE), kstack_depth_to_print * STACK_DUMP_ENTRY_SIZE); - pr_info("Stack:\n"); - print_hex_dump(KERN_INFO, " ", DUMP_PREFIX_NONE, + printk("%sStack:\n", loglvl); + print_hex_dump(loglvl, " ", DUMP_PREFIX_NONE, STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE, sp, len, false); - show_trace(task, sp); + show_trace(task, stack, loglvl); } DEFINE_SPINLOCK(die_lock); @@ -530,7 +534,7 @@ void die(const char * str, struct pt_regs * regs, long err) pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter, pr); show_regs(regs); if (!user_mode(regs)) - show_stack(NULL, (unsigned long*)regs->areg[1]); + show_stack(NULL, (unsigned long *)regs->areg[1], KERN_INFO); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irq(&die_lock); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index bb98b813554f..9dd85bea4026 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -519,7 +519,7 @@ static void dpm_watchdog_handler(struct timer_list *t) struct dpm_watchdog *wd = from_timer(wd, t, timer); dev_emerg(wd->dev, "**** DPM device timeout ****\n"); - show_stack(wd->tsk, NULL); + show_stack(wd->tsk, NULL, KERN_EMERG); panic("%s %s: unrecoverable failure\n", dev_driver_string(wd->dev), dev_name(wd->dev)); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 3f2b695cf19e..2affc6a2fea8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -27,6 +27,7 @@ #include <linux/types.h> #include <linux/mm.h> +#include <linux/kthread.h> #include <linux/workqueue.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> @@ -195,10 +196,10 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s pagefault_disable(); \ if ((mmptr) == current->mm) { \ valid = !get_user((dst), (wptr)); \ - } else if (current->mm == NULL) { \ - use_mm(mmptr); \ + } else if (current->flags & PF_KTHREAD) { \ + kthread_use_mm(mmptr); \ valid = !get_user((dst), (wptr)); \ - unuse_mm(mmptr); \ + kthread_unuse_mm(mmptr); \ } \ pagefault_enable(); \ } \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 6529caca88fe..35d4a5ab0228 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -22,7 +22,6 @@ #include <linux/module.h> #include <linux/fdtable.h> #include <linux/uaccess.h> -#include <linux/mmu_context.h> #include <linux/firmware.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..bf927f432506 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -19,7 +19,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/mmu_context.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gc/gc_10_1_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 0b7e78748540..7d01420c0c85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/mmu_context.h> - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "cikd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index ccd635b812b5..635cd1a26bed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/mmu_context.h> - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gfx_v8_0.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..c7fd0c47b254 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -19,8 +19,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/mmu_context.h> - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gc/gc_9_0_offset.h" diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index eee530453aa6..ad8a9df49f29 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -31,7 +31,7 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/mm.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/sched/mm.h> #include <linux/types.h> #include <linux/list.h> diff --git a/drivers/media/platform/sti/delta/delta-ipc.c b/drivers/media/platform/sti/delta/delta-ipc.c index 186d88f02ecd..371429d81ea1 100644 --- a/drivers/media/platform/sti/delta/delta-ipc.c +++ b/drivers/media/platform/sti/delta/delta-ipc.c @@ -175,8 +175,8 @@ int delta_ipc_open(struct delta_ctx *pctx, const char *name, msg.ipc_buf_size = ipc_buf_size; msg.ipc_buf_paddr = ctx->ipc_buf->paddr; - memcpy(msg.name, name, sizeof(msg.name)); - msg.name[sizeof(msg.name) - 1] = 0; + memset(msg.name, 0, sizeof(msg.name)); + strcpy(msg.name, name); msg.param_size = param->size; memcpy(ctx->ipc_buf->vaddr, param->data, msg.param_size); diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 0dc3878794fd..bb958bbb7077 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -233,7 +233,7 @@ static void showacpu(void *dummy) raw_spin_lock_irqsave(&show_lock, flags); pr_info("CPU%d:\n", smp_processor_id()); - show_stack(NULL, NULL); + show_stack(NULL, NULL, KERN_INFO); raw_spin_unlock_irqrestore(&show_lock, flags); } diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 10f01f974f67..12294a122f6e 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -32,7 +32,7 @@ #include <linux/usb/functionfs.h> #include <linux/aio.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/poll.h> #include <linux/eventfd.h> @@ -824,13 +824,9 @@ static void ffs_user_copy_worker(struct work_struct *work) bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD; if (io_data->read && ret > 0) { - mm_segment_t oldfs = get_fs(); - - set_fs(USER_DS); - use_mm(io_data->mm); + kthread_use_mm(io_data->mm); ret = ffs_copy_to_iter(io_data->buf, ret, &io_data->data); - unuse_mm(io_data->mm); - set_fs(oldfs); + kthread_unuse_mm(io_data->mm); } io_data->kiocb->ki_complete(io_data->kiocb, ret, ret); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 3afddd3bea6e..9ee0bfe7bcda 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -21,7 +21,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/poll.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/aio.h> #include <linux/uio.h> #include <linux/refcount.h> @@ -462,9 +462,9 @@ static void ep_user_copy_worker(struct work_struct *work) struct kiocb *iocb = priv->iocb; size_t ret; - use_mm(mm); + kthread_use_mm(mm); ret = copy_to_iter(priv->buf, priv->actual, &priv->to); - unuse_mm(mm); + kthread_unuse_mm(mm); if (!ret) ret = -EFAULT; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index cc1d64765ce7..a0c60f895b24 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -27,7 +27,7 @@ #include <linux/iommu.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/rbtree.h> #include <linux/sched/signal.h> #include <linux/sched/mm.h> @@ -2333,7 +2333,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, return -EPERM; if (kthread) - use_mm(mm); + kthread_use_mm(mm); else if (current->mm != mm) goto out; @@ -2351,7 +2351,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, *copied = copy_from_user(data, (void __user *)vaddr, count) ? 0 : count; if (kthread) - unuse_mm(mm); + kthread_unuse_mm(mm); out: mmput(mm); return *copied ? 0 : -EFAULT; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 81be9827668a..47060d80cd02 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -14,7 +14,6 @@ #include <linux/vhost.h> #include <linux/uio.h> #include <linux/mm.h> -#include <linux/mmu_context.h> #include <linux/miscdevice.h> #include <linux/mutex.h> #include <linux/poll.h> @@ -332,10 +331,8 @@ static int vhost_worker(void *data) struct vhost_dev *dev = data; struct vhost_work *work, *work_next; struct llist_node *node; - mm_segment_t oldfs = get_fs(); - set_fs(USER_DS); - use_mm(dev->mm); + kthread_use_mm(dev->mm); for (;;) { /* mb paired w/ kthread_stop */ @@ -363,8 +360,7 @@ static int vhost_worker(void *data) schedule(); } } - unuse_mm(dev->mm); - set_fs(oldfs); + kthread_unuse_mm(dev->mm); return 0; } @@ -27,7 +27,6 @@ #include <linux/file.h> #include <linux/mm.h> #include <linux/mman.h> -#include <linux/mmu_context.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/timer.h> diff --git a/fs/io-wq.c b/fs/io-wq.c index 4023c9846860..a5e90ac39e4d 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -10,7 +10,6 @@ #include <linux/errno.h> #include <linux/sched/signal.h> #include <linux/mm.h> -#include <linux/mmu_context.h> #include <linux/sched/mm.h> #include <linux/percpu.h> #include <linux/slab.h> @@ -170,8 +169,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) dropped_lock = true; } __set_current_state(TASK_RUNNING); - set_fs(KERNEL_DS); - unuse_mm(worker->mm); + kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; } @@ -418,18 +416,15 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) { if (worker->mm) { - unuse_mm(worker->mm); + kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; } - if (!work->mm) { - set_fs(KERNEL_DS); + if (!work->mm) return; - } + if (mmget_not_zero(work->mm)) { - use_mm(work->mm); - if (!worker->mm) - set_fs(USER_DS); + kthread_use_mm(work->mm); worker->mm = work->mm; /* hang on to this mm */ work->mm = NULL; diff --git a/fs/io_uring.c b/fs/io_uring.c index 0b91b0631173..e5dfbbd2aa34 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -55,7 +55,6 @@ #include <linux/fdtable.h> #include <linux/mm.h> #include <linux/mman.h> -#include <linux/mmu_context.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/kthread.h> @@ -3268,7 +3267,7 @@ static int io_madvise(struct io_kiocb *req, bool force_nonblock) if (force_nonblock) return -EAGAIN; - ret = do_madvise(ma->addr, ma->len, ma->advice); + ret = do_madvise(NULL, current->mm, ma->addr, ma->len, ma->advice); if (ret < 0) req_set_fail_links(req); io_cqring_add_event(req, ret); @@ -5825,7 +5824,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (io_op_defs[req->opcode].needs_mm && !current->mm) { if (unlikely(!mmget_not_zero(ctx->sqo_mm))) return -EFAULT; - use_mm(ctx->sqo_mm); + kthread_use_mm(ctx->sqo_mm); } sqe_flags = READ_ONCE(sqe->flags); @@ -5939,7 +5938,7 @@ static inline void io_sq_thread_drop_mm(struct io_ring_ctx *ctx) struct mm_struct *mm = current->mm; if (mm) { - unuse_mm(mm); + kthread_unuse_mm(mm); mmput(mm); } } @@ -5948,15 +5947,12 @@ static int io_sq_thread(void *data) { struct io_ring_ctx *ctx = data; const struct cred *old_cred; - mm_segment_t old_fs; DEFINE_WAIT(wait); unsigned long timeout; int ret = 0; complete(&ctx->completions[1]); - old_fs = get_fs(); - set_fs(USER_DS); old_cred = override_creds(ctx->creds); timeout = jiffies + ctx->sq_thread_idle; @@ -6059,7 +6055,6 @@ static int io_sq_thread(void *data) if (current->task_works) task_work_run(); - set_fs(old_fs); io_sq_thread_drop_mm(ctx); revert_creds(old_cred); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index df2143e05c57..15030784566c 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -14,6 +14,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/bpf-cgroup.h> +#include <linux/mount.h> #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -1703,3 +1704,144 @@ int __init proc_sys_init(void) return sysctl_init(); } + +struct sysctl_alias { + const char *kernel_param; + const char *sysctl_param; +}; + +/* + * Historically some settings had both sysctl and a command line parameter. + * With the generic sysctl. parameter support, we can handle them at a single + * place and only keep the historical name for compatibility. This is not meant + * to add brand new aliases. When adding existing aliases, consider whether + * the possibly different moment of changing the value (e.g. from early_param + * to the moment do_sysctl_args() is called) is an issue for the specific + * parameter. + */ +static const struct sysctl_alias sysctl_aliases[] = { + {"numa_zonelist_order", "vm.numa_zonelist_order" }, + {"hung_task_panic", "kernel.hung_task_panic" }, + { } +}; + +static const char *sysctl_find_alias(char *param) +{ + const struct sysctl_alias *alias; + + for (alias = &sysctl_aliases[0]; alias->kernel_param != NULL; alias++) { + if (strcmp(alias->kernel_param, param) == 0) + return alias->sysctl_param; + } + + return NULL; +} + +/* Set sysctl value passed on kernel command line. */ +static int process_sysctl_arg(char *param, char *val, + const char *unused, void *arg) +{ + char *path; + struct vfsmount **proc_mnt = arg; + struct file_system_type *proc_fs_type; + struct file *file; + int len; + int err; + loff_t pos = 0; + ssize_t wret; + + if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) { + param += sizeof("sysctl") - 1; + + if (param[0] != '/' && param[0] != '.') + return 0; + + param++; + } else { + param = (char *) sysctl_find_alias(param); + if (!param) + return 0; + } + + /* + * To set sysctl options, we use a temporary mount of proc, look up the + * respective sys/ file and write to it. To avoid mounting it when no + * options were given, we mount it only when the first sysctl option is + * found. Why not a persistent mount? There are problems with a + * persistent mount of proc in that it forces userspace not to use any + * proc mount options. + */ + if (!*proc_mnt) { + proc_fs_type = get_fs_type("proc"); + if (!proc_fs_type) { + pr_err("Failed to find procfs to set sysctl from command line\n"); + return 0; + } + *proc_mnt = kern_mount(proc_fs_type); + put_filesystem(proc_fs_type); + if (IS_ERR(*proc_mnt)) { + pr_err("Failed to mount procfs to set sysctl from command line\n"); + return 0; + } + } + + path = kasprintf(GFP_KERNEL, "sys/%s", param); + if (!path) + panic("%s: Failed to allocate path for %s\n", __func__, param); + strreplace(path, '.', '/'); + + file = file_open_root((*proc_mnt)->mnt_root, *proc_mnt, path, O_WRONLY, 0); + if (IS_ERR(file)) { + err = PTR_ERR(file); + if (err == -ENOENT) + pr_err("Failed to set sysctl parameter '%s=%s': parameter not found\n", + param, val); + else if (err == -EACCES) + pr_err("Failed to set sysctl parameter '%s=%s': permission denied (read-only?)\n", + param, val); + else + pr_err("Error %pe opening proc file to set sysctl parameter '%s=%s'\n", + file, param, val); + goto out; + } + len = strlen(val); + wret = kernel_write(file, val, len, &pos); + if (wret < 0) { + err = wret; + if (err == -EINVAL) + pr_err("Failed to set sysctl parameter '%s=%s': invalid value\n", + param, val); + else + pr_err("Error %pe writing to proc file to set sysctl parameter '%s=%s'\n", + ERR_PTR(err), param, val); + } else if (wret != len) { + pr_err("Wrote only %zd bytes of %d writing to proc file %s to set sysctl parameter '%s=%s\n", + wret, len, path, param, val); + } + + err = filp_close(file, NULL); + if (err) + pr_err("Error %pe closing proc file to set sysctl parameter '%s=%s\n", + ERR_PTR(err), param, val); +out: + kfree(path); + return 0; +} + +void do_sysctl_args(void) +{ + char *command_line; + struct vfsmount *proc_mnt = NULL; + + command_line = kstrdup(saved_command_line, GFP_KERNEL); + if (!command_line) + panic("%s: Failed to allocate copy of command line\n", __func__); + + parse_args("Setting sysctl args", command_line, + NULL, 0, -1, -1, &proc_mnt, process_sysctl_arg); + + if (proc_mnt) + kern_unmount(proc_mnt); + + kfree(command_line); +} diff --git a/include/linux/compat.h b/include/linux/compat.h index 0480ba4db592..19c524513cbb 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -820,6 +820,10 @@ asmlinkage long compat_sys_pwritev64v2(unsigned long fd, unsigned long vlen, loff_t pos, rwf_t flags); #endif +asmlinkage ssize_t compat_sys_process_madvise(compat_int_t which, + compat_pid_t upid, const struct compat_iovec __user *vec, + compat_ulong_t vlen, compat_int_t behavior, + compat_ulong_t flags); /* * Deprecated system calls which are still defined in diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 657a83b943f0..98338dc6b5d2 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -165,9 +165,9 @@ static inline int kallsyms_show_value(void) #endif /*CONFIG_KALLSYMS*/ -static inline void print_ip_sym(unsigned long ip) +static inline void print_ip_sym(const char *loglvl, unsigned long ip) { - printk("[<%px>] %pS\n", (void *) ip, (void *) ip); + printk("%s[<%px>] %pS\n", loglvl, (void *) ip, (void *) ip); } #endif /*_LINUX_KALLSYMS_H*/ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 9b7a8d74a9d6..04a5885cec1b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -520,6 +520,12 @@ static inline u32 int_sqrt64(u64 x) } #endif +#ifdef CONFIG_SMP +extern unsigned int sysctl_oops_all_cpu_backtrace; +#else +#define sysctl_oops_all_cpu_backtrace 0 +#endif /* CONFIG_SMP */ + extern void bust_spinlocks(int yes); extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ extern int panic_timeout; diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8bbcaad7ef0f..12258ea077cf 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -5,6 +5,8 @@ #include <linux/err.h> #include <linux/sched.h> +struct mm_struct; + __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, @@ -198,6 +200,9 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work); void kthread_destroy_worker(struct kthread_worker *worker); +void kthread_use_mm(struct mm_struct *mm); +void kthread_unuse_mm(struct mm_struct *mm); + struct cgroup_subsys_state; #ifdef CONFIG_BLK_CGROUP diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2f80fa5e24ad..b67dd43aaa4b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -545,7 +545,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, struct mem_cgroup_per_node *mz; mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - return mz->lru_zone_size[zone_idx][lru]; + return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } void mem_cgroup_handle_over_high(void); diff --git a/include/linux/mm.h b/include/linux/mm.h index 7d219d28637c..bb8d3716bfe4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1046,6 +1046,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); static inline enum zone_type page_zonenum(const struct page *page) { + ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } @@ -2516,7 +2517,8 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf, bool downgrade); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); -extern int do_madvise(unsigned long start, size_t len_in, int behavior); +extern int do_madvise(struct task_struct *target_task, struct mm_struct *mm, + unsigned long start, size_t len_in, int behavior); static inline unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index d9a543a9e1cc..c51a84132d7c 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -4,11 +4,6 @@ #include <asm/mmu_context.h> -struct mm_struct; - -void use_mm(struct mm_struct *mm); -void unuse_mm(struct mm_struct *mm); - /* Architectures that care about IRQ state in switch_mm can override this. */ #ifndef switch_mm_irqs_off # define switch_mm_irqs_off switch_mm diff --git a/include/linux/pid.h b/include/linux/pid.h index 176d6cf80e7c..86e0e7cb7872 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -77,6 +77,7 @@ extern const struct file_operations pidfd_fops; struct file; extern struct pid *pidfd_pid(const struct file *file); +struct pid *pidfd_get_pid(unsigned int fd); static inline struct pid *get_pid(struct pid *pid) { diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 95fb9e025247..00c45a0e6abe 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -30,7 +30,8 @@ extern void show_regs(struct pt_regs *); * task), SP is the stack pointer of the first frame that should be shown in the back * trace (or NULL if the entire call-chain of the task should be shown). */ -extern void show_stack(struct task_struct *task, unsigned long *sp); +extern void show_stack(struct task_struct *task, unsigned long *sp, + const char *loglvl); extern void sched_show_task(struct task_struct *p); diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 7b4d3a49b6c5..660ac49f2b53 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -7,6 +7,13 @@ struct ctl_table; #ifdef CONFIG_DETECT_HUNG_TASK + +#ifdef CONFIG_SMP +extern unsigned int sysctl_hung_task_all_cpu_backtrace; +#else +#define sysctl_hung_task_all_cpu_backtrace 0 +#endif /* CONFIG_SMP */ + extern int sysctl_hung_task_check_count; extern unsigned int sysctl_hung_task_panic; extern unsigned long sysctl_hung_task_timeout_secs; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 1815065d52f3..35cf5ecc0f80 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -876,6 +876,9 @@ asmlinkage long sys_munlockall(void); asmlinkage long sys_mincore(unsigned long start, size_t len, unsigned char __user * vec); asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); +asmlinkage long sys_process_madvise(int which, pid_t upid, + const struct iovec __user *vec, unsigned long vlen, + int behavior, unsigned long flags); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index f2401e45a3c2..50bb7f383a1b 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -197,6 +197,7 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); +void do_sysctl_args(void); extern int pwrsw_enabled; extern int unaligned_enabled; @@ -235,6 +236,9 @@ static inline void setup_sysctl_set(struct ctl_table_set *p, { } +static inline void do_sysctl_args(void) +{ +} #endif /* CONFIG_SYSCTL */ int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 3a3201e4618e..7fde54d6a8ac 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -855,9 +855,12 @@ __SYSCALL(__NR_clone3, sys_clone3) __SYSCALL(__NR_openat2, sys_openat2) #define __NR_pidfd_getfd 438 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) +#define __NR_process_madvise 439 +__SC_COMP(__NR_process_madvise, sys_process_madvise, \ + compat_sys_process_madvise) #undef __NR_syscalls -#define __NR_syscalls 439 +#define __NR_syscalls 440 /* * 32 bit systems traditionally used different diff --git a/init/main.c b/init/main.c index 295aec3a1a7a..6e4984316d23 100644 --- a/init/main.c +++ b/init/main.c @@ -1374,6 +1374,8 @@ static int __ref kernel_init(void *unused) rcu_end_inkernel_boot(); + do_sysctl_args(); + if (ramdisk_execute_command) { ret = run_init_process(ramdisk_execute_command); if (!ret) diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 3de0cc780c16..18e03aba2cfc 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -21,17 +21,18 @@ static void kdb_show_stack(struct task_struct *p, void *addr) { - int old_lvl = console_loglevel; - - console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; kdb_trap_printk++; - if (!addr && kdb_task_has_cpu(p)) + if (!addr && kdb_task_has_cpu(p)) { + int old_lvl = console_loglevel; + + console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; kdb_dump_stack_on_cpu(kdb_process_cpu(p)); - else - show_stack(p, addr); + console_loglevel = old_lvl; + } else { + show_stack(p, addr, KERN_EMERG); + } - console_loglevel = old_lvl; kdb_trap_printk--; } diff --git a/kernel/exit.c b/kernel/exit.c index ce2a75bc0ade..e1f73d634d7f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1465,23 +1465,6 @@ end: return retval; } -static struct pid *pidfd_get_pid(unsigned int fd) -{ - struct fd f; - struct pid *pid; - - f = fdget(fd); - if (!f.file) - return ERR_PTR(-EBADF); - - pid = pidfd_pid(f.file); - if (!IS_ERR(pid)) - get_pid(pid); - - fdput(f); - return pid; -} - static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, int options, struct rusage *ru) { diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 69f54848af79..a672db830a94 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -53,9 +53,18 @@ int __read_mostly sysctl_hung_task_warnings = 10; static int __read_mostly did_panic; static bool hung_task_show_lock; static bool hung_task_call_panic; +static bool hung_task_show_all_bt; static struct task_struct *watchdog_task; +#ifdef CONFIG_SMP +/* + * Should we dump all CPUs backtraces in a hung task event? + * Defaults to 0, can be changed via sysctl. + */ +unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace; +#endif /* CONFIG_SMP */ + /* * Should we panic (and reboot, if panic_timeout= is set) when a * hung task is detected: @@ -63,16 +72,6 @@ static struct task_struct *watchdog_task; unsigned int __read_mostly sysctl_hung_task_panic = CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE; -static int __init hung_task_panic_setup(char *str) -{ - int rc = kstrtouint(str, 0, &sysctl_hung_task_panic); - - if (rc) - return rc; - return 1; -} -__setup("hung_task_panic=", hung_task_panic_setup); - static int hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr) { @@ -137,6 +136,9 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) " disables this message.\n"); sched_show_task(t); hung_task_show_lock = true; + + if (sysctl_hung_task_all_cpu_backtrace) + hung_task_show_all_bt = true; } touch_nmi_watchdog(); @@ -245,10 +247,14 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) rcu_read_unlock(); if (hung_task_show_lock) debug_show_all_locks(); - if (hung_task_call_panic) { + + if (hung_task_show_all_bt) { + hung_task_show_all_bt = false; trigger_all_cpu_backtrace(); - panic("hung_task: blocked tasks"); } + + if (hung_task_call_panic) + panic("hung_task: blocked tasks"); } static long hung_timeout_jiffies(unsigned long last_checked, diff --git a/kernel/kthread.c b/kernel/kthread.c index bfbfa481be3a..86357cd38eb2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1,13 +1,17 @@ // SPDX-License-Identifier: GPL-2.0-only /* Kernel thread helper functions. * Copyright (C) 2004 IBM Corporation, Rusty Russell. + * Copyright (C) 2009 Red Hat, Inc. * * Creation is done via kthreadd, so that we get a clean environment * even if we're invoked from userspace (think modprobe, hotplug cpu, * etc.). */ #include <uapi/linux/sched/types.h> +#include <linux/mm.h> +#include <linux/mmu_context.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/sched/task.h> #include <linux/kthread.h> #include <linux/completion.h> @@ -25,6 +29,7 @@ #include <linux/numa.h> #include <trace/events/sched.h> + static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; @@ -47,6 +52,7 @@ struct kthread { unsigned long flags; unsigned int cpu; void *data; + mm_segment_t oldfs; struct completion parked; struct completion exited; #ifdef CONFIG_BLK_CGROUP @@ -1203,6 +1209,61 @@ void kthread_destroy_worker(struct kthread_worker *worker) } EXPORT_SYMBOL(kthread_destroy_worker); +/** + * kthread_use_mm - make the calling kthread operate on an address space + * @mm: address space to operate on + */ +void kthread_use_mm(struct mm_struct *mm) +{ + struct mm_struct *active_mm; + struct task_struct *tsk = current; + + WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); + WARN_ON_ONCE(tsk->mm); + + task_lock(tsk); + active_mm = tsk->active_mm; + if (active_mm != mm) { + mmgrab(mm); + tsk->active_mm = mm; + } + tsk->mm = mm; + switch_mm(active_mm, mm, tsk); + task_unlock(tsk); +#ifdef finish_arch_post_lock_switch + finish_arch_post_lock_switch(); +#endif + + if (active_mm != mm) + mmdrop(active_mm); + + to_kthread(tsk)->oldfs = get_fs(); + set_fs(USER_DS); +} +EXPORT_SYMBOL_GPL(kthread_use_mm); + +/** + * kthread_unuse_mm - reverse the effect of kthread_use_mm() + * @mm: address space to operate on + */ +void kthread_unuse_mm(struct mm_struct *mm) +{ + struct task_struct *tsk = current; + + WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); + WARN_ON_ONCE(!tsk->mm); + + set_fs(to_kthread(tsk)->oldfs); + + task_lock(tsk); + sync_mm_rss(mm); + tsk->mm = NULL; + /* active_mm is still 'mm' */ + enter_lazy_tlb(mm, tsk); + task_unlock(tsk); +} +EXPORT_SYMBOL_GPL(kthread_unuse_mm); + #ifdef CONFIG_BLK_CGROUP /** * kthread_associate_blkcg - associate blkcg to current kthread diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index cfdff122905b..2fadc2635946 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4399,7 +4399,7 @@ static void print_unlock_imbalance_bug(struct task_struct *curr, curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); pr_cont(") at:\n"); - print_ip_sym(ip); + print_ip_sym(KERN_WARNING, ip); pr_warn("but there are no more locks to release!\n"); pr_warn("\nother info that might help us debug this:\n"); lockdep_print_held_locks(curr); @@ -5050,7 +5050,7 @@ static void print_lock_contention_bug(struct task_struct *curr, curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); pr_cont(") at:\n"); - print_ip_sym(ip); + print_ip_sym(KERN_WARNING, ip); pr_warn("but there are no locks held!\n"); pr_warn("\nother info that might help us debug this:\n"); lockdep_print_held_locks(curr); diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index fd4fe1f5b458..36e69100e8e0 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -125,7 +125,7 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task_pid_nr(task)); - show_stack(task, NULL); + show_stack(task, NULL, KERN_DEFAULT); printk("\n%s/%d's [current] stackdump:\n\n", current->comm, task_pid_nr(current)); dump_stack(); diff --git a/kernel/panic.c b/kernel/panic.c index b69ee9e76cb2..ec6d7d788ce7 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -36,6 +36,14 @@ #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 +#ifdef CONFIG_SMP +/* + * Should we dump all CPUs backtraces in an oops event? + * Defaults to 0, can be changed via sysctl. + */ +unsigned int __read_mostly sysctl_oops_all_cpu_backtrace; +#endif /* CONFIG_SMP */ + int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; static unsigned long tainted_mask = IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0; @@ -515,6 +523,9 @@ void oops_enter(void) /* can't trust the integrity of the kernel anymore: */ debug_locks_off(); do_oops_enter_exit(); + + if (sysctl_oops_all_cpu_backtrace) + trigger_all_cpu_backtrace(); } /* diff --git a/kernel/pid.c b/kernel/pid.c index f1496b757162..3122043fe364 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -518,6 +518,23 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) return idr_get_next(&ns->idr, &nr); } +struct pid *pidfd_get_pid(unsigned int fd) +{ + struct fd f; + struct pid *pid; + + f = fdget(fd); + if (!f.file) + return ERR_PTR(-EBADF); + + pid = pidfd_pid(f.file); + if (!IS_ERR(pid)) + get_pid(pid); + + fdput(f); + return pid; +} + /** * pidfd_create() - Create a new pid file descriptor. * diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 85efa82adbda..e675d8167385 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3906,8 +3906,7 @@ static noinline void __schedule_bug(struct task_struct *prev) if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) && in_atomic_preempt_off()) { pr_err("Preemption disabled at:"); - print_ip_sym(preempt_disable_ip); - pr_cont("\n"); + print_ip_sym(KERN_ERR, preempt_disable_ip); } if (panic_on_warn) panic("scheduling while atomic\n"); @@ -6000,7 +5999,7 @@ void sched_show_task(struct task_struct *p) (unsigned long)task_thread_info(p)->flags); print_worker_info(KERN_INFO, p); - show_stack(p, NULL); + show_stack(p, NULL, KERN_INFO); put_task_stack(p); } EXPORT_SYMBOL_GPL(sched_show_task); @@ -6842,8 +6841,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset) if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) && !preempt_count_equals(preempt_offset)) { pr_err("Preemption disabled at:"); - print_ip_sym(preempt_disable_ip); - pr_cont("\n"); + print_ip_sym(KERN_ERR, preempt_disable_ip); } dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 3b69a560a7ac..16fca3a43633 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -280,6 +280,8 @@ COND_SYSCALL(mlockall); COND_SYSCALL(munlockall); COND_SYSCALL(mincore); COND_SYSCALL(madvise); +COND_SYSCALL(process_madvise); +COND_SYSCALL_COMPAT(process_madvise); COND_SYSCALL(remap_file_pages); COND_SYSCALL(mbind); COND_SYSCALL_COMPAT(mbind); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7adfe5dbce9d..86ed5dd29a61 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2140,6 +2140,17 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif +#ifdef CONFIG_SMP + { + .procname = "oops_all_cpu_backtrace", + .data = &sysctl_oops_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_SMP */ { .procname = "pid_max", .data = &pid_max, @@ -2333,30 +2344,32 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#if defined(CONFIG_X86) + +#if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \ + defined(CONFIG_DEBUG_STACKOVERFLOW) { - .procname = "panic_on_unrecovered_nmi", - .data = &panic_on_unrecovered_nmi, + .procname = "panic_on_stackoverflow", + .data = &sysctl_panic_on_stackoverflow, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, +#endif +#if defined(CONFIG_X86) { - .procname = "panic_on_io_nmi", - .data = &panic_on_io_nmi, + .procname = "panic_on_unrecovered_nmi", + .data = &panic_on_unrecovered_nmi, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, -#ifdef CONFIG_DEBUG_STACKOVERFLOW { - .procname = "panic_on_stackoverflow", - .data = &sysctl_panic_on_stackoverflow, + .procname = "panic_on_io_nmi", + .data = &panic_on_io_nmi, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, -#endif { .procname = "bootloader_type", .data = &bootloader_type, @@ -2425,6 +2438,17 @@ static struct ctl_table kern_table[] = { }, #endif #ifdef CONFIG_DETECT_HUNG_TASK +#ifdef CONFIG_SMP + { + .procname = "hung_task_all_cpu_backtrace", + .data = &sysctl_hung_task_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_SMP */ { .procname = "hung_task_panic", .data = &sysctl_hung_task_panic, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b5765aeea698..7d0ebd104706 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2020,12 +2020,12 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) case -EFAULT: FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on modifying "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); break; case -EINVAL: FTRACE_WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); print_ip_ins(" actual: ", (unsigned char *)ip); pr_cont("\n"); if (ftrace_expected) { @@ -2036,12 +2036,12 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) case -EPERM: FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on writing "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); break; default: FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on unknown error "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); } print_bug_type(); if (rec) { diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 33ffbf308853..a00ee6eedc7c 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -74,7 +74,7 @@ void show_regs_print_info(const char *log_lvl) static void __dump_stack(void) { dump_stack_print_info(KERN_DEFAULT); - show_stack(NULL, NULL); + show_stack(NULL, NULL, KERN_DEFAULT); } /** diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c index 566dad3f4196..84eaae22d3a6 100644 --- a/lib/test_sysctl.c +++ b/lib/test_sysctl.c @@ -44,6 +44,8 @@ struct test_sysctl_data { int int_0002; int int_0003[4]; + int boot_int; + unsigned int uint_0001; char string_0001[65]; @@ -61,6 +63,8 @@ static struct test_sysctl_data test_data = { .int_0003[2] = 2, .int_0003[3] = 3, + .boot_int = 0, + .uint_0001 = 314, .string_0001 = "(none)", @@ -92,6 +96,15 @@ static struct ctl_table test_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "boot_int", + .data = &test_data.boot_int, + .maxlen = sizeof(test_data.boot_int), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { .procname = "uint_0001", .data = &test_data.uint_0001, .maxlen = sizeof(unsigned int), diff --git a/mm/Makefile b/mm/Makefile index fa91e963c2f9..6e9d46b2efc9 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -49,7 +49,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ - mm_init.o mmu_context.o percpu.o slab_common.o \ + mm_init.o percpu.o slab_common.o \ compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o \ debug.o gup.o $(mmu-y) diff --git a/mm/filemap.c b/mm/filemap.c index de6d01a519e4..af1c6adad5bd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2372,6 +2372,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) struct address_space *mapping = file->f_mapping; struct file *fpin = NULL; pgoff_t offset = vmf->pgoff; + unsigned int mmap_miss; /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ) @@ -2387,14 +2388,15 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) } /* Avoid banging the cache line if not needed */ - if (ra->mmap_miss < MMAP_LOTSAMISS * 10) - ra->mmap_miss++; + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss < MMAP_LOTSAMISS * 10) + WRITE_ONCE(ra->mmap_miss, ++mmap_miss); /* * Do we miss much more than hit in this file? If so, * stop bothering with read-ahead. It will only hurt. */ - if (ra->mmap_miss > MMAP_LOTSAMISS) + if (mmap_miss > MMAP_LOTSAMISS) return fpin; /* @@ -2420,13 +2422,15 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, struct file_ra_state *ra = &file->f_ra; struct address_space *mapping = file->f_mapping; struct file *fpin = NULL; + unsigned int mmap_miss; pgoff_t offset = vmf->pgoff; /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) return fpin; - if (ra->mmap_miss > 0) - ra->mmap_miss--; + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss) + WRITE_ONCE(ra->mmap_miss, --mmap_miss); if (PageReadahead(page)) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); page_cache_async_readahead(mapping, ra, file, @@ -2593,6 +2597,7 @@ void filemap_map_pages(struct vm_fault *vmf, unsigned long max_idx; XA_STATE(xas, &mapping->i_pages, start_pgoff); struct page *page; + unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); rcu_read_lock(); xas_for_each(&xas, page, end_pgoff) { @@ -2629,8 +2634,8 @@ void filemap_map_pages(struct vm_fault *vmf, if (page->index >= max_idx) goto unlock; - if (file->f_ra.mmap_miss > 0) - file->f_ra.mmap_miss--; + if (mmap_miss > 0) + mmap_miss--; vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT; if (vmf->pte) @@ -2650,6 +2655,7 @@ next: break; } rcu_read_unlock(); + WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss); } EXPORT_SYMBOL(filemap_map_pages); diff --git a/mm/frontswap.c b/mm/frontswap.c index bfa3a339253e..5c8c66bbedde 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -61,16 +61,16 @@ static u64 frontswap_failed_stores; static u64 frontswap_invalidates; static inline void inc_frontswap_loads(void) { - frontswap_loads++; + data_race(frontswap_loads++); } static inline void inc_frontswap_succ_stores(void) { - frontswap_succ_stores++; + data_race(frontswap_succ_stores++); } static inline void inc_frontswap_failed_stores(void) { - frontswap_failed_stores++; + data_race(frontswap_failed_stores++); } static inline void inc_frontswap_invalidates(void) { - frontswap_invalidates++; + data_race(frontswap_invalidates++); } #else static inline void inc_frontswap_loads(void) { } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e362dc3d2028..5e252d91eb14 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1169,8 +1169,10 @@ static bool update_checksum(struct kmemleak_object *object) u32 old_csum = object->checksum; kasan_disable_current(); + kcsan_disable_current(); object->checksum = crc32(0, (void *)object->pointer, object->size); kasan_enable_current(); + kcsan_enable_current(); return object->checksum != old_csum; } diff --git a/mm/list_lru.c b/mm/list_lru.c index 9222910ab1cb..dbff67afe197 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -180,7 +180,7 @@ unsigned long list_lru_count_one(struct list_lru *lru, rcu_read_lock(); l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg)); - count = l->nr_items; + count = READ_ONCE(l->nr_items); rcu_read_unlock(); return count; diff --git a/mm/madvise.c b/mm/madvise.c index 8cbd8c1bfe15..d7f4f1dc6076 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -17,17 +17,20 @@ #include <linux/falloc.h> #include <linux/fadvise.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/ksm.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> +#include <linux/compat.h> #include <linux/pagewalk.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/shmem_fs.h> #include <linux/mmu_notifier.h> #include <linux/sched/mm.h> +#include <linux/uio.h> #include <asm/tlb.h> @@ -36,6 +39,7 @@ struct madvise_walk_private { struct mmu_gather *tlb; bool pageout; + struct task_struct *target_task; }; /* @@ -255,6 +259,7 @@ static long madvise_willneed(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { + struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; loff_t offset; @@ -289,12 +294,12 @@ static long madvise_willneed(struct vm_area_struct *vma, */ *prev = NULL; /* tell sys_madvise we drop mmap_sem */ get_file(file); - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); offset = (loff_t)(start - vma->vm_start) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED); fput(file); - down_read(¤t->mm->mmap_sem); + down_read(&mm->mmap_sem); return 0; } @@ -315,6 +320,10 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (fatal_signal_pending(current)) return -EINTR; + if (private->target_task && + fatal_signal_pending(private->target_task)) + return -EINTR; + #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (pmd_trans_huge(*pmd)) { pmd_t orig_pmd; @@ -476,12 +485,14 @@ static const struct mm_walk_ops cold_walk_ops = { }; static void madvise_cold_page_range(struct mmu_gather *tlb, + struct task_struct *task, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { struct madvise_walk_private walk_private = { .pageout = false, .tlb = tlb, + .target_task = task, }; tlb_start_vma(tlb, vma); @@ -489,7 +500,8 @@ static void madvise_cold_page_range(struct mmu_gather *tlb, tlb_end_vma(tlb, vma); } -static long madvise_cold(struct vm_area_struct *vma, +static long madvise_cold(struct task_struct *task, + struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start_addr, unsigned long end_addr) { @@ -502,19 +514,21 @@ static long madvise_cold(struct vm_area_struct *vma, lru_add_drain(); tlb_gather_mmu(&tlb, mm, start_addr, end_addr); - madvise_cold_page_range(&tlb, vma, start_addr, end_addr); + madvise_cold_page_range(&tlb, task, vma, start_addr, end_addr); tlb_finish_mmu(&tlb, start_addr, end_addr); return 0; } static void madvise_pageout_page_range(struct mmu_gather *tlb, + struct task_struct *task, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { struct madvise_walk_private walk_private = { .pageout = true, .tlb = tlb, + .target_task = task, }; tlb_start_vma(tlb, vma); @@ -538,7 +552,8 @@ static inline bool can_do_pageout(struct vm_area_struct *vma) inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; } -static long madvise_pageout(struct vm_area_struct *vma, +static long madvise_pageout(struct task_struct *task, + struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start_addr, unsigned long end_addr) { @@ -554,7 +569,7 @@ static long madvise_pageout(struct vm_area_struct *vma, lru_add_drain(); tlb_gather_mmu(&tlb, mm, start_addr, end_addr); - madvise_pageout_page_range(&tlb, vma, start_addr, end_addr); + madvise_pageout_page_range(&tlb, task, vma, start_addr, end_addr); tlb_finish_mmu(&tlb, start_addr, end_addr); return 0; @@ -683,7 +698,6 @@ out: if (nr_swap) { if (current->mm == mm) sync_mm_rss(mm); - add_mm_counter(mm, MM_SWAPENTS, nr_swap); } arch_leave_lazy_mmu_mode(); @@ -763,6 +777,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, unsigned long start, unsigned long end, int behavior) { + struct mm_struct *mm = vma->vm_mm; + *prev = vma; if (!can_madv_lru_vma(vma)) return -EINVAL; @@ -770,8 +786,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, if (!userfaultfd_remove(vma, start, end)) { *prev = NULL; /* mmap_sem has been dropped, prev is stale */ - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, start); + down_read(&mm->mmap_sem); + vma = find_vma(mm, start); if (!vma) return -ENOMEM; if (start < vma->vm_start) { @@ -825,6 +841,7 @@ static long madvise_remove(struct vm_area_struct *vma, loff_t offset; int error; struct file *f; + struct mm_struct *mm = vma->vm_mm; *prev = NULL; /* tell sys_madvise we drop mmap_sem */ @@ -852,13 +869,13 @@ static long madvise_remove(struct vm_area_struct *vma, get_file(f); if (userfaultfd_remove(vma, start, end)) { /* mmap_sem was not released by userfaultfd_remove() */ - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); } error = vfs_fallocate(f, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, end - start); fput(f); - down_read(¤t->mm->mmap_sem); + down_read(&mm->mmap_sem); return error; } @@ -932,7 +949,8 @@ static int madvise_inject_error(int behavior, #endif static long -madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, +madvise_vma(struct task_struct *task, struct vm_area_struct *vma, + struct vm_area_struct **prev, unsigned long start, unsigned long end, int behavior) { switch (behavior) { @@ -941,9 +959,9 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, case MADV_WILLNEED: return madvise_willneed(vma, prev, start, end); case MADV_COLD: - return madvise_cold(vma, prev, start, end); + return madvise_cold(task, vma, prev, start, end); case MADV_PAGEOUT: - return madvise_pageout(vma, prev, start, end); + return madvise_pageout(task, vma, prev, start, end); case MADV_FREE: case MADV_DONTNEED: return madvise_dontneed_free(vma, prev, start, end, behavior); @@ -990,6 +1008,22 @@ madvise_behavior_valid(int behavior) } } +static bool +process_madvise_behavior_valid(int behavior) +{ + switch (behavior) { + case MADV_COLD: + case MADV_PAGEOUT: +#ifdef CONFIG_KSM + case MADV_MERGEABLE: + case MADV_UNMERGEABLE: +#endif + return true; + default: + return false; + } +} + /* * The madvise(2) system call. * @@ -1037,6 +1071,11 @@ madvise_behavior_valid(int behavior) * MADV_DONTDUMP - the application wants to prevent pages in the given range * from being included in its core dump. * MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump. + * MADV_COLD - the application is not expected to use this memory soon, + * deactivate pages in this range so that they can be reclaimed + * easily if memory pressure hanppens. + * MADV_PAGEOUT - the application is not expected to use this memory soon, + * page out the pages in this range immediately. * * return values: * zero - success @@ -1051,7 +1090,8 @@ madvise_behavior_valid(int behavior) * -EBADF - map exists, but area maps something that isn't a file. * -EAGAIN - a kernel resource was temporarily unavailable. */ -int do_madvise(unsigned long start, size_t len_in, int behavior) +int do_madvise(struct task_struct *target_task, struct mm_struct *mm, + unsigned long start, size_t len_in, int behavior) { unsigned long end, tmp; struct vm_area_struct *vma, *prev; @@ -1089,7 +1129,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior) write = madvise_need_mmap_write(behavior); if (write) { - if (down_write_killable(¤t->mm->mmap_sem)) + if (down_write_killable(&mm->mmap_sem)) return -EINTR; /* @@ -1104,12 +1144,12 @@ int do_madvise(unsigned long start, size_t len_in, int behavior) * but for now we have the mmget_still_valid() * model. */ - if (!mmget_still_valid(current->mm)) { - up_write(¤t->mm->mmap_sem); + if (!mmget_still_valid(mm)) { + up_write(&mm->mmap_sem); return -EINTR; } } else { - down_read(¤t->mm->mmap_sem); + down_read(&mm->mmap_sem); } /* @@ -1117,7 +1157,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior) * ranges, just ignore them, but return -ENOMEM at the end. * - different from the way of handling in mlock etc. */ - vma = find_vma_prev(current->mm, start, &prev); + vma = find_vma_prev(mm, start, &prev); if (vma && start > vma->vm_start) prev = vma; @@ -1142,7 +1182,8 @@ int do_madvise(unsigned long start, size_t len_in, int behavior) tmp = end; /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ - error = madvise_vma(vma, &prev, start, tmp, behavior); + error = madvise_vma(target_task, vma, &prev, + start, tmp, behavior); if (error) goto out; start = tmp; @@ -1154,19 +1195,141 @@ int do_madvise(unsigned long start, size_t len_in, int behavior) if (prev) vma = prev->vm_next; else /* madvise_remove dropped mmap_sem */ - vma = find_vma(current->mm, start); + vma = find_vma(mm, start); } out: blk_finish_plug(&plug); if (write) - up_write(¤t->mm->mmap_sem); + up_write(&mm->mmap_sem); else - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); return error; } SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) { - return do_madvise(start, len_in, behavior); + return do_madvise(current, current->mm, start, len_in, behavior); } + +static int process_madvise_vec(struct task_struct *target_task, + struct mm_struct *mm, struct iov_iter *iter, int behavior) +{ + struct iovec iovec; + int ret = 0; + + while (iov_iter_count(iter)) { + iovec = iov_iter_iovec(iter); + ret = do_madvise(target_task, mm, (unsigned long)iovec.iov_base, + iovec.iov_len, behavior); + if (ret < 0) + break; + iov_iter_advance(iter, iovec.iov_len); + } + + return ret; +} + +static ssize_t do_process_madvise(int which, pid_t upid, struct iov_iter *iter, + int behavior, unsigned long flags) +{ + ssize_t ret; + struct pid *pid; + struct task_struct *task; + struct mm_struct *mm; + size_t total_len = iov_iter_count(iter); + + if (flags != 0) + return -EINVAL; + + switch (which) { + case P_PID: + if (upid <= 0) + return -EINVAL; + + pid = find_get_pid(upid); + if (!pid) + return -ESRCH; + break; + case P_PIDFD: + if (upid < 0) + return -EINVAL; + + pid = pidfd_get_pid(upid); + if (IS_ERR(pid)) + return PTR_ERR(pid); + break; + default: + return -EINVAL; + } + + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) { + ret = -ESRCH; + goto put_pid; + } + + if (task->mm != current->mm && + !process_madvise_behavior_valid(behavior)) { + ret = -EINVAL; + goto release_task; + } + + mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS); + if (IS_ERR_OR_NULL(mm)) { + ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; + goto release_task; + } + + ret = process_madvise_vec(task, mm, iter, behavior); + if (ret >= 0) + ret = total_len - iov_iter_count(iter); + + mmput(mm); +release_task: + put_task_struct(task); +put_pid: + put_pid(pid); + return ret; +} + +SYSCALL_DEFINE6(process_madvise, int, which, pid_t, upid, + const struct iovec __user *, vec, unsigned long, vlen, + int, behavior, unsigned long, flags) +{ + ssize_t ret; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter iter; + + ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); + if (ret >= 0) { + ret = do_process_madvise(which, upid, &iter, behavior, flags); + kfree(iov); + } + return ret; +} + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE6(process_madvise, compat_int_t, which, + compat_pid_t, upid, + const struct compat_iovec __user *, vec, + compat_ulong_t, vlen, + compat_int_t, behavior, + compat_ulong_t, flags) + +{ + ssize_t ret; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter iter; + + ret = compat_import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), + &iov, &iter); + if (ret >= 0) { + ret = do_process_madvise(which, upid, &iter, behavior, flags); + kfree(iov); + } + return ret; +} +#endif diff --git a/mm/memory.c b/mm/memory.c index 0ef318c39e38..0ad29c7274de 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3123,8 +3123,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!page) { struct swap_info_struct *si = swp_swap_info(entry); - if (si->flags & SWP_SYNCHRONOUS_IO && - __swap_count(entry) == 1) { + if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && + __swap_count(entry) == 1) { /* skip swapcache */ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); diff --git a/mm/mempool.c b/mm/mempool.c index 85efab3da720..79bff63ecf27 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -489,7 +489,7 @@ void mempool_free(void *element, mempool_t *pool) * ensures that there will be frees which return elements to the * pool waking up the waiters. */ - if (unlikely(pool->curr_nr < pool->min_nr)) { + if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) { spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr < pool->min_nr)) { add_element(pool, element); diff --git a/mm/mmap.c b/mm/mmap.c index 82c381fd8daa..fae283e3c6fa 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -62,6 +62,14 @@ #define arch_mmap_check(addr, len, flags) (0) #endif +#ifndef arch_get_mmap_end +#define arch_get_mmap_end(addr) (TASK_SIZE) +#endif + +#ifndef arch_get_mmap_base +#define arch_get_mmap_base(addr, base) (base) +#endif + #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN; const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX; @@ -1369,6 +1377,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long pgoff, unsigned long *populate, struct list_head *uf) { + const unsigned long mmap_end = arch_get_mmap_end(addr); struct mm_struct *mm = current->mm; int pkey = 0; @@ -1391,8 +1400,12 @@ unsigned long do_mmap(struct file *file, unsigned long addr, if (flags & MAP_FIXED_NOREPLACE) flags |= MAP_FIXED; - if (!(flags & MAP_FIXED)) + if (flags & MAP_FIXED) { + if ((addr < mmap_min_addr) || (addr > mmap_end)) + return -ENOMEM; + } else { addr = round_hint_to_min(addr); + } /* Careful about overflows.. */ len = PAGE_ALIGN(len); @@ -2089,14 +2102,6 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info) return addr; } -#ifndef arch_get_mmap_end -#define arch_get_mmap_end(addr) (TASK_SIZE) -#endif - -#ifndef arch_get_mmap_base -#define arch_get_mmap_base(addr, base) (base) -#endif - /* Get an address range which is currently unmapped. * For shmat() with addr=0. * @@ -2208,12 +2213,13 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); + const unsigned long mmap_end = arch_get_mmap_end(addr); unsigned long error = arch_mmap_check(addr, len, flags); if (error) return error; /* Careful about overflows.. */ - if (len > TASK_SIZE) + if (len > mmap_end - mmap_min_addr) return -ENOMEM; get_area = current->mm->get_unmapped_area; @@ -2234,7 +2240,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, if (IS_ERR_VALUE(addr)) return addr; - if (addr > TASK_SIZE - len) + if ((addr < mmap_min_addr) || (addr > mmap_end - len)) return -ENOMEM; if (offset_in_page(addr)) return -EINVAL; diff --git a/mm/mmu_context.c b/mm/mmu_context.c deleted file mode 100644 index 3e612ae748e9..000000000000 --- a/mm/mmu_context.c +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (C) 2009 Red Hat, Inc. - * - * See ../COPYING for licensing terms. - */ - -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/sched/mm.h> -#include <linux/sched/task.h> -#include <linux/mmu_context.h> -#include <linux/export.h> - -#include <asm/mmu_context.h> - -/* - * use_mm - * Makes the calling kernel thread take on the specified - * mm context. - * (Note: this routine is intended to be called only - * from a kernel thread context) - */ -void use_mm(struct mm_struct *mm) -{ - struct mm_struct *active_mm; - struct task_struct *tsk = current; - - task_lock(tsk); - active_mm = tsk->active_mm; - if (active_mm != mm) { - mmgrab(mm); - tsk->active_mm = mm; - } - tsk->mm = mm; - switch_mm(active_mm, mm, tsk); - task_unlock(tsk); -#ifdef finish_arch_post_lock_switch - finish_arch_post_lock_switch(); -#endif - - if (active_mm != mm) - mmdrop(active_mm); -} -EXPORT_SYMBOL_GPL(use_mm); - -/* - * unuse_mm - * Reverses the effect of use_mm, i.e. releases the - * specified mm context which was earlier taken on - * by the calling kernel thread - * (Note: this routine is intended to be called only - * from a kernel thread context) - */ -void unuse_mm(struct mm_struct *mm) -{ - struct task_struct *tsk = current; - - task_lock(tsk); - sync_mm_rss(mm); - tsk->mm = NULL; - /* active_mm is still 'mm' */ - enter_lazy_tlb(mm, tsk); - task_unlock(tsk); -} -EXPORT_SYMBOL_GPL(unuse_mm); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 4daedf7b91f6..463b3d74a64a 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -126,7 +126,7 @@ static bool oom_cpuset_eligible(struct task_struct *tsk, struct oom_control *oc) /* * The process p may have detached its own ->mm while exiting or through - * use_mm(), but one or more of its subthreads may still have a valid + * kthread_use_mm(), but one or more of its subthreads may still have a valid * pointer. Return p, or any of its subthreads with a valid ->mm, with * task_lock() held. */ @@ -919,8 +919,8 @@ static void __oom_kill_process(struct task_struct *victim, const char *message) continue; } /* - * No use_mm() user needs to read from the userspace so we are - * ok to reap it. + * No kthead_use_mm() user needs to read from the userspace so + * we are ok to reap it. */ if (unlikely(p->flags & PF_KTHREAD)) continue; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 54986a00f0d1..4c5a5630c769 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5527,15 +5527,6 @@ static int __parse_numa_zonelist_order(char *s) return 0; } -static __init int setup_numa_zonelist_order(char *s) -{ - if (!s) - return 0; - - return __parse_numa_zonelist_order(s); -} -early_param("numa_zonelist_order", setup_numa_zonelist_order); - char numa_zonelist_order[] = "Node"; /* diff --git a/mm/page_counter.c b/mm/page_counter.c index c56db2d5e159..e3a205275a0b 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -77,8 +77,8 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) * This is indeed racy, but we can live with some * inaccuracy in the watermark. */ - if (new > c->watermark) - c->watermark = new; + if (new > READ_ONCE(c->watermark)) + WRITE_ONCE(c->watermark, new); } } @@ -119,9 +119,10 @@ bool page_counter_try_charge(struct page_counter *counter, propagate_protected_usage(counter, new); /* * This is racy, but we can live with some - * inaccuracy in the failcnt. + * inaccuracy in the failcnt which is only used + * to report stats. */ - c->failcnt++; + data_race(c->failcnt++); *fail = c; goto failed; } @@ -130,8 +131,8 @@ bool page_counter_try_charge(struct page_counter *counter, * Just like with failcnt, we can live with some * inaccuracy in the watermark. */ - if (new > c->watermark) - c->watermark = new; + if (new > READ_ONCE(c->watermark)) + WRITE_ONCE(c->watermark, new); } return true; diff --git a/mm/page_io.c b/mm/page_io.c index 76965be1d40e..26935db0676c 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -86,7 +86,7 @@ static void swap_slot_free_notify(struct page *page) return; sis = page_swap_info(page); - if (!(sis->flags & SWP_BLKDEV)) + if (data_race(!(sis->flags & SWP_BLKDEV))) return; /* @@ -286,7 +286,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, struct swap_info_struct *sis = page_swap_info(page); VM_BUG_ON_PAGE(!PageSwapCache(page), page); - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct kiocb kiocb; struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; @@ -377,7 +377,7 @@ int swap_readpage(struct page *page, bool synchronous) goto out; } - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; @@ -439,7 +439,7 @@ int swap_set_page_dirty(struct page *page) { struct swap_info_struct *sis = page_swap_info(page); - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct address_space *mapping = sis->swap_file->f_mapping; VM_BUG_ON_PAGE(!PageSwapCache(page), page); diff --git a/mm/rmap.c b/mm/rmap.c index f79a206b271a..2126fd4a254b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -672,7 +672,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) */ void flush_tlb_batched_pending(struct mm_struct *mm) { - if (mm->tlb_flush_batched) { + if (data_race(mm->tlb_flush_batched)) { flush_tlb_mm(mm); /* diff --git a/mm/swap.c b/mm/swap.c index bf9a79fed62d..a37bd7b202ac 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -599,7 +599,8 @@ void lru_add_drain_cpu(int cpu) __pagevec_lru_add(pvec); pvec = &per_cpu(lru_rotate_pvecs, cpu); - if (pagevec_count(pvec)) { + /* Disabling interrupts below acts as a compiler barrier. */ + if (data_race(pagevec_count(pvec))) { unsigned long flags; /* No harm done if a racing interrupt already did this */ @@ -744,7 +745,7 @@ void lru_add_drain_all(void) struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || - pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || + data_race(pagevec_count(&per_cpu(lru_rotate_pvecs, cpu))) || pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) || diff --git a/mm/swap_state.c b/mm/swap_state.c index 8238954ae781..558e224138d1 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -58,8 +58,8 @@ static bool enable_vma_readahead __read_mostly = true; #define GET_SWAP_RA_VAL(vma) \ (atomic_long_read(&(vma)->swap_readahead_info) ? : 4) -#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) -#define ADD_CACHE_INFO(x, nr) do { swap_cache_info.x += (nr); } while (0) +#define INC_CACHE_INFO(x) data_race(swap_cache_info.x++) +#define ADD_CACHE_INFO(x, nr) data_race(swap_cache_info.x += (nr)) static struct { unsigned long add_total; diff --git a/mm/swapfile.c b/mm/swapfile.c index b365cbe99cc3..afd8999071f0 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -671,7 +671,7 @@ static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset, if (offset == si->lowest_bit) si->lowest_bit += nr_entries; if (end == si->highest_bit) - si->highest_bit -= nr_entries; + WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries); si->inuse_pages += nr_entries; if (si->inuse_pages == si->pages) { si->lowest_bit = si->max; @@ -703,7 +703,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, if (end > si->highest_bit) { bool was_full = !si->highest_bit; - si->highest_bit = end; + WRITE_ONCE(si->highest_bit, end); if (was_full && (si->flags & SWP_WRITEOK)) add_to_avail_list(si); } @@ -830,7 +830,7 @@ checks: else goto done; } - si->swap_map[offset] = usage; + WRITE_ONCE(si->swap_map[offset], usage); inc_cluster_info_page(si, si->cluster_info, offset); unlock_cluster(ci); @@ -889,12 +889,13 @@ done: scan: spin_unlock(&si->lock); - while (++offset <= si->highest_bit) { - if (!si->swap_map[offset]) { + while (++offset <= READ_ONCE(si->highest_bit)) { + if (data_race(!si->swap_map[offset])) { spin_lock(&si->lock); goto checks; } - if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { + if (vm_swap_full() && + READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { spin_lock(&si->lock); goto checks; } @@ -906,11 +907,12 @@ scan: } offset = si->lowest_bit; while (offset < scan_base) { - if (!si->swap_map[offset]) { + if (data_race(!si->swap_map[offset])) { spin_lock(&si->lock); goto checks; } - if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { + if (vm_swap_full() && + READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { spin_lock(&si->lock); goto checks; } @@ -1109,7 +1111,7 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry) p = swp_swap_info(entry); if (!p) goto bad_nofile; - if (!(p->flags & SWP_USED)) + if (data_race(!(p->flags & SWP_USED))) goto bad_device; offset = swp_offset(entry); if (offset >= p->max) @@ -1135,7 +1137,7 @@ static struct swap_info_struct *_swap_info_get(swp_entry_t entry) p = __swap_info_get(entry); if (!p) goto out; - if (!p->swap_map[swp_offset(entry)]) + if (data_race(!p->swap_map[swp_offset(entry)])) goto bad_free; return p; @@ -1204,7 +1206,10 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, } usage = count | has_cache; - p->swap_map[offset] = usage ? : SWAP_HAS_CACHE; + if (usage) + WRITE_ONCE(p->swap_map[offset], usage); + else + WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE); return usage; } @@ -1256,7 +1261,7 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry) goto bad_nofile; rcu_read_lock(); - if (!(si->flags & SWP_VALID)) + if (data_race(!(si->flags & SWP_VALID))) goto unlock_out; offset = swp_offset(entry); if (offset >= si->max) @@ -3438,7 +3443,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) } else err = -ENOENT; /* unused swap entry */ - p->swap_map[offset] = count | has_cache; + WRITE_ONCE(p->swap_map[offset], count | has_cache); unlock_out: unlock_cluster_or_swap_info(p, ci); diff --git a/mm/util.c b/mm/util.c index 41b47d8cae09..e789461478de 100644 --- a/mm/util.c +++ b/mm/util.c @@ -796,8 +796,12 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { long allowed; - VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) < - -(s64)vm_committed_as_batch * num_online_cpus(), + /* + * A transient decrease in the value is unlikely, so no need + * READ_ONCE() for vm_committed_as.count. + */ + VM_WARN_ONCE(data_race(percpu_counter_read(&vm_committed_as) < + -(s64)vm_committed_as_batch * num_online_cpus()), "memory commitment underflow"); vm_acct_memory(pages); diff --git a/mm/vmacache.c b/mm/vmacache.c index cdc32a3b02fa..ceedbab82106 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -25,8 +25,8 @@ * task's vmacache pertains to a different mm (ie, its own). There is * nothing we can do here. * - * Also handle the case where a kernel thread has adopted this mm via use_mm(). - * That kernel thread's vmacache is not applicable to this mm. + * Also handle the case where a kernel thread has adopted this mm via + * kthread_use_mm(). That kernel thread's vmacache is not applicable to this mm. */ static inline bool vmacache_valid_mm(struct mm_struct *mm) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8c1250103959..e2939ce39025 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1734,14 +1734,48 @@ int tcp_mmap(struct file *file, struct socket *sock, } EXPORT_SYMBOL(tcp_mmap); +static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, + struct page **pages, + unsigned long pages_to_map, + unsigned long *insert_addr, + u32 *length_with_pending, + u32 *seq, + struct tcp_zerocopy_receive *zc) +{ + unsigned long pages_remaining = pages_to_map; + int bytes_mapped; + int ret; + + ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining); + bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining); + /* Even if vm_insert_pages fails, it may have partially succeeded in + * mapping (some but not all of the pages). + */ + *seq += bytes_mapped; + *insert_addr += bytes_mapped; + if (ret) { + /* But if vm_insert_pages did fail, we have to unroll some state + * we speculatively touched before. + */ + const int bytes_not_mapped = PAGE_SIZE * pages_remaining; + *length_with_pending -= bytes_not_mapped; + zc->recv_skip_hint += bytes_not_mapped; + } + return ret; +} + static int tcp_zerocopy_receive(struct sock *sk, struct tcp_zerocopy_receive *zc) { unsigned long address = (unsigned long)zc->address; u32 length = 0, seq, offset, zap_len; + #define PAGE_BATCH_SIZE 8 + struct page *pages[PAGE_BATCH_SIZE]; const skb_frag_t *frags = NULL; struct vm_area_struct *vma; struct sk_buff *skb = NULL; + unsigned long pg_idx = 0; + unsigned long curr_addr; struct tcp_sock *tp; int inq; int ret; @@ -1754,6 +1788,8 @@ static int tcp_zerocopy_receive(struct sock *sk, sock_rps_record_flow(sk); + tp = tcp_sk(sk); + down_read(¤t->mm->mmap_sem); ret = -EINVAL; @@ -1762,7 +1798,6 @@ static int tcp_zerocopy_receive(struct sock *sk, goto out; zc->length = min_t(unsigned long, zc->length, vma->vm_end - address); - tp = tcp_sk(sk); seq = tp->copied_seq; inq = tcp_inq(sk); zc->length = min_t(u32, zc->length, inq); @@ -1774,8 +1809,20 @@ static int tcp_zerocopy_receive(struct sock *sk, zc->recv_skip_hint = zc->length; } ret = 0; + curr_addr = address; while (length + PAGE_SIZE <= zc->length) { if (zc->recv_skip_hint < PAGE_SIZE) { + /* If we're here, finish the current batch. */ + if (pg_idx) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, + pg_idx, + &curr_addr, + &length, + &seq, zc); + if (ret) + goto out; + pg_idx = 0; + } if (skb) { if (zc->recv_skip_hint > 0) break; @@ -1784,7 +1831,6 @@ static int tcp_zerocopy_receive(struct sock *sk, } else { skb = tcp_recv_skb(sk, seq, &offset); } - zc->recv_skip_hint = skb->len - offset; offset -= skb_headlen(skb); if ((int)offset < 0 || skb_has_frag_list(skb)) @@ -1808,14 +1854,24 @@ static int tcp_zerocopy_receive(struct sock *sk, zc->recv_skip_hint -= remaining; break; } - ret = vm_insert_page(vma, address + length, - skb_frag_page(frags)); - if (ret) - break; + pages[pg_idx] = skb_frag_page(frags); + pg_idx++; length += PAGE_SIZE; - seq += PAGE_SIZE; zc->recv_skip_hint -= PAGE_SIZE; frags++; + if (pg_idx == PAGE_BATCH_SIZE) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx, + &curr_addr, &length, + &seq, zc); + if (ret) + goto out; + pg_idx = 0; + } + } + if (pg_idx) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx, + &curr_addr, &length, &seq, + zc); } out: up_read(¤t->mm->mmap_sem); diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index 89ca6fe257cc..efb6c3f5f2a9 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -20,7 +20,7 @@ static inline const char *kallsyms_lookup(unsigned long addr, #include <execinfo.h> #include <stdlib.h> -static inline void print_ip_sym(unsigned long ip) +static inline void print_ip_sym(const char *loglvl, unsigned long ip) { char **name; diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index 6a970b127c9b..ef6417b8067b 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -39,10 +39,11 @@ ALL_TESTS="$ALL_TESTS 0003:1:1:int_0002" ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001" ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003" ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001" +ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int" test_modprobe() { - if [ ! -d $DIR ]; then + if [ ! -d $SYSCTL ]; then echo "$0: $DIR not present" >&2 echo "You must have the following enabled in your kernel:" >&2 cat $TEST_DIR/config >&2 @@ -122,9 +123,9 @@ test_reqs() function load_req_mod() { - if [ ! -d $DIR ]; then + if [ ! -d $DIR -a ! -d $SYSCTL ]; then if ! modprobe -q -n $TEST_DRIVER; then - echo "$0: module $TEST_DRIVER not found [SKIP]" + echo "$0: module $TEST_DRIVER not found and not built-in [SKIP]" exit $ksft_skip fi modprobe $TEST_DRIVER @@ -752,6 +753,40 @@ sysctl_test_0006() run_bitmaptest } +sysctl_test_0007() +{ + TARGET="${SYSCTL}/boot_int" + if [ -d $DIR ]; then + echo "Boot param test only possible sysctl_test is built-in, not module:" + cat $TEST_DIR/config >&2 + return 0 + fi + + echo -n "Testing if $TARGET is set to 1 ..." + ORIG=$(cat "${TARGET}") + + if [ x$ORIG = "x1" ]; then + echo "ok" + return 0 + fi + echo "FAIL" + echo "Checking if /proc/cmdline contains setting of the expected parameter ..." + if [ ! -f /proc/cmdline ]; then + echo "/proc/cmdline does not exist, test inconclusive" + return 0 + fi + + FOUND=$(grep -c "sysctl[./]debug[./]test_sysctl[./]boot_int=1" /proc/cmdline) + if [ $FOUND = "1" ]; then + echo "Kernel param found but $TARGET is not 1, TEST FAILED" + rc=1 + test_rc + fi + + echo "Skipping test, expected kernel parameter missing." + echo "To perform this test, make sure kernel is booted with parameter: sysctl.debug.test_sysctl.boot_int=1" +} + list_tests() { echo "Test ID list:" @@ -766,6 +801,7 @@ list_tests() echo "0004 x $(get_test_count 0004) - tests proc_douintvec()" echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array" echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()" + echo "0007 x $(get_test_count 0007) - tests setting sysctl from kernel boot param" } usage() |