diff options
32 files changed, 442 insertions, 270 deletions
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index 4ba4664ce5c3..9cb9138f7a79 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -71,35 +71,50 @@ Look at the current lock statistics: # less /proc/lock_stat -01 lock_stat version 0.2 +01 lock_stat version 0.3 02 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total 04 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 05 -06 &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60 -07 &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38 -08 -------------------------- -09 &inode->i_data.tree_lock 0 [<ffffffff8027c08f>] add_to_page_cache+0x5f/0x190 -10 -11 ............................................................................................................................................................................................... -12 -13 dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 -14 ----------- -15 dcache_lock 180 [<ffffffff802c0d7e>] sys_getcwd+0x11e/0x230 -16 dcache_lock 165 [<ffffffff802c002a>] d_alloc+0x15a/0x210 -17 dcache_lock 33 [<ffffffff8035818d>] _atomic_dec_and_lock+0x4d/0x70 -18 dcache_lock 1 [<ffffffff802beef8>] shrink_dcache_parent+0x18/0x130 +06 &mm->mmap_sem-W: 233 538 18446744073708 22924.27 607243.51 1342 45806 1.71 8595.89 1180582.34 +07 &mm->mmap_sem-R: 205 587 18446744073708 28403.36 731975.00 1940 412426 0.58 187825.45 6307502.88 +08 --------------- +09 &mm->mmap_sem 487 [<ffffffff8053491f>] do_page_fault+0x466/0x928 +10 &mm->mmap_sem 179 [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d +11 &mm->mmap_sem 279 [<ffffffff80210a57>] sys_mmap+0x75/0xce +12 &mm->mmap_sem 76 [<ffffffff802a490b>] sys_munmap+0x32/0x59 +13 --------------- +14 &mm->mmap_sem 270 [<ffffffff80210a57>] sys_mmap+0x75/0xce +15 &mm->mmap_sem 431 [<ffffffff8053491f>] do_page_fault+0x466/0x928 +16 &mm->mmap_sem 138 [<ffffffff802a490b>] sys_munmap+0x32/0x59 +17 &mm->mmap_sem 145 [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d +18 +19 ............................................................................................................................................................................................... +20 +21 dcache_lock: 621 623 0.52 118.26 1053.02 6745 91930 0.29 316.29 118423.41 +22 ----------- +23 dcache_lock 179 [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54 +24 dcache_lock 113 [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb +25 dcache_lock 99 [<ffffffff802ca0dc>] d_rehash+0x1b/0x44 +26 dcache_lock 104 [<ffffffff802cbca0>] d_instantiate+0x36/0x8a +27 ----------- +28 dcache_lock 192 [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54 +29 dcache_lock 98 [<ffffffff802ca0dc>] d_rehash+0x1b/0x44 +30 dcache_lock 72 [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb +31 dcache_lock 112 [<ffffffff802cbca0>] d_instantiate+0x36/0x8a This excerpt shows the first two lock class statistics. Line 01 shows the output version - each time the format changes this will be updated. Line 02-04 -show the header with column descriptions. Lines 05-10 and 13-18 show the actual +show the header with column descriptions. Lines 05-18 and 20-31 show the actual statistics. These statistics come in two parts; the actual stats separated by a -short separator (line 08, 14) from the contention points. +short separator (line 08, 13) from the contention points. -The first lock (05-10) is a read/write lock, and shows two lines above the +The first lock (05-18) is a read/write lock, and shows two lines above the short separator. The contention points don't match the column descriptors, -they have two: contentions and [<IP>] symbol. +they have two: contentions and [<IP>] symbol. The second set of contention +points are the points we're contending with. +The integer part of the time values is in us. View the top contending locks: diff --git a/arch/um/include/asm/system.h b/arch/um/include/asm/system.h index 753346e2cdfd..ae5f94d6317d 100644 --- a/arch/um/include/asm/system.h +++ b/arch/um/include/asm/system.h @@ -11,21 +11,21 @@ extern int get_signals(void); extern void block_signals(void); extern void unblock_signals(void); -#define local_save_flags(flags) do { typecheck(unsigned long, flags); \ +#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \ (flags) = get_signals(); } while(0) -#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \ +#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \ set_signals(flags); } while(0) -#define local_irq_save(flags) do { local_save_flags(flags); \ - local_irq_disable(); } while(0) +#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \ + raw_local_irq_disable(); } while(0) -#define local_irq_enable() unblock_signals() -#define local_irq_disable() block_signals() +#define raw_local_irq_enable() unblock_signals() +#define raw_local_irq_disable() block_signals() #define irqs_disabled() \ ({ \ unsigned long flags; \ - local_save_flags(flags); \ + raw_local_save_flags(flags); \ (flags == 0); \ }) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ece72053ba63..313b3d692595 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -134,6 +134,11 @@ do { \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ + case 8: \ + asm(op "q %1,"__percpu_seg"%0" \ + : "+m" (var) \ + : "ri" ((T__)val)); \ + break; \ default: __bad_percpu_size(); \ } \ } while (0) @@ -157,16 +162,84 @@ do { \ : "=r" (ret__) \ : "m" (var)); \ break; \ + case 8: \ + asm(op "q "__percpu_seg"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ default: __bad_percpu_size(); \ } \ ret__; \ }) -#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) -#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) -#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) -#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) -#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_addr_op(op, var) \ +({ \ + switch (sizeof(var)) { \ + case 1: \ + asm(op "b "__percpu_seg"%0" \ + : : "m"(var)); \ + break; \ + case 2: \ + asm(op "w "__percpu_seg"%0" \ + : : "m"(var)); \ + break; \ + case 4: \ + asm(op "l "__percpu_seg"%0" \ + : : "m"(var)); \ + break; \ + case 8: \ + asm(op "q "__percpu_seg"%0" \ + : : "m"(var)); \ + break; \ + default: __bad_percpu_size(); \ + } \ +}) + +#define percpu_cmpxchg_op(var, old, new) \ +({ \ + typeof(var) prev; \ + switch (sizeof(var)) { \ + case 1: \ + asm("cmpxchgb %b1, "__percpu_seg"%2" \ + : "=a"(prev) \ + : "q"(new), "m"(var), "0"(old) \ + : "memory"); \ + break; \ + case 2: \ + asm("cmpxchgw %w1, "__percpu_seg"%2" \ + : "=a"(prev) \ + : "r"(new), "m"(var), "0"(old) \ + : "memory"); \ + break; \ + case 4: \ + asm("cmpxchgl %k1, "__percpu_seg"%2" \ + : "=a"(prev) \ + : "r"(new), "m"(var), "0"(old) \ + : "memory"); \ + break; \ + case 8: \ + asm("cmpxchgq %1, "__percpu_seg"%2" \ + : "=a"(prev) \ + : "r"(new), "m"(var), "0"(old) \ + : "memory"); \ + break; \ + default: \ + __bad_percpu_size(); \ + } \ + return prev; \ +}) + +#define x86_read_percpu(var) percpu_from_op("mov", per_cpu_var(var)) +#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu_var(var), val) +#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu_var(var), val) +#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu_var(var), val) +#define x86_inc_percpu(var) percpu_addr_op("inc", per_cpu_var(var)) +#define x86_dec_percpu(var) percpu_addr_op("dec", per_cpu_var(var)) +#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu_var(var), val) +#define x86_xchg_percpu(var, val) percpu_to_op("xchg", per_cpu_var(var), val) +#define x86_cmpxchg_percpu(var, old, new) \ + percpu_cmpxchg_op(per_cpu_var(var), old, new) + #endif /* !__ASSEMBLY__ */ #endif /* !CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 35c54921b2e4..99192bb55a53 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -157,6 +157,7 @@ extern int __get_user_bad(void); int __ret_gu; \ unsigned long __val_gu; \ __chk_user_ptr(ptr); \ + might_fault(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_x(1, __ret_gu, __val_gu, ptr); \ @@ -241,6 +242,7 @@ extern void __put_user_8(void); int __ret_pu; \ __typeof__(*(ptr)) __pu_val; \ __chk_user_ptr(ptr); \ + might_fault(); \ __pu_val = x; \ switch (sizeof(*(ptr))) { \ case 1: \ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index d095a3aeea1b..5e06259e90e5 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { - might_sleep(); - return __copy_to_user_inatomic(to, from, n); + might_fault(); + return __copy_to_user_inatomic(to, from, n); } static __always_inline unsigned long @@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { - might_sleep(); + might_fault(); if (__builtin_constant_p(n)) { unsigned long ret; @@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __copy_from_user_nocache(void *to, const void __user *from, unsigned long n) { - might_sleep(); + might_fault(); if (__builtin_constant_p(n)) { unsigned long ret; diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f8cfd00db450..84210c479fca 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -29,6 +29,8 @@ static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { int ret = 0; + + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { @@ -71,6 +73,8 @@ static __always_inline __must_check int __copy_to_user(void __user *dst, const void *src, unsigned size) { int ret = 0; + + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { @@ -113,6 +117,8 @@ static __always_inline __must_check int __copy_in_user(void __user *dst, const void __user *src, unsigned size) { int ret = 0; + + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, (__force void *)src, size); diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 9e68075544f6..4a20b2f9a381 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon #define __do_strncpy_from_user(dst, src, count, res) \ do { \ int __d0, __d1, __d2; \ - might_sleep(); \ + might_fault(); \ __asm__ __volatile__( \ " testl %1,%1\n" \ " jz 2f\n" \ @@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user); #define __do_clear_user(addr,size) \ do { \ int __d0; \ - might_sleep(); \ + might_fault(); \ __asm__ __volatile__( \ "0: rep; stosl\n" \ " movl %2,%0\n" \ @@ -155,7 +155,7 @@ do { \ unsigned long clear_user(void __user *to, unsigned long n) { - might_sleep(); + might_fault(); if (access_ok(VERIFY_WRITE, to, n)) __do_clear_user(to, n); return n; @@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n) unsigned long mask = -__addr_ok(s); unsigned long res, tmp; - might_sleep(); + might_fault(); __asm__ __volatile__( " testl %0, %0\n" diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index f4df6e7c718b..64d6c84e6353 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -15,7 +15,7 @@ #define __do_strncpy_from_user(dst,src,count,res) \ do { \ long __d0, __d1, __d2; \ - might_sleep(); \ + might_fault(); \ __asm__ __volatile__( \ " testq %1,%1\n" \ " jz 2f\n" \ @@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user); unsigned long __clear_user(void __user *addr, unsigned long size) { long __d0; - might_sleep(); + might_fault(); /* no memory constraint because it doesn't change any memory gcc knows about */ asm volatile( diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b0e63c672ebd..1c02250353ee 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -45,7 +45,12 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; * Only S390 provides its own means of moving the pointer. */ #ifndef SHIFT_PERCPU_PTR -#define SHIFT_PERCPU_PTR(__p, __offset) RELOC_HIDE((__p), (__offset)) +# ifdef CONFIG_HAVE_ZERO_BASED_PER_CPU +# define SHIFT_PERCPU_PTR(__p, __offset) \ + ((__typeof(__p))(((void *)(__p)) + (__offset))) +# else +# define SHIFT_PERCPU_PTR(__p, __offset) RELOC_HIDE((__p), (__offset)) +# endif /* CONFIG_HAVE_ZERO_BASED_PER_CPU */ #endif /* @@ -70,6 +75,10 @@ extern void setup_per_cpu_areas(void); #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var))) #define __get_cpu_var(var) per_cpu_var(var) #define __raw_get_cpu_var(var) per_cpu_var(var) +#ifndef SHIFT_PERCPU_PTR +# define SHIFT_PERCPU_PTR(__p, __offset) (__p) +#endif +#define per_cpu_offset(x) 0L #endif /* SMP */ diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 79a7ff925bf8..51c15f3a14a5 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -9,7 +9,17 @@ extern char __bss_start[], __bss_stop[]; extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; extern char _end[]; +#ifdef CONFIG_HAVE_ZERO_BASED_PER_CPU +extern char __per_cpu_load[]; +extern char ____per_cpu_size[]; +#define __per_cpu_size ((unsigned long)&____per_cpu_size) +#define __per_cpu_start ((char *)0) +#define __per_cpu_end ((char *)__per_cpu_size) +#else extern char __per_cpu_start[], __per_cpu_end[]; +#define __per_cpu_load __per_cpu_start +#define __per_cpu_size (__per_cpu_end - __per_cpu_start) +#endif extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __start_rodata[], __end_rodata[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 80744606bad1..63709a5ec3c0 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -401,6 +401,21 @@ *(.initcall7.init) \ *(.initcall7s.init) +#ifdef CONFIG_HAVE_ZERO_BASED_PER_CPU +#define PERCPU(align) \ + . = ALIGN(align); \ + percpu : { } :percpu \ + __per_cpu_load = .; \ + .data.percpu 0 : AT(__per_cpu_load - LOAD_OFFSET) { \ + *(.data.percpu.first) \ + *(.data.percpu.shared_aligned) \ + *(.data.percpu) \ + *(.data.percpu.page_aligned) \ + ____per_cpu_size = .; \ + } \ + . = __per_cpu_load + ____per_cpu_size; \ + data : { } :data +#else #define PERCPU(align) \ . = ALIGN(align); \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ @@ -410,3 +425,4 @@ *(.data.percpu.shared_aligned) \ } \ VMLINUX_SYMBOL(__per_cpu_end) = .; +#endif diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 4aaa4afb1cb9..096476f1fb35 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -17,7 +17,7 @@ extern int debug_locks_off(void); ({ \ int __ret = 0; \ \ - if (unlikely(c)) { \ + if (!oops_in_progress && unlikely(c)) { \ if (debug_locks_off() && !debug_locks_silent) \ WARN_ON(1); \ __ret = 1; \ diff --git a/include/linux/futex.h b/include/linux/futex.h index 586ab56a3ec3..8f627b9ae2b1 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -164,6 +164,8 @@ union futex_key { } both; }; +#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } + #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); extern void exit_pi_state_list(struct task_struct *curr); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index dc7e0d0a6474..269df5a17b30 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -141,6 +141,15 @@ extern int _cond_resched(void); (__x < 0) ? -__x : __x; \ }) +#ifdef CONFIG_PROVE_LOCKING +void might_fault(void); +#else +static inline void might_fault(void) +{ + might_sleep(); +} +#endif + extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(long time); NORET_TYPE void panic(const char * fmt, ...) @@ -188,6 +197,8 @@ extern unsigned long long memparse(const char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); +extern int func_ptr_is_kernel_text(void *ptr); + struct pid; extern struct pid *session_of_pgrp(struct pid *pgrp); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 29aec6e10020..8956daf64abd 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -73,6 +73,8 @@ struct lock_class_key { struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; }; +#define LOCKSTAT_POINTS 4 + /* * The lock-class itself: */ @@ -119,7 +121,8 @@ struct lock_class { int name_version; #ifdef CONFIG_LOCK_STAT - unsigned long contention_point[4]; + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; #endif }; @@ -144,6 +147,7 @@ enum bounce_type { struct lock_class_stats { unsigned long contention_point[4]; + unsigned long contending_point[4]; struct lock_time read_waittime; struct lock_time write_waittime; struct lock_time read_holdtime; @@ -165,6 +169,7 @@ struct lockdep_map { const char *name; #ifdef CONFIG_LOCK_STAT int cpu; + unsigned long ip; #endif }; @@ -356,7 +361,7 @@ struct lock_class_key { }; #ifdef CONFIG_LOCK_STAT extern void lock_contended(struct lockdep_map *lock, unsigned long ip); -extern void lock_acquired(struct lockdep_map *lock); +extern void lock_acquired(struct lockdep_map *lock, unsigned long ip); #define LOCK_CONTENDED(_lock, try, lock) \ do { \ @@ -364,13 +369,13 @@ do { \ lock_contended(&(_lock)->dep_map, _RET_IP_); \ lock(_lock); \ } \ - lock_acquired(&(_lock)->dep_map); \ + lock_acquired(&(_lock)->dep_map, _RET_IP_); \ } while (0) #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) -#define lock_acquired(lockdep_map) do {} while (0) +#define lock_acquired(lockdep_map, ip) do {} while (0) #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) @@ -481,4 +486,22 @@ static inline void print_irqtrace_events(struct task_struct *curr) # define lock_map_release(l) do { } while (0) #endif +#ifdef CONFIG_PROVE_LOCKING +# define might_lock(lock) \ +do { \ + typecheck(struct lockdep_map *, &(lock)->dep_map); \ + lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_); \ + lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ +} while (0) +# define might_lock_read(lock) \ +do { \ + typecheck(struct lockdep_map *, &(lock)->dep_map); \ + lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_); \ + lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ +} while (0) +#else +# define might_lock(lock) do { } while (0) +# define might_lock_read(lock) do { } while (0) +#endif + #endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bc6da10ceee0..7a0e5c4f8072 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -144,6 +144,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); /* * NOTE: mutex_trylock() follows the spin_trylock() convention, * not the down_trylock() convention! + * + * Returns 1 if the mutex has been acquired successfully, and 0 on contention. */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9f2a3751873a..e1f87085d39a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -27,7 +27,18 @@ #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ __attribute__((__section__(".data.percpu.page_aligned"))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#ifdef CONFIG_HAVE_ZERO_BASED_PER_CPU +#define DEFINE_PER_CPU_FIRST(type, name) \ + __attribute__((__section__(".data.percpu.first"))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #else +#define DEFINE_PER_CPU_FIRST(type, name) \ + DEFINE_PER_CPU(type, name) +#endif + +#else /* !CONFIG_SMP */ + #define DEFINE_PER_CPU(type, name) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name @@ -36,7 +47,11 @@ #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ DEFINE_PER_CPU(type, name) -#endif + +#define DEFINE_PER_CPU_FIRST(type, name) \ + DEFINE_PER_CPU(type, name) + +#endif /* !CONFIG_SMP */ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index fec6decfb983..6b58367d145e 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, \ set_fs(KERNEL_DS); \ pagefault_disable(); \ - ret = __get_user(retval, (__force typeof(retval) __user *)(addr)); \ + ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \ pagefault_enable(); \ set_fs(old_fs); \ ret; \ diff --git a/kernel/exit.c b/kernel/exit.c index 2d8be7ebb0f7..30fcdf16737a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1321,10 +1321,10 @@ static int wait_task_zombie(struct task_struct *p, int options, * group, which consolidates times for all threads in the * group including the group leader. */ + thread_group_cputime(p, &cputime); spin_lock_irq(&p->parent->sighand->siglock); psig = p->parent->signal; sig = p->signal; - thread_group_cputime(p, &cputime); psig->cutime = cputime_add(psig->cutime, cputime_add(cputime.utime, diff --git a/kernel/extable.c b/kernel/extable.c index a26cb2e17023..adf0cc9c02d6 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -66,3 +66,19 @@ int kernel_text_address(unsigned long addr) return 1; return module_text_address(addr) != NULL; } + +/* + * On some architectures (PPC64, IA64) function pointers + * are actually only tokens to some data that then holds the + * real function address. As a result, to find if a function + * pointer is part of the kernel text, we need to do some + * special dereferencing first. + */ +int func_ptr_is_kernel_text(void *ptr) +{ + unsigned long addr; + addr = (unsigned long) dereference_function_descriptor(ptr); + if (core_kernel_text(addr)) + return 1; + return module_text_address(addr) != NULL; +} diff --git a/kernel/futex.c b/kernel/futex.c index 8af10027514b..e10c5c8786a6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -123,24 +123,6 @@ struct futex_hash_bucket { static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; /* - * Take mm->mmap_sem, when futex is shared - */ -static inline void futex_lock_mm(struct rw_semaphore *fshared) -{ - if (fshared) - down_read(fshared); -} - -/* - * Release mm->mmap_sem, when the futex is shared - */ -static inline void futex_unlock_mm(struct rw_semaphore *fshared) -{ - if (fshared) - up_read(fshared); -} - -/* * We hash on the keys returned from get_futex_key (see below). */ static struct futex_hash_bucket *hash_futex(union futex_key *key) @@ -161,6 +143,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2) && key1->both.offset == key2->both.offset); } +/* + * Take a reference to the resource addressed by a key. + * Can be called while holding spinlocks. + * + */ +static void get_futex_key_refs(union futex_key *key) +{ + if (!key->both.ptr) + return; + + switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { + case FUT_OFF_INODE: + atomic_inc(&key->shared.inode->i_count); + break; + case FUT_OFF_MMSHARED: + atomic_inc(&key->private.mm->mm_count); + break; + } +} + +/* + * Drop a reference to the resource addressed by a key. + * The hash bucket spinlock must not be held. + */ +static void drop_futex_key_refs(union futex_key *key) +{ + if (!key->both.ptr) + return; + + switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { + case FUT_OFF_INODE: + iput(key->shared.inode); + break; + case FUT_OFF_MMSHARED: + mmdrop(key->private.mm); + break; + } +} + /** * get_futex_key - Get parameters which are the keys for a futex. * @uaddr: virtual address of the futex @@ -179,12 +200,10 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2) * For other futexes, it points to ¤t->mm->mmap_sem and * caller must have taken the reader lock. but NOT any spinlocks. */ -static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, - union futex_key *key) +static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; struct page *page; int err; @@ -208,100 +227,50 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, return -EFAULT; key->private.mm = mm; key->private.address = address; + get_futex_key_refs(key); return 0; } - /* - * The futex is hashed differently depending on whether - * it's in a shared or private mapping. So check vma first. - */ - vma = find_extend_vma(mm, address); - if (unlikely(!vma)) - return -EFAULT; - /* - * Permissions. - */ - if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) - return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; +again: + err = get_user_pages_fast(address, 1, 0, &page); + if (err < 0) + return err; + + lock_page(page); + if (!page->mapping) { + unlock_page(page); + put_page(page); + goto again; + } /* * Private mappings are handled in a simple way. * * NOTE: When userspace waits on a MAP_SHARED mapping, even if * it's a read-only handle, it's expected that futexes attach to - * the object not the particular process. Therefore we use - * VM_MAYSHARE here, not VM_SHARED which is restricted to shared - * mappings of _writable_ handles. + * the object not the particular process. */ - if (likely(!(vma->vm_flags & VM_MAYSHARE))) { - key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */ + if (PageAnon(page)) { + key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; - return 0; + } else { + key->both.offset |= FUT_OFF_INODE; /* inode-based key */ + key->shared.inode = page->mapping->host; + key->shared.pgoff = page->index; } - /* - * Linear file mappings are also simple. - */ - key->shared.inode = vma->vm_file->f_path.dentry->d_inode; - key->both.offset |= FUT_OFF_INODE; /* inode-based key. */ - if (likely(!(vma->vm_flags & VM_NONLINEAR))) { - key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) - + vma->vm_pgoff); - return 0; - } + get_futex_key_refs(key); - /* - * We could walk the page table to read the non-linear - * pte, and get the page index without fetching the page - * from swap. But that's a lot of code to duplicate here - * for a rare case, so we simply fetch the page. - */ - err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL); - if (err >= 0) { - key->shared.pgoff = - page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - put_page(page); - return 0; - } - return err; -} - -/* - * Take a reference to the resource addressed by a key. - * Can be called while holding spinlocks. - * - */ -static void get_futex_key_refs(union futex_key *key) -{ - if (key->both.ptr == NULL) - return; - switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { - case FUT_OFF_INODE: - atomic_inc(&key->shared.inode->i_count); - break; - case FUT_OFF_MMSHARED: - atomic_inc(&key->private.mm->mm_count); - break; - } + unlock_page(page); + put_page(page); + return 0; } -/* - * Drop a reference to the resource addressed by a key. - * The hash bucket spinlock must not be held. - */ -static void drop_futex_key_refs(union futex_key *key) +static inline +void put_futex_key(int fshared, union futex_key *key) { - if (!key->both.ptr) - return; - switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { - case FUT_OFF_INODE: - iput(key->shared.inode); - break; - case FUT_OFF_MMSHARED: - mmdrop(key->private.mm); - break; - } + drop_futex_key_refs(key); } static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) @@ -328,10 +297,8 @@ static int get_futex_value_locked(u32 *dest, u32 __user *from) /* * Fault handling. - * if fshared is non NULL, current->mm->mmap_sem is already held */ -static int futex_handle_fault(unsigned long address, - struct rw_semaphore *fshared, int attempt) +static int futex_handle_fault(unsigned long address, int attempt) { struct vm_area_struct * vma; struct mm_struct *mm = current->mm; @@ -340,8 +307,7 @@ static int futex_handle_fault(unsigned long address, if (attempt > 2) return ret; - if (!fshared) - down_read(&mm->mmap_sem); + down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (vma && address >= vma->vm_start && (vma->vm_flags & VM_WRITE)) { @@ -361,8 +327,7 @@ static int futex_handle_fault(unsigned long address, current->min_flt++; } } - if (!fshared) - up_read(&mm->mmap_sem); + up_read(&mm->mmap_sem); return ret; } @@ -385,6 +350,7 @@ static int refill_pi_state_cache(void) /* pi_mutex gets initialized later */ pi_state->owner = NULL; atomic_set(&pi_state->refcount, 1); + pi_state->key = FUTEX_KEY_INIT; current->pi_state_cache = pi_state; @@ -462,7 +428,7 @@ void exit_pi_state_list(struct task_struct *curr) struct list_head *next, *head = &curr->pi_state_list; struct futex_pi_state *pi_state; struct futex_hash_bucket *hb; - union futex_key key; + union futex_key key = FUTEX_KEY_INIT; if (!futex_cmpxchg_enabled) return; @@ -719,20 +685,17 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) * Wake up all waiters hashed on the physical page that is mapped * to this virtual address: */ -static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, - int nr_wake, u32 bitset) +static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) { struct futex_hash_bucket *hb; struct futex_q *this, *next; struct plist_head *head; - union futex_key key; + union futex_key key = FUTEX_KEY_INIT; int ret; if (!bitset) return -EINVAL; - futex_lock_mm(fshared); - ret = get_futex_key(uaddr, fshared, &key); if (unlikely(ret != 0)) goto out; @@ -760,7 +723,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, spin_unlock(&hb->lock); out: - futex_unlock_mm(fshared); + put_futex_key(fshared, &key); return ret; } @@ -769,19 +732,16 @@ out: * to this virtual address: */ static int -futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, - u32 __user *uaddr2, +futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, int nr_wake, int nr_wake2, int op) { - union futex_key key1, key2; + union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head; struct futex_q *this, *next; int ret, op_ret, attempt = 0; retryfull: - futex_lock_mm(fshared); - ret = get_futex_key(uaddr1, fshared, &key1); if (unlikely(ret != 0)) goto out; @@ -826,18 +786,12 @@ retry: */ if (attempt++) { ret = futex_handle_fault((unsigned long)uaddr2, - fshared, attempt); + attempt); if (ret) goto out; goto retry; } - /* - * If we would have faulted, release mmap_sem, - * fault it in and start all over again. - */ - futex_unlock_mm(fshared); - ret = get_user(dummy, uaddr2); if (ret) return ret; @@ -873,7 +827,8 @@ retry: if (hb1 != hb2) spin_unlock(&hb2->lock); out: - futex_unlock_mm(fshared); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); return ret; } @@ -882,19 +837,16 @@ out: * Requeue all waiters hashed on one physical page to another * physical page. */ -static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, - u32 __user *uaddr2, +static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, int nr_wake, int nr_requeue, u32 *cmpval) { - union futex_key key1, key2; + union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; int ret, drop_count = 0; retry: - futex_lock_mm(fshared); - ret = get_futex_key(uaddr1, fshared, &key1); if (unlikely(ret != 0)) goto out; @@ -917,12 +869,6 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, if (hb1 != hb2) spin_unlock(&hb2->lock); - /* - * If we would have faulted, release mmap_sem, fault - * it in and start all over again. - */ - futex_unlock_mm(fshared); - ret = get_user(curval, uaddr1); if (!ret) @@ -974,7 +920,8 @@ out_unlock: drop_futex_key_refs(&key1); out: - futex_unlock_mm(fshared); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); return ret; } @@ -1096,8 +1043,7 @@ static void unqueue_me_pi(struct futex_q *q) * private futexes. */ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *newowner, - struct rw_semaphore *fshared) + struct task_struct *newowner, int fshared) { u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; @@ -1176,7 +1122,7 @@ retry: handle_fault: spin_unlock(q->lock_ptr); - ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++); + ret = futex_handle_fault((unsigned long)uaddr, attempt++); spin_lock(q->lock_ptr); @@ -1200,7 +1146,7 @@ handle_fault: static long futex_wait_restart(struct restart_block *restart); -static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, +static int futex_wait(u32 __user *uaddr, int fshared, u32 val, ktime_t *abs_time, u32 bitset) { struct task_struct *curr = current; @@ -1218,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, q.pi_state = NULL; q.bitset = bitset; retry: - futex_lock_mm(fshared); - + q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key); if (unlikely(ret != 0)) goto out_release_sem; @@ -1251,12 +1196,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, if (unlikely(ret)) { queue_unlock(&q, hb); - /* - * If we would have faulted, release mmap_sem, fault it in and - * start all over again. - */ - futex_unlock_mm(fshared); - ret = get_user(uval, uaddr); if (!ret) @@ -1271,12 +1210,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, queue_me(&q, hb); /* - * Now the futex is queued and we have checked the data, we - * don't want to hold mmap_sem while we sleep. - */ - futex_unlock_mm(fshared); - - /* * There might have been scheduling since the queue_me(), as we * cannot hold a spinlock across the get_user() in case it * faults, and we cannot just set TASK_INTERRUPTIBLE state when @@ -1363,7 +1296,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, queue_unlock(&q, hb); out_release_sem: - futex_unlock_mm(fshared); + put_futex_key(fshared, &q.key); return ret; } @@ -1371,13 +1304,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, static long futex_wait_restart(struct restart_block *restart) { u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; - struct rw_semaphore *fshared = NULL; + int fshared = 0; ktime_t t; t.tv64 = restart->futex.time; restart->fn = do_no_restart_syscall; if (restart->futex.flags & FLAGS_SHARED) - fshared = ¤t->mm->mmap_sem; + fshared = 1; return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, restart->futex.bitset); } @@ -1389,7 +1322,7 @@ static long futex_wait_restart(struct restart_block *restart) * if there are waiters then it will block, it does PI, etc. (Due to * races the kernel might see a 0 value of the futex too.) */ -static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, +static int futex_lock_pi(u32 __user *uaddr, int fshared, int detect, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to = NULL; @@ -1412,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, q.pi_state = NULL; retry: - futex_lock_mm(fshared); - + q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key); if (unlikely(ret != 0)) goto out_release_sem; @@ -1502,7 +1434,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, * exit to complete. */ queue_unlock(&q, hb); - futex_unlock_mm(fshared); cond_resched(); goto retry; @@ -1534,12 +1465,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, */ queue_me(&q, hb); - /* - * Now the futex is queued and we have checked the data, we - * don't want to hold mmap_sem while we sleep. - */ - futex_unlock_mm(fshared); - WARN_ON(!q.pi_state); /* * Block on the PI mutex: @@ -1552,7 +1477,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, ret = ret ? 0 : -EWOULDBLOCK; } - futex_lock_mm(fshared); spin_lock(q.lock_ptr); if (!ret) { @@ -1618,7 +1542,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, /* Unqueue and drop the lock */ unqueue_me_pi(&q); - futex_unlock_mm(fshared); if (to) destroy_hrtimer_on_stack(&to->timer); @@ -1628,7 +1551,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, queue_unlock(&q, hb); out_release_sem: - futex_unlock_mm(fshared); + put_futex_key(fshared, &q.key); if (to) destroy_hrtimer_on_stack(&to->timer); return ret; @@ -1645,15 +1568,12 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, queue_unlock(&q, hb); if (attempt++) { - ret = futex_handle_fault((unsigned long)uaddr, fshared, - attempt); + ret = futex_handle_fault((unsigned long)uaddr, attempt); if (ret) goto out_release_sem; goto retry_unlocked; } - futex_unlock_mm(fshared); - ret = get_user(uval, uaddr); if (!ret && (uval != -EFAULT)) goto retry; @@ -1668,13 +1588,13 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, * This is the in-kernel slowpath: we look up the PI state (if any), * and do the rt-mutex unlock. */ -static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared) +static int futex_unlock_pi(u32 __user *uaddr, int fshared) { struct futex_hash_bucket *hb; struct futex_q *this, *next; u32 uval; struct plist_head *head; - union futex_key key; + union futex_key key = FUTEX_KEY_INIT; int ret, attempt = 0; retry: @@ -1685,10 +1605,6 @@ retry: */ if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) return -EPERM; - /* - * First take all the futex related locks: - */ - futex_lock_mm(fshared); ret = get_futex_key(uaddr, fshared, &key); if (unlikely(ret != 0)) @@ -1747,7 +1663,7 @@ retry_unlocked: out_unlock: spin_unlock(&hb->lock); out: - futex_unlock_mm(fshared); + put_futex_key(fshared, &key); return ret; @@ -1763,16 +1679,13 @@ pi_faulted: spin_unlock(&hb->lock); if (attempt++) { - ret = futex_handle_fault((unsigned long)uaddr, fshared, - attempt); + ret = futex_handle_fault((unsigned long)uaddr, attempt); if (ret) goto out; uval = 0; goto retry_unlocked; } - futex_unlock_mm(fshared); - ret = get_user(uval, uaddr); if (!ret && (uval != -EFAULT)) goto retry; @@ -1898,8 +1811,7 @@ retry: * PI futexes happens in exit_pi_state(): */ if (!pi && (uval & FUTEX_WAITERS)) - futex_wake(uaddr, &curr->mm->mmap_sem, 1, - FUTEX_BITSET_MATCH_ANY); + futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); } return 0; } @@ -1995,10 +1907,10 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, { int ret = -ENOSYS; int cmd = op & FUTEX_CMD_MASK; - struct rw_semaphore *fshared = NULL; + int fshared = 0; if (!(op & FUTEX_PRIVATE_FLAG)) - fshared = ¤t->mm->mmap_sem; + fshared = 1; switch (cmd) { case FUTEX_WAIT: diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 46a404173db2..4c9f91383394 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -136,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock) #ifdef CONFIG_LOCK_STAT static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); -static int lock_contention_point(struct lock_class *class, unsigned long ip) +static int lock_point(unsigned long points[], unsigned long ip) { int i; - for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { - if (class->contention_point[i] == 0) { - class->contention_point[i] = ip; + for (i = 0; i < LOCKSTAT_POINTS; i++) { + if (points[i] == 0) { + points[i] = ip; break; } - if (class->contention_point[i] == ip) + if (points[i] == ip) break; } @@ -185,6 +185,9 @@ struct lock_class_stats lock_stats(struct lock_class *class) for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) stats.contention_point[i] += pcs->contention_point[i]; + for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++) + stats.contending_point[i] += pcs->contending_point[i]; + lock_time_add(&pcs->read_waittime, &stats.read_waittime); lock_time_add(&pcs->write_waittime, &stats.write_waittime); @@ -209,6 +212,7 @@ void clear_lock_stats(struct lock_class *class) memset(cpu_stats, 0, sizeof(struct lock_class_stats)); } memset(class->contention_point, 0, sizeof(class->contention_point)); + memset(class->contending_point, 0, sizeof(class->contending_point)); } static struct lock_class_stats *get_lock_stats(struct lock_class *class) @@ -638,8 +642,8 @@ static int static_obj(void *obj) * percpu var? */ for_each_possible_cpu(i) { - start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); - end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM + start = (unsigned long) __per_cpu_start + per_cpu_offset(i); + end = (unsigned long) __per_cpu_start + PERCPU_ENOUGH_ROOM + per_cpu_offset(i); if ((addr >= start) && (addr < end)) @@ -2999,7 +3003,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) struct held_lock *hlock, *prev_hlock; struct lock_class_stats *stats; unsigned int depth; - int i, point; + int i, contention_point, contending_point; depth = curr->lockdep_depth; if (DEBUG_LOCKS_WARN_ON(!depth)) @@ -3023,18 +3027,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) found_it: hlock->waittime_stamp = sched_clock(); - point = lock_contention_point(hlock_class(hlock), ip); + contention_point = lock_point(hlock_class(hlock)->contention_point, ip); + contending_point = lock_point(hlock_class(hlock)->contending_point, + lock->ip); stats = get_lock_stats(hlock_class(hlock)); - if (point < ARRAY_SIZE(stats->contention_point)) - stats->contention_point[point]++; + if (contention_point < LOCKSTAT_POINTS) + stats->contention_point[contention_point]++; + if (contending_point < LOCKSTAT_POINTS) + stats->contending_point[contending_point]++; if (lock->cpu != smp_processor_id()) stats->bounces[bounce_contended + !!hlock->read]++; put_lock_stats(stats); } static void -__lock_acquired(struct lockdep_map *lock) +__lock_acquired(struct lockdep_map *lock, unsigned long ip) { struct task_struct *curr = current; struct held_lock *hlock, *prev_hlock; @@ -3083,6 +3091,7 @@ found_it: put_lock_stats(stats); lock->cpu = cpu; + lock->ip = ip; } void lock_contended(struct lockdep_map *lock, unsigned long ip) @@ -3104,7 +3113,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) } EXPORT_SYMBOL_GPL(lock_contended); -void lock_acquired(struct lockdep_map *lock) +void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; @@ -3117,7 +3126,7 @@ void lock_acquired(struct lockdep_map *lock) raw_local_irq_save(flags); check_flags(flags); current->lockdep_recursion = 1; - __lock_acquired(lock); + __lock_acquired(lock, ip); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 20dbcbf9c7dd..13716b813896 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -470,11 +470,12 @@ static void seq_line(struct seq_file *m, char c, int offset, int length) static void snprint_time(char *buf, size_t bufsiz, s64 nr) { - unsigned long rem; + s64 div; + s32 rem; nr += 5; /* for display rounding */ - rem = do_div(nr, 1000); /* XXX: do_div_signed */ - snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10); + div = div_s64_rem(nr, 1000, &rem); + snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10); } static void seq_time(struct seq_file *m, s64 time) @@ -556,7 +557,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) if (stats->read_holdtime.nr) namelen += 2; - for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { + for (i = 0; i < LOCKSTAT_POINTS; i++) { char sym[KSYM_SYMBOL_LEN]; char ip[32]; @@ -573,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) stats->contention_point[i], ip, sym); } + for (i = 0; i < LOCKSTAT_POINTS; i++) { + char sym[KSYM_SYMBOL_LEN]; + char ip[32]; + + if (class->contending_point[i] == 0) + break; + + if (!i) + seq_line(m, '-', 40-namelen, namelen); + + sprint_symbol(sym, class->contending_point[i]); + snprintf(ip, sizeof(ip), "[<%p>]", + (void *)class->contending_point[i]); + seq_printf(m, "%40s %14lu %29s %s\n", name, + stats->contending_point[i], + ip, sym); + } if (i) { seq_puts(m, "\n"); seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); @@ -582,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) static void seq_header(struct seq_file *m) { - seq_printf(m, "lock_stat version 0.2\n"); + seq_printf(m, "lock_stat version 0.3\n"); seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " "%14s %14s\n", diff --git a/kernel/module.c b/kernel/module.c index 1f4cc00e0c20..48c323c97ae6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -47,6 +47,7 @@ #include <linux/rculist.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> +#include <asm/sections.h> #include <linux/license.h> #include <asm/sections.h> #include <linux/tracepoint.h> @@ -416,7 +417,7 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, align = PAGE_SIZE; } - ptr = __per_cpu_start; + ptr = __per_cpu_load; for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { /* Extra for alignment requirement. */ extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; @@ -451,7 +452,7 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, static void percpu_modfree(void *freeme) { unsigned int i; - void *ptr = __per_cpu_start + block_size(pcpu_size[0]); + void *ptr = __per_cpu_load + block_size(pcpu_size[0]); /* First entry is core kernel percpu data. */ for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { @@ -502,7 +503,7 @@ static int percpu_modinit(void) pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, GFP_KERNEL); /* Static in-kernel percpu data (used). */ - pcpu_size[0] = -(__per_cpu_end-__per_cpu_start); + pcpu_size[0] = -__per_cpu_size; /* Free room. */ pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; if (pcpu_size[1] < 0) { diff --git a/kernel/mutex.c b/kernel/mutex.c index 12c779dc65d4..39a3816b68d9 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } done: - lock_acquired(&lock->dep_map); + lock_acquired(&lock->dep_map, ip); /* got the lock - rejoice! */ mutex_remove_waiter(lock, &waiter, task_thread_info(task)); debug_mutex_set_owner(lock, task_thread_info(task)); diff --git a/kernel/notifier.c b/kernel/notifier.c index 4282c0a40a57..61d5aa5eced3 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -82,6 +82,14 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl, while (nb && nr_to_call) { next_nb = rcu_dereference(nb->next); + +#ifdef CONFIG_DEBUG_NOTIFIERS + if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) { + WARN(1, "Invalid notifier called!"); + nb = next_nb; + continue; + } +#endif ret = nb->notifier_call(nb, val, v); if (nr_calls) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 895337b16a24..3f4377e0aa04 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -58,21 +58,21 @@ void thread_group_cputime( struct task_struct *tsk, struct task_cputime *times) { - struct signal_struct *sig; + struct task_cputime *totals, *tot; int i; - struct task_cputime *tot; - sig = tsk->signal; - if (unlikely(!sig) || !sig->cputime.totals) { + totals = tsk->signal->cputime.totals; + if (!totals) { times->utime = tsk->utime; times->stime = tsk->stime; times->sum_exec_runtime = tsk->se.sum_exec_runtime; return; } + times->stime = times->utime = cputime_zero; times->sum_exec_runtime = 0; for_each_possible_cpu(i) { - tot = per_cpu_ptr(tsk->signal->cputime.totals, i); + tot = per_cpu_ptr(totals, i); times->utime = cputime_add(times->utime, tot->utime); times->stime = cputime_add(times->stime, tot->stime); times->sum_exec_runtime += tot->sum_exec_runtime; diff --git a/kernel/sched.c b/kernel/sched.c index b7480fb5c3dc..775b5f8d293a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4203,7 +4203,6 @@ void account_steal_time(struct task_struct *p, cputime_t steal) if (p == rq->idle) { p->stime = cputime_add(p->stime, steal); - account_group_system_time(p, steal); if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else @@ -4339,7 +4338,7 @@ void __kprobes sub_preempt_count(int val) /* * Underflow? */ - if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) + if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) return; /* * Is the spinlock portion underflowing? diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 3953e4aed733..884e6cd2769c 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -164,7 +164,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = 1024; /* * Zero means infinite timeout - no checking done: */ -unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; +unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480; unsigned long __read_mostly sysctl_hung_task_warnings = 10; diff --git a/kernel/sys.c b/kernel/sys.c index 31deba8f7d16..5fc3a0cfb994 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -858,8 +858,8 @@ void do_sys_times(struct tms *tms) struct task_cputime cputime; cputime_t cutime, cstime; - spin_lock_irq(¤t->sighand->siglock); thread_group_cputime(current, &cputime); + spin_lock_irq(¤t->sighand->siglock); cutime = current->signal->cutime; cstime = current->signal->cstime; spin_unlock_irq(¤t->sighand->siglock); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b0f239e443bc..1e3fd3e3436a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -545,6 +545,16 @@ config DEBUG_SG If unsure, say N. +config DEBUG_NOTIFIERS + bool "Debug notifier call chains" + depends on DEBUG_KERNEL + help + Enable this to turn on sanity checking for notifier call chains. + This is most useful for kernel developers to make sure that + modules properly unregister themselves from notifier chains. + This is a relatively cheap check but if you care about maximum + performance, say N. + config FRAME_POINTER bool "Compile the kernel with frame pointers" depends on DEBUG_KERNEL && \ diff --git a/mm/memory.c b/mm/memory.c index 164951c47305..fc031d68327e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3049,3 +3049,18 @@ void print_vma_addr(char *prefix, unsigned long ip) } up_read(¤t->mm->mmap_sem); } + +#ifdef CONFIG_PROVE_LOCKING +void might_fault(void) +{ + might_sleep(); + /* + * it would be nicer only to annotate paths which are not under + * pagefault_disable, however that requires a larger audit and + * providing helpers like get_user_atomic. + */ + if (!in_atomic() && current->mm) + might_lock_read(¤t->mm->mmap_sem); +} +EXPORT_SYMBOL(might_fault); +#endif |