diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-02-04 17:26:31 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-02-04 17:26:31 +1100 |
commit | c6e0278d647b3b27bdbea4bc01f8946b104b6277 (patch) | |
tree | 06f15abc4462c9fd4bc48da0404e73791bb60bfc /arch/x86 | |
parent | 2208cd4bf0d9d6444f871c1c82a5df55b986e399 (diff) | |
parent | 774c21db21936e8ba41d66917ffa8f0e131bf122 (diff) |
Merge remote branch 'tip/auto-latest'
Conflicts:
kernel/kgdb.c
Diffstat (limited to 'arch/x86')
64 files changed, 2099 insertions, 1543 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index f20ddf84a893..a19829374e6a 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -319,7 +319,7 @@ config X86_L1_CACHE_SHIFT config X86_XADD def_bool y - depends on X86_32 && !M386 + depends on X86_64 || !M386 config X86_PPRO_FENCE bool "PentiumPro memory ordering errata workaround" diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index f9f472462753..9046e4af66ce 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -297,7 +297,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) * size limits imposed on them by creating programs with large * arrays in the data or bss. */ - rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; + rlim = rlimit(RLIMIT_DATA); if (rlim >= RLIM_INFINITY) rlim = ~0; if (ex.a_data + ex.a_bss > rlim) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 69b74a7b877f..3b5b828767b6 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -125,11 +125,16 @@ static inline void alternatives_smp_switch(int smp) {} asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ : output : "i" (0), ## input) +/* Like alternative_io, but for replacing a direct call with another one. */ +#define alternative_call(oldfunc, newfunc, feature, output, input...) \ + asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \ + : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) + /* * use this macro(s) if you need more than one output parameter * in alternative_io */ -#define ASM_OUTPUT2(a, b) a, b +#define ASM_OUTPUT2(a...) a struct paravirt_patch_site; #ifdef CONFIG_PARAVIRT diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 4e1b8873c474..8baaa719fa7f 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -1,5 +1,300 @@ +#ifndef _ASM_X86_ATOMIC_H +#define _ASM_X86_ATOMIC_H + +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/processor.h> +#include <asm/alternative.h> +#include <asm/cmpxchg.h> + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +#define ATOMIC_INIT(i) { (i) } + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +static inline int atomic_read(const atomic_t *v) +{ + return v->counter; +} + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void atomic_set(atomic_t *v, int i) +{ + v->counter = i; +} + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. + */ +static inline void atomic_add(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "addl %1,%0" + : "+m" (v->counter) + : "ir" (i)); +} + +/** + * atomic_sub - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +static inline void atomic_sub(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "subl %1,%0" + : "+m" (v->counter) + : "ir" (i)); +} + +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int atomic_sub_and_test(int i, atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" + : "+m" (v->counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +static inline void atomic_inc(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "incl %0" + : "+m" (v->counter)); +} + +/** + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic_dec(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "decl %0" + : "+m" (v->counter)); +} + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int atomic_dec_and_test(atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "decl %0; sete %1" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int atomic_inc_and_test(atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "incl %0; sete %1" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * atomic_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int atomic_add_negative(int i, atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" + : "+m" (v->counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +} + +/** + * atomic_add_return - add integer and return + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline int atomic_add_return(int i, atomic_t *v) +{ + int __i; +#ifdef CONFIG_M386 + unsigned long flags; + if (unlikely(boot_cpu_data.x86 <= 3)) + goto no_xadd; +#endif + /* Modern 486+ processor */ + __i = i; + asm volatile(LOCK_PREFIX "xaddl %0, %1" + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + local_irq_save(flags); + __i = atomic_read(v); + atomic_set(v, i + __i); + local_irq_restore(flags); + return i + __i; +#endif +} + +/** + * atomic_sub_return - subtract integer and return + * @v: pointer of type atomic_t + * @i: integer value to subtract + * + * Atomically subtracts @i from @v and returns @v - @i + */ +static inline int atomic_sub_return(int i, atomic_t *v) +{ + return atomic_add_return(-i, v); +} + +#define atomic_inc_return(v) (atomic_add_return(1, v)) +#define atomic_dec_return(v) (atomic_sub_return(1, v)) + +static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +{ + return cmpxchg(&v->counter, old, new); +} + +static inline int atomic_xchg(atomic_t *v, int new) +{ + return xchg(&v->counter, new); +} + +/** + * atomic_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as @v was not already @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static inline int atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + for (;;) { + if (unlikely(c == (u))) + break; + old = atomic_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; + c = old; + } + return c != (u); +} + +#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) + +/** + * atomic_inc_short - increment of a short integer + * @v: pointer to type int + * + * Atomically adds 1 to @v + * Returns the new value of @u + */ +static inline short int atomic_inc_short(short int *v) +{ + asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); + return *v; +} + +#ifdef CONFIG_X86_64 +/** + * atomic_or_long - OR of two long integers + * @v1: pointer to type unsigned long + * @v2: pointer to type unsigned long + * + * Atomically ORs @v1 and @v2 + * Returns the result of the OR + */ +static inline void atomic_or_long(unsigned long *v1, unsigned long v2) +{ + asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2)); +} +#endif + +/* These are x86-specific, used by some header files */ +#define atomic_clear_mask(mask, addr) \ + asm volatile(LOCK_PREFIX "andl %0,%1" \ + : : "r" (~(mask)), "m" (*(addr)) : "memory") + +#define atomic_set_mask(mask, addr) \ + asm volatile(LOCK_PREFIX "orl %0,%1" \ + : : "r" ((unsigned)(mask)), "m" (*(addr)) \ + : "memory") + +/* Atomic operations are already serializing on x86 */ +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + #ifdef CONFIG_X86_32 -# include "atomic_32.h" +# include "atomic64_32.h" #else -# include "atomic_64.h" +# include "atomic64_64.h" #endif + +#include <asm-generic/atomic-long.h> +#endif /* _ASM_X86_ATOMIC_H */ diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h new file mode 100644 index 000000000000..03027bf28de5 --- /dev/null +++ b/arch/x86/include/asm/atomic64_32.h @@ -0,0 +1,160 @@ +#ifndef _ASM_X86_ATOMIC64_32_H +#define _ASM_X86_ATOMIC64_32_H + +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/processor.h> +//#include <asm/cmpxchg.h> + +/* An 64bit atomic type */ + +typedef struct { + u64 __aligned(8) counter; +} atomic64_t; + +#define ATOMIC64_INIT(val) { (val) } + +extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val); + +/** + * atomic64_xchg - xchg atomic64 variable + * @ptr: pointer to type atomic64_t + * @new_val: value to assign + * + * Atomically xchgs the value of @ptr to @new_val and returns + * the old value. + */ +extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val); + +/** + * atomic64_set - set atomic64 variable + * @ptr: pointer to type atomic64_t + * @new_val: value to assign + * + * Atomically sets the value of @ptr to @new_val. + */ +extern void atomic64_set(atomic64_t *ptr, u64 new_val); + +/** + * atomic64_read - read atomic64 variable + * @ptr: pointer to type atomic64_t + * + * Atomically reads the value of @ptr and returns it. + */ +static inline u64 atomic64_read(atomic64_t *ptr) +{ + u64 res; + + /* + * Note, we inline this atomic64_t primitive because + * it only clobbers EAX/EDX and leaves the others + * untouched. We also (somewhat subtly) rely on the + * fact that cmpxchg8b returns the current 64-bit value + * of the memory location we are touching: + */ + asm volatile( + "mov %%ebx, %%eax\n\t" + "mov %%ecx, %%edx\n\t" + LOCK_PREFIX "cmpxchg8b %1\n" + : "=&A" (res) + : "m" (*ptr) + ); + + return res; +} + +extern u64 atomic64_read(atomic64_t *ptr); + +/** + * atomic64_add_return - add and return + * @delta: integer value to add + * @ptr: pointer to type atomic64_t + * + * Atomically adds @delta to @ptr and returns @delta + *@ptr + */ +extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr); + +/* + * Other variants with different arithmetic operators: + */ +extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr); +extern u64 atomic64_inc_return(atomic64_t *ptr); +extern u64 atomic64_dec_return(atomic64_t *ptr); + +/** + * atomic64_add - add integer to atomic64 variable + * @delta: integer value to add + * @ptr: pointer to type atomic64_t + * + * Atomically adds @delta to @ptr. + */ +extern void atomic64_add(u64 delta, atomic64_t *ptr); + +/** + * atomic64_sub - subtract the atomic64 variable + * @delta: integer value to subtract + * @ptr: pointer to type atomic64_t + * + * Atomically subtracts @delta from @ptr. + */ +extern void atomic64_sub(u64 delta, atomic64_t *ptr); + +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @delta: integer value to subtract + * @ptr: pointer to type atomic64_t + * + * Atomically subtracts @delta from @ptr and returns + * true if the result is zero, or false for all + * other cases. + */ +extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr); + +/** + * atomic64_inc - increment atomic64 variable + * @ptr: pointer to type atomic64_t + * + * Atomically increments @ptr by 1. + */ +extern void atomic64_inc(atomic64_t *ptr); + +/** + * atomic64_dec - decrement atomic64 variable + * @ptr: pointer to type atomic64_t + * + * Atomically decrements @ptr by 1. + */ +extern void atomic64_dec(atomic64_t *ptr); + +/** + * atomic64_dec_and_test - decrement and test + * @ptr: pointer to type atomic64_t + * + * Atomically decrements @ptr by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +extern int atomic64_dec_and_test(atomic64_t *ptr); + +/** + * atomic64_inc_and_test - increment and test + * @ptr: pointer to type atomic64_t + * + * Atomically increments @ptr by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +extern int atomic64_inc_and_test(atomic64_t *ptr); + +/** + * atomic64_add_negative - add and test if negative + * @delta: integer value to add + * @ptr: pointer to type atomic64_t + * + * Atomically adds @delta to @ptr and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +extern int atomic64_add_negative(u64 delta, atomic64_t *ptr); + +#endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h new file mode 100644 index 000000000000..51c5b4056929 --- /dev/null +++ b/arch/x86/include/asm/atomic64_64.h @@ -0,0 +1,224 @@ +#ifndef _ASM_X86_ATOMIC64_64_H +#define _ASM_X86_ATOMIC64_64_H + +#include <linux/types.h> +#include <asm/alternative.h> +#include <asm/cmpxchg.h> + +/* The 64-bit atomic type */ + +#define ATOMIC64_INIT(i) { (i) } + +/** + * atomic64_read - read atomic64 variable + * @v: pointer of type atomic64_t + * + * Atomically reads the value of @v. + * Doesn't imply a read memory barrier. + */ +static inline long atomic64_read(const atomic64_t *v) +{ + return v->counter; +} + +/** + * atomic64_set - set atomic64 variable + * @v: pointer to type atomic64_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void atomic64_set(atomic64_t *v, long i) +{ + v->counter = i; +} + +/** + * atomic64_add - add integer to atomic64 variable + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v. + */ +static inline void atomic64_add(long i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "addq %1,%0" + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); +} + +/** + * atomic64_sub - subtract the atomic64 variable + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v. + */ +static inline void atomic64_sub(long i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "subq %1,%0" + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); +} + +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int atomic64_sub_and_test(long i, atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "er" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic64_inc - increment atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1. + */ +static inline void atomic64_inc(atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "incq %0" + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic64_dec - decrement atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic64_dec(atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "decq %0" + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic64_dec_and_test - decrement and test + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int atomic64_dec_and_test(atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "decq %0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic64_inc_and_test - increment and test + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int atomic64_inc_and_test(atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "incq %0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic64_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int atomic64_add_negative(long i, atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "addq %2,%0; sets %1" + : "=m" (v->counter), "=qm" (c) + : "er" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic64_add_return - add and return + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline long atomic64_add_return(long i, atomic64_t *v) +{ + long __i = i; + asm volatile(LOCK_PREFIX "xaddq %0, %1;" + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; +} + +static inline long atomic64_sub_return(long i, atomic64_t *v) +{ + return atomic64_add_return(-i, v); +} + +#define atomic64_inc_return(v) (atomic64_add_return(1, (v))) +#define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) + +static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) +{ + return cmpxchg(&v->counter, old, new); +} + +static inline long atomic64_xchg(atomic64_t *v, long new) +{ + return xchg(&v->counter, new); +} + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static inline int atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long c, old; + c = atomic64_read(v); + for (;;) { + if (unlikely(c == (u))) + break; + old = atomic64_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; + c = old; + } + return c != (u); +} + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + +#endif /* _ASM_X86_ATOMIC64_64_H */ diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h deleted file mode 100644 index dc5a667ff791..000000000000 --- a/arch/x86/include/asm/atomic_32.h +++ /dev/null @@ -1,415 +0,0 @@ -#ifndef _ASM_X86_ATOMIC_32_H -#define _ASM_X86_ATOMIC_32_H - -#include <linux/compiler.h> -#include <linux/types.h> -#include <asm/processor.h> -#include <asm/cmpxchg.h> - -/* - * Atomic operations that C can't guarantee us. Useful for - * resource counting etc.. - */ - -#define ATOMIC_INIT(i) { (i) } - -/** - * atomic_read - read atomic variable - * @v: pointer of type atomic_t - * - * Atomically reads the value of @v. - */ -static inline int atomic_read(const atomic_t *v) -{ - return v->counter; -} - -/** - * atomic_set - set atomic variable - * @v: pointer of type atomic_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ -static inline void atomic_set(atomic_t *v, int i) -{ - v->counter = i; -} - -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v. - */ -static inline void atomic_add(int i, atomic_t *v) -{ - asm volatile(LOCK_PREFIX "addl %1,%0" - : "+m" (v->counter) - : "ir" (i)); -} - -/** - * atomic_sub - subtract integer from atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v. - */ -static inline void atomic_sub(int i, atomic_t *v) -{ - asm volatile(LOCK_PREFIX "subl %1,%0" - : "+m" (v->counter) - : "ir" (i)); -} - -/** - * atomic_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ -static inline int atomic_sub_and_test(int i, atomic_t *v) -{ - unsigned char c; - - asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" - : "+m" (v->counter), "=qm" (c) - : "ir" (i) : "memory"); - return c; -} - -/** - * atomic_inc - increment atomic variable - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1. - */ -static inline void atomic_inc(atomic_t *v) -{ - asm volatile(LOCK_PREFIX "incl %0" - : "+m" (v->counter)); -} - -/** - * atomic_dec - decrement atomic variable - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1. - */ -static inline void atomic_dec(atomic_t *v) -{ - asm volatile(LOCK_PREFIX "decl %0" - : "+m" (v->counter)); -} - -/** - * atomic_dec_and_test - decrement and test - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -static inline int atomic_dec_and_test(atomic_t *v) -{ - unsigned char c; - - asm volatile(LOCK_PREFIX "decl %0; sete %1" - : "+m" (v->counter), "=qm" (c) - : : "memory"); - return c != 0; -} - -/** - * atomic_inc_and_test - increment and test - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -static inline int atomic_inc_and_test(atomic_t *v) -{ - unsigned char c; - - asm volatile(LOCK_PREFIX "incl %0; sete %1" - : "+m" (v->counter), "=qm" (c) - : : "memory"); - return c != 0; -} - -/** - * atomic_add_negative - add and test if negative - * @v: pointer of type atomic_t - * @i: integer value to add - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -static inline int atomic_add_negative(int i, atomic_t *v) -{ - unsigned char c; - - asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" - : "+m" (v->counter), "=qm" (c) - : "ir" (i) : "memory"); - return c; -} - -/** - * atomic_add_return - add integer and return - * @v: pointer of type atomic_t - * @i: integer value to add - * - * Atomically adds @i to @v and returns @i + @v - */ -static inline int atomic_add_return(int i, atomic_t *v) -{ - int __i; -#ifdef CONFIG_M386 - unsigned long flags; - if (unlikely(boot_cpu_data.x86 <= 3)) - goto no_xadd; -#endif - /* Modern 486+ processor */ - __i = i; - asm volatile(LOCK_PREFIX "xaddl %0, %1" - : "+r" (i), "+m" (v->counter) - : : "memory"); - return i + __i; - -#ifdef CONFIG_M386 -no_xadd: /* Legacy 386 processor */ - local_irq_save(flags); - __i = atomic_read(v); - atomic_set(v, i + __i); - local_irq_restore(flags); - return i + __i; -#endif -} - -/** - * atomic_sub_return - subtract integer and return - * @v: pointer of type atomic_t - * @i: integer value to subtract - * - * Atomically subtracts @i from @v and returns @v - @i - */ -static inline int atomic_sub_return(int i, atomic_t *v) -{ - return atomic_add_return(-i, v); -} - -static inline int atomic_cmpxchg(atomic_t *v, int old, int new) -{ - return cmpxchg(&v->counter, old, new); -} - -static inline int atomic_xchg(atomic_t *v, int new) -{ - return xchg(&v->counter, new); -} - -/** - * atomic_add_unless - add unless the number is already a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, so long as @v was not already @u. - * Returns non-zero if @v was not @u, and zero otherwise. - */ -static inline int atomic_add_unless(atomic_t *v, int a, int u) -{ - int c, old; - c = atomic_read(v); - for (;;) { - if (unlikely(c == (u))) - break; - old = atomic_cmpxchg((v), c, c + (a)); - if (likely(old == c)) - break; - c = old; - } - return c != (u); -} - -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) - -#define atomic_inc_return(v) (atomic_add_return(1, v)) -#define atomic_dec_return(v) (atomic_sub_return(1, v)) - -/* These are x86-specific, used by some header files */ -#define atomic_clear_mask(mask, addr) \ - asm volatile(LOCK_PREFIX "andl %0,%1" \ - : : "r" (~(mask)), "m" (*(addr)) : "memory") - -#define atomic_set_mask(mask, addr) \ - asm volatile(LOCK_PREFIX "orl %0,%1" \ - : : "r" (mask), "m" (*(addr)) : "memory") - -/* Atomic operations are already serializing on x86 */ -#define smp_mb__before_atomic_dec() barrier() -#define smp_mb__after_atomic_dec() barrier() -#define smp_mb__before_atomic_inc() barrier() -#define smp_mb__after_atomic_inc() barrier() - -/* An 64bit atomic type */ - -typedef struct { - u64 __aligned(8) counter; -} atomic64_t; - -#define ATOMIC64_INIT(val) { (val) } - -extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val); - -/** - * atomic64_xchg - xchg atomic64 variable - * @ptr: pointer to type atomic64_t - * @new_val: value to assign - * - * Atomically xchgs the value of @ptr to @new_val and returns - * the old value. - */ -extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val); - -/** - * atomic64_set - set atomic64 variable - * @ptr: pointer to type atomic64_t - * @new_val: value to assign - * - * Atomically sets the value of @ptr to @new_val. - */ -extern void atomic64_set(atomic64_t *ptr, u64 new_val); - -/** - * atomic64_read - read atomic64 variable - * @ptr: pointer to type atomic64_t - * - * Atomically reads the value of @ptr and returns it. - */ -static inline u64 atomic64_read(atomic64_t *ptr) -{ - u64 res; - - /* - * Note, we inline this atomic64_t primitive because - * it only clobbers EAX/EDX and leaves the others - * untouched. We also (somewhat subtly) rely on the - * fact that cmpxchg8b returns the current 64-bit value - * of the memory location we are touching: - */ - asm volatile( - "mov %%ebx, %%eax\n\t" - "mov %%ecx, %%edx\n\t" - LOCK_PREFIX "cmpxchg8b %1\n" - : "=&A" (res) - : "m" (*ptr) - ); - - return res; -} - -extern u64 atomic64_read(atomic64_t *ptr); - -/** - * atomic64_add_return - add and return - * @delta: integer value to add - * @ptr: pointer to type atomic64_t - * - * Atomically adds @delta to @ptr and returns @delta + *@ptr - */ -extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr); - -/* - * Other variants with different arithmetic operators: - */ -extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr); -extern u64 atomic64_inc_return(atomic64_t *ptr); -extern u64 atomic64_dec_return(atomic64_t *ptr); - -/** - * atomic64_add - add integer to atomic64 variable - * @delta: integer value to add - * @ptr: pointer to type atomic64_t - * - * Atomically adds @delta to @ptr. - */ -extern void atomic64_add(u64 delta, atomic64_t *ptr); - -/** - * atomic64_sub - subtract the atomic64 variable - * @delta: integer value to subtract - * @ptr: pointer to type atomic64_t - * - * Atomically subtracts @delta from @ptr. - */ -extern void atomic64_sub(u64 delta, atomic64_t *ptr); - -/** - * atomic64_sub_and_test - subtract value from variable and test result - * @delta: integer value to subtract - * @ptr: pointer to type atomic64_t - * - * Atomically subtracts @delta from @ptr and returns - * true if the result is zero, or false for all - * other cases. - */ -extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr); - -/** - * atomic64_inc - increment atomic64 variable - * @ptr: pointer to type atomic64_t - * - * Atomically increments @ptr by 1. - */ -extern void atomic64_inc(atomic64_t *ptr); - -/** - * atomic64_dec - decrement atomic64 variable - * @ptr: pointer to type atomic64_t - * - * Atomically decrements @ptr by 1. - */ -extern void atomic64_dec(atomic64_t *ptr); - -/** - * atomic64_dec_and_test - decrement and test - * @ptr: pointer to type atomic64_t - * - * Atomically decrements @ptr by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -extern int atomic64_dec_and_test(atomic64_t *ptr); - -/** - * atomic64_inc_and_test - increment and test - * @ptr: pointer to type atomic64_t - * - * Atomically increments @ptr by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -extern int atomic64_inc_and_test(atomic64_t *ptr); - -/** - * atomic64_add_negative - add and test if negative - * @delta: integer value to add - * @ptr: pointer to type atomic64_t - * - * Atomically adds @delta to @ptr and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -extern int atomic64_add_negative(u64 delta, atomic64_t *ptr); - -#include <asm-generic/atomic-long.h> -#endif /* _ASM_X86_ATOMIC_32_H */ diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h deleted file mode 100644 index d605dc268e79..000000000000 --- a/arch/x86/include/asm/atomic_64.h +++ /dev/null @@ -1,485 +0,0 @@ -#ifndef _ASM_X86_ATOMIC_64_H -#define _ASM_X86_ATOMIC_64_H - -#include <linux/types.h> -#include <asm/alternative.h> -#include <asm/cmpxchg.h> - -/* - * Atomic operations that C can't guarantee us. Useful for - * resource counting etc.. - */ - -#define ATOMIC_INIT(i) { (i) } - -/** - * atomic_read - read atomic variable - * @v: pointer of type atomic_t - * - * Atomically reads the value of @v. - */ -static inline int atomic_read(const atomic_t *v) -{ - return v->counter; -} - -/** - * atomic_set - set atomic variable - * @v: pointer of type atomic_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ -static inline void atomic_set(atomic_t *v, int i) -{ - v->counter = i; -} - -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v. - */ -static inline void atomic_add(int i, atomic_t *v) -{ - asm volatile(LOCK_PREFIX "addl %1,%0" - : "=m" (v->counter) - : "ir" (i), "m" (v->counter)); -} - -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v. - */ -static inline void atomic_sub(int i, atomic_t *v) -{ - asm volatile(LOCK_PREFIX "subl %1,%0" - : "=m" (v->counter) - : "ir" (i), "m" (v->counter)); -} - -/** - * atomic_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ -static inline int atomic_sub_and_test(int i, atomic_t *v) -{ - unsigned char c; - - asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" - : "=m" (v->counter), "=qm" (c) - : "ir" (i), "m" (v->counter) : "memory"); - return c; -} - -/** - * atomic_inc - increment atomic variable - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1. - */ -static inline void atomic_inc(atomic_t *v) -{ - asm volatile(LOCK_PREFIX "incl %0" - : "=m" (v->counter) - : "m" (v->counter)); -} - -/** - * atomic_dec - decrement atomic variable - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1. - */ -static inline void atomic_dec(atomic_t *v) -{ - asm volatile(LOCK_PREFIX "decl %0" - : "=m" (v->counter) - : "m" (v->counter)); -} - -/** - * atomic_dec_and_test - decrement and test - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -static inline int atomic_dec_and_test(atomic_t *v) -{ - unsigned char c; - - asm volatile(LOCK_PREFIX "decl %0; sete %1" - : "=m" (v->counter), "=qm" (c) - : "m" (v->counter) : "memory"); - return c != 0; -} - -/** - * atomic_inc_and_test - increment and test - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -static inline int atomic_inc_and_test(atomic_t *v) -{ - unsigned char c; - - asm volatile(LOCK_PREFIX "incl %0; sete %1" - : "=m" (v->counter), "=qm" (c) - : "m" (v->counter) : "memory"); - return c != 0; -} - -/** - * atomic_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -static inline int atomic_add_negative(int i, atomic_t *v) -{ - unsigned char c; - - asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" - : "=m" (v->counter), "=qm" (c) - : "ir" (i), "m" (v->counter) : "memory"); - return c; -} - -/** - * atomic_add_return - add and return - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns @i + @v - */ -static inline int atomic_add_return(int i, atomic_t *v) -{ - int __i = i; - asm volatile(LOCK_PREFIX "xaddl %0, %1" - : "+r" (i), "+m" (v->counter) - : : "memory"); - return i + __i; -} - -static inline int atomic_sub_return(int i, atomic_t *v) -{ - return atomic_add_return(-i, v); -} - -#define atomic_inc_return(v) (atomic_add_return(1, v)) -#define atomic_dec_return(v) (atomic_sub_return(1, v)) - -/* The 64-bit atomic type */ - -#define ATOMIC64_INIT(i) { (i) } - -/** - * atomic64_read - read atomic64 variable - * @v: pointer of type atomic64_t - * - * Atomically reads the value of @v. - * Doesn't imply a read memory barrier. - */ -static inline long atomic64_read(const atomic64_t *v) -{ - return v->counter; -} - -/** - * atomic64_set - set atomic64 variable - * @v: pointer to type atomic64_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ -static inline void atomic64_set(atomic64_t *v, long i) -{ - v->counter = i; -} - -/** - * atomic64_add - add integer to atomic64 variable - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v. - */ -static inline void atomic64_add(long i, atomic64_t *v) -{ - asm volatile(LOCK_PREFIX "addq %1,%0" - : "=m" (v->counter) - : "er" (i), "m" (v->counter)); -} - -/** - * atomic64_sub - subtract the atomic64 variable - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v. - */ -static inline void atomic64_sub(long i, atomic64_t *v) -{ - asm volatile(LOCK_PREFIX "subq %1,%0" - : "=m" (v->counter) - : "er" (i), "m" (v->counter)); -} - -/** - * atomic64_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ -static inline int atomic64_sub_and_test(long i, atomic64_t *v) -{ - unsigned char c; - - asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" - : "=m" (v->counter), "=qm" (c) - : "er" (i), "m" (v->counter) : "memory"); - return c; -} - -/** - * atomic64_inc - increment atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1. - */ -static inline void atomic64_inc(atomic64_t *v) -{ - asm volatile(LOCK_PREFIX "incq %0" - : "=m" (v->counter) - : "m" (v->counter)); -} - -/** - * atomic64_dec - decrement atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1. - */ -static inline void atomic64_dec(atomic64_t *v) -{ - asm volatile(LOCK_PREFIX "decq %0" - : "=m" (v->counter) - : "m" (v->counter)); -} - -/** - * atomic64_dec_and_test - decrement and test - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -static inline int atomic64_dec_and_test(atomic64_t *v) -{ - unsigned char c; - - asm volatile(LOCK_PREFIX "decq %0; sete %1" - : "=m" (v->counter), "=qm" (c) - : "m" (v->counter) : "memory"); - return c != 0; -} - -/** - * atomic64_inc_and_test - increment and test - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -static inline int atomic64_inc_and_test(atomic64_t *v) -{ - unsigned char c; - - asm volatile(LOCK_PREFIX "incq %0; sete %1" - : "=m" (v->counter), "=qm" (c) - : "m" (v->counter) : "memory"); - return c != 0; -} - -/** - * atomic64_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -static inline int atomic64_add_negative(long i, atomic64_t *v) -{ - unsigned char c; - - asm volatile(LOCK_PREFIX "addq %2,%0; sets %1" - : "=m" (v->counter), "=qm" (c) - : "er" (i), "m" (v->counter) : "memory"); - return c; -} - -/** - * atomic64_add_return - add and return - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns @i + @v - */ -static inline long atomic64_add_return(long i, atomic64_t *v) -{ - long __i = i; - asm volatile(LOCK_PREFIX "xaddq %0, %1;" - : "+r" (i), "+m" (v->counter) - : : "memory"); - return i + __i; -} - -static inline long atomic64_sub_return(long i, atomic64_t *v) -{ - return atomic64_add_return(-i, v); -} - -#define atomic64_inc_return(v) (atomic64_add_return(1, (v))) -#define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) - -static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) -{ - return cmpxchg(&v->counter, old, new); -} - -static inline long atomic64_xchg(atomic64_t *v, long new) -{ - return xchg(&v->counter, new); -} - -static inline long atomic_cmpxchg(atomic_t *v, int old, int new) -{ - return cmpxchg(&v->counter, old, new); -} - -static inline long atomic_xchg(atomic_t *v, int new) -{ - return xchg(&v->counter, new); -} - -/** - * atomic_add_unless - add unless the number is a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, so long as it was not @u. - * Returns non-zero if @v was not @u, and zero otherwise. - */ -static inline int atomic_add_unless(atomic_t *v, int a, int u) -{ - int c, old; - c = atomic_read(v); - for (;;) { - if (unlikely(c == (u))) - break; - old = atomic_cmpxchg((v), c, c + (a)); - if (likely(old == c)) - break; - c = old; - } - return c != (u); -} - -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) - -/** - * atomic64_add_unless - add unless the number is a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, so long as it was not @u. - * Returns non-zero if @v was not @u, and zero otherwise. - */ -static inline int atomic64_add_unless(atomic64_t *v, long a, long u) -{ - long c, old; - c = atomic64_read(v); - for (;;) { - if (unlikely(c == (u))) - break; - old = atomic64_cmpxchg((v), c, c + (a)); - if (likely(old == c)) - break; - c = old; - } - return c != (u); -} - -/** - * atomic_inc_short - increment of a short integer - * @v: pointer to type int - * - * Atomically adds 1 to @v - * Returns the new value of @u - */ -static inline short int atomic_inc_short(short int *v) -{ - asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); - return *v; -} - -/** - * atomic_or_long - OR of two long integers - * @v1: pointer to type unsigned long - * @v2: pointer to type unsigned long - * - * Atomically ORs @v1 and @v2 - * Returns the result of the OR - */ -static inline void atomic_or_long(unsigned long *v1, unsigned long v2) -{ - asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2)); -} - -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) - -/* These are x86-specific, used by some header files */ -#define atomic_clear_mask(mask, addr) \ - asm volatile(LOCK_PREFIX "andl %0,%1" \ - : : "r" (~(mask)), "m" (*(addr)) : "memory") - -#define atomic_set_mask(mask, addr) \ - asm volatile(LOCK_PREFIX "orl %0,%1" \ - : : "r" ((unsigned)(mask)), "m" (*(addr)) \ - : "memory") - -/* Atomic operations are already serializing on x86 */ -#define smp_mb__before_atomic_dec() barrier() -#define smp_mb__after_atomic_dec() barrier() -#define smp_mb__before_atomic_inc() barrier() -#define smp_mb__after_atomic_inc() barrier() - -#include <asm-generic/atomic-long.h> -#endif /* _ASM_X86_ATOMIC_64_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 8240f76b531e..b81002f23614 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -14,6 +14,9 @@ which debugging register was responsible for the trap. The other bits are either reserved or not of interest to us. */ +/* Define reserved bits in DR6 which are always set to 1 */ +#define DR6_RESERVED (0xFFFF0FF0) + #define DR_TRAP0 (0x1) /* db0 */ #define DR_TRAP1 (0x2) /* db1 */ #define DR_TRAP2 (0x4) /* db2 */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 14f9890eb495..635f03bb4995 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -118,14 +118,20 @@ enum fixed_addresses { * 256 temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. * - * We round it up to the next 256 pages boundary so that we - * can have a single pgd entry and a single pte table: + * If necessary we round it up to the next 256 pages boundary so + * that we can have a single pgd entry and a single pte table: */ #define NR_FIX_BTMAPS 64 #define FIX_BTMAPS_SLOTS 4 - FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - - (__end_of_permanent_fixed_addresses & 255), - FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, +#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + FIX_BTMAP_END = + (__end_of_permanent_fixed_addresses ^ + (__end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - 1)) & + -PTRS_PER_PTE + ? __end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - + (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1)) + : __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT FIX_OHCI1394_BASE, #endif diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index ebfb8a9e11f7..1cd5d43fee49 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -32,9 +32,11 @@ extern void __math_state_restore(void); extern void init_thread_xstate(void); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); -extern user_regset_active_fn fpregs_active, xfpregs_active; -extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; -extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; +extern user_regset_active_fn fpregs_active, xfpregs_active, xstateregs_active; +extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, + xstateregs_get; +extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, + xstateregs_set; extern struct _fpx_sw_bytes fx_sw_reserved; #ifdef CONFIG_IA32_EMULATION diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 5458380b6ef8..262292729fc4 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -48,5 +48,6 @@ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); extern int vector_used_by_percpu_irq(unsigned int vector); extern void init_ISA_irqs(void); +extern int nr_legacy_irqs; #endif /* _ASM_X86_IRQ_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 4611f085cd43..8767d99c4f64 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -28,28 +28,33 @@ #define MCE_VECTOR 0x12 /* - * IDT vectors usable for external interrupt sources start - * at 0x20: + * IDT vectors usable for external interrupt sources start at 0x20. + * (0x80 is the syscall vector, 0x30-0x3f are for ISA) */ #define FIRST_EXTERNAL_VECTOR 0x20 - -#ifdef CONFIG_X86_32 -# define SYSCALL_VECTOR 0x80 -# define IA32_SYSCALL_VECTOR 0x80 -#else -# define IA32_SYSCALL_VECTOR 0x80 -#endif +/* + * We start allocating at 0x21 to spread out vectors evenly between + * priority levels. (0x80 is the syscall vector) + */ +#define VECTOR_OFFSET_START 1 /* - * Reserve the lowest usable priority level 0x20 - 0x2f for triggering - * cleanup after irq migration. + * Reserve the lowest usable vector (and hence lowest priority) 0x20 for + * triggering cleanup after irq migration. 0x21-0x2f will still be used + * for device interrupts. */ #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR +#define IA32_SYSCALL_VECTOR 0x80 +#ifdef CONFIG_X86_32 +# define SYSCALL_VECTOR 0x80 +#endif + /* * Vectors 0x30-0x3f are used for ISA interrupts. + * round up to the next 16-vector boundary */ -#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) +#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) #define IRQ1_VECTOR (IRQ0_VECTOR + 1) #define IRQ2_VECTOR (IRQ0_VECTOR + 2) @@ -120,13 +125,6 @@ */ #define MCE_SELF_VECTOR 0xeb -/* - * First APIC vector available to drivers: (vectors 0x30-0xee) we - * start at 0x31(0x41) to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) - #define NR_VECTORS 256 #define FPU_IRQ 13 @@ -154,21 +152,21 @@ static inline int invalid_vm86_irq(int irq) #define NR_IRQS_LEGACY 16 -#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) #ifdef CONFIG_X86_IO_APIC # ifdef CONFIG_SPARSE_IRQ +# define CPU_VECTOR_LIMIT (64 * NR_CPUS) # define NR_IRQS \ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ (NR_VECTORS + CPU_VECTOR_LIMIT) : \ (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) # else -# if NR_CPUS < MAX_IO_APICS -# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) -# else -# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) -# endif +# define CPU_VECTOR_LIMIT (32 * NR_CPUS) +# define NR_IRQS \ + (CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \ + (NR_VECTORS + CPU_VECTOR_LIMIT) : \ + (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) # endif #else /* !CONFIG_X86_IO_APIC: */ # define NR_IRQS NR_IRQS_LEGACY diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 139d4c1a33a7..93da9c3f3341 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -19,7 +19,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); -extern int avail_to_resrv_perfctr_nmi(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); extern int reserve_evntsel_nmi(unsigned int); diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 642fe34b36a2..a667f24c7254 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -40,7 +40,6 @@ #ifndef __ASSEMBLY__ -extern int page_is_ram(unsigned long pagenr); extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 1380367dabd9..befd172c82ad 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -27,7 +27,14 @@ /* * Includes eventsel and unit mask as well: */ -#define ARCH_PERFMON_EVENT_MASK 0xffff + + +#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL +#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL +#define INTEL_ARCH_EDGE_MASK 0x00040000ULL +#define INTEL_ARCH_INV_MASK 0x00800000ULL +#define INTEL_ARCH_CNT_MASK 0xFF000000ULL +#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK) /* * filter mask to validate fixed counter events. @@ -38,7 +45,12 @@ * The other filters are supported by fixed counters. * The any-thread option is supported starting with v3. */ -#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 +#define INTEL_ARCH_FIXED_MASK \ + (INTEL_ARCH_CNT_MASK| \ + INTEL_ARCH_INV_MASK| \ + INTEL_ARCH_EDGE_MASK|\ + INTEL_ARCH_UNIT_MASK|\ + INTEL_ARCH_EVTSEL_MASK) #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h index 86723035a515..447cfb63160e 100644 --- a/arch/x86/include/asm/ptrace-abi.h +++ b/arch/x86/include/asm/ptrace-abi.h @@ -80,6 +80,44 @@ #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ +/* + * Structure layout used in PTRACE_GETXSTATEREGS/PTRACE_SETXSTATEREGS is same + * as the memory layout of xsave used by the processor (except for the bytes + * 464..511 which can be used by the software). Size of the structure that users + * need to use for these two interfaces can be obtained by doing: + * cpuid_count(0xd, 0, &eax, &ptrace_xstateregs_struct_size, &ecx, &edx); + * i.e., cpuid.(eax=0xd,ecx=0).ebx will be the size that user (debuggers etc) + * need to use. + * + * And format of this structure will be like: + * struct { + * fxsave_bytes[0..463] + * sw_usable_bytes[464..511] + * xsave_hdr_bytes[512..575] + * avx_bytes[576..831] + * future_state etc + * } + * + * Same memory layout will be used for the coredump NT_X86_XSTATE representing + * the xstate registers. + * + * For now, only first 8 bytes of the sw_usable_bytes[464..467] will be used and + * will be set to OS enabled xstate mask(which is same as the 64bit mask + * returned by the xgetbv's xCR0). Users (analyzing core dump remotely etc) + * can use this mask aswell as the mask saved in the xstate_hdr bytes and + * interpret what states the processor/OS supports and what states are in + * modified/initialized conditions for the particular process/thread. + * + * Also when the user modifies certain state FP/SSE/etc through this + * PTRACE_SETXSTATEREGS, they must ensure that the xsave_hdr.xstate_bv + * bytes[512..519] of the above memory layout are updated correspondingly. + * i.e., for example when FP state is modified to a non-init state, + * xsave_hdr.xstate_bv's bit 0 must be set to '1', when SSE is modified to + * non-init state, xsave_hdr.xstate_bv's bit 1 must to be set to '1' etc.. + */ +#define PTRACE_GETXSTATEREGS 34 +#define PTRACE_SETXSTATEREGS 35 + #ifndef __ASSEMBLY__ #include <linux/types.h> diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 9d369f680321..20102808b191 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -274,10 +274,6 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, return 0; } -/* Get Nth argument at function call */ -extern unsigned long regs_get_argument_nth(struct pt_regs *regs, - unsigned int n); - /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index ca7517d33776..10204a25bf93 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -41,6 +41,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/lockdep.h> +#include <asm/asm.h> struct rwsem_waiter; @@ -55,17 +56,28 @@ extern asmregparm struct rw_semaphore * /* * the semaphore definition + * + * The bias values and the counter type limits the number of + * potential readers/writers to 32767 for 32 bits and 2147483647 + * for 64 bits. */ -#define RWSEM_UNLOCKED_VALUE 0x00000000 -#define RWSEM_ACTIVE_BIAS 0x00000001 -#define RWSEM_ACTIVE_MASK 0x0000ffff -#define RWSEM_WAITING_BIAS (-0x00010000) +#ifdef CONFIG_X86_64 +# define RWSEM_ACTIVE_MASK 0xffffffffL +#else +# define RWSEM_ACTIVE_MASK 0x0000ffffL +#endif + +#define RWSEM_UNLOCKED_VALUE 0x00000000L +#define RWSEM_ACTIVE_BIAS 0x00000001L +#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) +typedef signed long rwsem_count_t; + struct rw_semaphore { - signed long count; + rwsem_count_t count; spinlock_t wait_lock; struct list_head wait_list; #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -105,7 +117,7 @@ do { \ static inline void __down_read(struct rw_semaphore *sem) { asm volatile("# beginning down_read\n\t" - LOCK_PREFIX " incl (%%eax)\n\t" + LOCK_PREFIX _ASM_INC "(%1)\n\t" /* adds 0x00000001, returns the old value */ " jns 1f\n" " call call_rwsem_down_read_failed\n" @@ -121,14 +133,14 @@ static inline void __down_read(struct rw_semaphore *sem) */ static inline int __down_read_trylock(struct rw_semaphore *sem) { - __s32 result, tmp; + rwsem_count_t result, tmp; asm volatile("# beginning __down_read_trylock\n\t" - " movl %0,%1\n\t" + " mov %0,%1\n\t" "1:\n\t" - " movl %1,%2\n\t" - " addl %3,%2\n\t" + " mov %1,%2\n\t" + " add %3,%2\n\t" " jle 2f\n\t" - LOCK_PREFIX " cmpxchgl %2,%0\n\t" + LOCK_PREFIX " cmpxchg %2,%0\n\t" " jnz 1b\n\t" "2:\n\t" "# ending __down_read_trylock\n\t" @@ -143,13 +155,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) */ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) { - int tmp; + rwsem_count_t tmp; tmp = RWSEM_ACTIVE_WRITE_BIAS; asm volatile("# beginning down_write\n\t" - LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" + LOCK_PREFIX " xadd %1,(%2)\n\t" /* subtract 0x0000ffff, returns the old value */ - " testl %%edx,%%edx\n\t" + " test %1,%1\n\t" /* was the count 0 before? */ " jz 1f\n" " call call_rwsem_down_write_failed\n" @@ -170,9 +182,9 @@ static inline void __down_write(struct rw_semaphore *sem) */ static inline int __down_write_trylock(struct rw_semaphore *sem) { - signed long ret = cmpxchg(&sem->count, - RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); + rwsem_count_t ret = cmpxchg(&sem->count, + RWSEM_UNLOCKED_VALUE, + RWSEM_ACTIVE_WRITE_BIAS); if (ret == RWSEM_UNLOCKED_VALUE) return 1; return 0; @@ -183,9 +195,9 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) */ static inline void __up_read(struct rw_semaphore *sem) { - __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; + rwsem_count_t tmp = -RWSEM_ACTIVE_READ_BIAS; asm volatile("# beginning __up_read\n\t" - LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" + LOCK_PREFIX " xadd %1,(%2)\n\t" /* subtracts 1, returns the old value */ " jns 1f\n\t" " call call_rwsem_wake\n" @@ -201,18 +213,18 @@ static inline void __up_read(struct rw_semaphore *sem) */ static inline void __up_write(struct rw_semaphore *sem) { + rwsem_count_t tmp; asm volatile("# beginning __up_write\n\t" - " movl %2,%%edx\n\t" - LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" + LOCK_PREFIX " xadd %1,(%2)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ " jz 1f\n" " call call_rwsem_wake\n" "1:\n\t" "# ending __up_write\n" - : "+m" (sem->count) - : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS) - : "memory", "cc", "edx"); + : "+m" (sem->count), "=d" (tmp) + : "a" (sem), "1" (-RWSEM_ACTIVE_WRITE_BIAS) + : "memory", "cc"); } /* @@ -220,8 +232,25 @@ static inline void __up_write(struct rw_semaphore *sem) */ static inline void __downgrade_write(struct rw_semaphore *sem) { +#ifdef CONFIG_X86_64 +# if RWSEM_WAITING_BIAS != -0x100000000 +# error "This code assumes RWSEM_WAITING_BIAS == -2^32" +# endif + + /* 64-bit immediates are special and expensive, and not needed here */ asm volatile("# beginning __downgrade_write\n\t" - LOCK_PREFIX " addl %2,(%%eax)\n\t" + LOCK_PREFIX "incl 4(%1)\n\t" + /* transitions 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 */ + " jns 1f\n\t" + " call call_rwsem_downgrade_wake\n" + "1:\n\t" + "# ending __downgrade_write\n" + : "+m" (sem->count) + : "a" (sem) + : "memory", "cc"); +#else + asm volatile("# beginning __downgrade_write\n\t" + LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ " jns 1f\n\t" " call call_rwsem_downgrade_wake\n" @@ -230,24 +259,27 @@ static inline void __downgrade_write(struct rw_semaphore *sem) : "+m" (sem->count) : "a" (sem), "i" (-RWSEM_WAITING_BIAS) : "memory", "cc"); +#endif } /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(rwsem_count_t delta, + struct rw_semaphore *sem) { - asm volatile(LOCK_PREFIX "addl %1,%0" + asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" : "+m" (sem->count) - : "ir" (delta)); + : "er" (delta)); } /* * implement exchange and add functionality */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, + struct rw_semaphore *sem) { - int tmp = delta; + rwsem_count_t tmp = delta; asm volatile(LOCK_PREFIX "xadd %0,%1" : "+r" (tmp), "+m" (sem->count) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 1e796782cd7b..4cfc90824068 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -135,6 +135,8 @@ int native_cpu_disable(void); void native_cpu_die(unsigned int cpu); void native_play_dead(void); void play_dead_common(void); +void wbinvd_on_cpu(int cpu); +int wbinvd_on_all_cpus(void); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); @@ -147,6 +149,13 @@ static inline int num_booting_cpus(void) { return cpumask_weight(cpu_callout_mask); } +#else /* !CONFIG_SMP */ +#define wbinvd_on_cpu(cpu) wbinvd() +static inline int wbinvd_on_all_cpus(void) +{ + wbinvd(); + return 0; +} #endif /* CONFIG_SMP */ extern unsigned disabled_cpus __cpuinitdata; diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 35e89122a42f..4dab78edbad9 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -3,8 +3,6 @@ extern int kstack_depth_to_print; -int x86_is_stack_id(int id, char *name); - struct thread_info; struct stacktrace_ops; diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 535e421498f6..316708d5af92 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -8,6 +8,8 @@ #include <linux/errno.h> #include <linux/prefetch.h> #include <linux/lockdep.h> +#include <asm/alternative.h> +#include <asm/cpufeature.h> #include <asm/page.h> /* @@ -16,7 +18,24 @@ /* Handles exceptions in both to and from, but doesn't do access_ok */ __must_check unsigned long -copy_user_generic(void *to, const void *from, unsigned len); +copy_user_generic_string(void *to, const void *from, unsigned len); +__must_check unsigned long +copy_user_generic_unrolled(void *to, const void *from, unsigned len); + +static __always_inline __must_check unsigned long +copy_user_generic(void *to, const void *from, unsigned len) +{ + unsigned ret; + + alternative_call(copy_user_generic_unrolled, + copy_user_generic_string, + X86_FEATURE_REP_GOOD, + ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from), + "=d" (len)), + "1" (to), "2" (from), "3" (len) + : "memory", "rcx", "r8", "r9", "r10", "r11"); + return ret; +} __must_check unsigned long _copy_to_user(void __user *to, const void *from, unsigned len); diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 2751f3075d8b..3fbc1f348a7d 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -18,8 +18,8 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. - * Copyright (c) Russ Anderson + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Russ Anderson <rja@sgi.com> */ #include <linux/rtc.h> @@ -89,7 +89,7 @@ extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64); extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64); extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); -extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); +extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *); extern s64 uv_bios_freq_base(u64, u64 *); extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int, unsigned long *); @@ -104,6 +104,7 @@ extern int uv_type; extern long sn_partition_id; extern long sn_coherency_id; extern long sn_region_size; +extern long system_serial_number; #define partition_coherence_id() (sn_coherency_id) extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 40be813fefb1..14cc74ba5d23 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -329,7 +329,8 @@ static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset */ static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset) { - return UV_GLOBAL_GRU_MMR_BASE | offset | (pnode << uv_hub_info->m_val); + return UV_GLOBAL_GRU_MMR_BASE | offset | + ((unsigned long)pnode << uv_hub_info->m_val); } static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val) diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 727acc152344..9e26171d38df 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -27,9 +27,11 @@ extern unsigned int xstate_size; extern u64 pcntxt_mask; extern struct xsave_struct *init_xstate_buf; +extern u64 xstate_fx_sw_bytes[6]; extern void xsave_cntxt_init(void); extern void xsave_init(void); +extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern int init_fpu(struct task_struct *child); extern int check_for_xstate(struct i387_fxsave_struct __user *buf, void __user *fpstate, diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index de7353c0ce9c..2589ea4c60ce 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -205,7 +205,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start, struct alt_instr *end) { struct alt_instr *a; - char insnbuf[MAX_PATCH_LEN]; + u8 insnbuf[MAX_PATCH_LEN]; DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); for (a = start; a < end; a++) { @@ -223,6 +223,8 @@ void __init_or_module apply_alternatives(struct alt_instr *start, } #endif memcpy(insnbuf, a->replacement, a->replacementlen); + if (*insnbuf == 0xe8 && a->replacementlen == 5) + *(s32 *)(insnbuf + 1) += a->replacement - a->instr; add_nops(insnbuf + a->replacementlen, a->instrlen - a->replacementlen); text_poke_early(instr, insnbuf, a->instrlen); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53243ca7816d..9969094ce75b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -94,8 +94,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; -/* Number of legacy interrupts */ -static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; /* GSI interrupts */ static int nr_irqs_gsi = NR_IRQS_LEGACY; @@ -140,27 +138,10 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ #ifdef CONFIG_SPARSE_IRQ -static struct irq_cfg irq_cfgx[] = { +static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; #else -static struct irq_cfg irq_cfgx[NR_IRQS] = { +static struct irq_cfg irq_cfgx[NR_IRQS]; #endif - [0] = { .vector = IRQ0_VECTOR, }, - [1] = { .vector = IRQ1_VECTOR, }, - [2] = { .vector = IRQ2_VECTOR, }, - [3] = { .vector = IRQ3_VECTOR, }, - [4] = { .vector = IRQ4_VECTOR, }, - [5] = { .vector = IRQ5_VECTOR, }, - [6] = { .vector = IRQ6_VECTOR, }, - [7] = { .vector = IRQ7_VECTOR, }, - [8] = { .vector = IRQ8_VECTOR, }, - [9] = { .vector = IRQ9_VECTOR, }, - [10] = { .vector = IRQ10_VECTOR, }, - [11] = { .vector = IRQ11_VECTOR, }, - [12] = { .vector = IRQ12_VECTOR, }, - [13] = { .vector = IRQ13_VECTOR, }, - [14] = { .vector = IRQ14_VECTOR, }, - [15] = { .vector = IRQ15_VECTOR, }, -}; void __init io_apic_disable_legacy(void) { @@ -185,8 +166,14 @@ int __init arch_early_irq_init(void) desc->chip_data = &cfg[i]; zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); - if (i < nr_legacy_irqs) - cpumask_setall(cfg[i].domain); + /* + * For legacy IRQ's, start with assigning irq0 to irq15 to + * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. + */ + if (i < nr_legacy_irqs) { + cfg[i].vector = IRQ0_VECTOR + i; + cpumask_set_cpu(0, cfg[i].domain); + } } return 0; @@ -1162,7 +1149,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; + static int current_offset = VECTOR_OFFSET_START % 8; unsigned int old_vector; int cpu, err; cpumask_var_t tmp_mask; @@ -1198,7 +1186,7 @@ next: if (vector >= first_system_vector) { /* If out of vectors on large boxen, must share them. */ offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; + vector = FIRST_EXTERNAL_VECTOR + offset; } if (unlikely(current_vector == vector)) continue; @@ -1268,11 +1256,16 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ - /* This function must be called with vector_lock held */ int irq, vector; struct irq_cfg *cfg; struct irq_desc *desc; + /* + * vector_lock will make sure that we don't run into irq vector + * assignments that might be happening on another cpu in parallel, + * while we setup our initial vector to irq mappings. + */ + spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { cfg = desc->chip_data; @@ -1291,6 +1284,7 @@ void __setup_vector_irq(int cpu) if (!cpumask_test_cpu(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } + spin_unlock(&vector_lock); } static struct irq_chip ioapic_chip; @@ -1440,6 +1434,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq cfg = desc->chip_data; + /* + * For legacy irqs, cfg->domain starts with cpu 0 for legacy + * controllers like 8259. Now that IO-APIC can handle this irq, update + * the cfg->domain. + */ + if (irq < nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) + apic->vector_allocation_domain(0, cfg->domain); + if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; @@ -3847,7 +3849,7 @@ int __init arch_probe_nr_irqs(void) /* * for MSI and HT dyn irq */ - nr += nr_irqs_gsi * 16; + nr += nr_irqs_gsi * 64; #endif if (nr < nr_irqs) nr_irqs = nr; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 21db3cbea7dc..a8ae82e0a654 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -5,7 +5,7 @@ * * SGI UV APIC functions (note: not an Intel compatible APIC) * - * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved. */ #include <linux/cpumask.h> #include <linux/hardirq.h> @@ -482,7 +482,7 @@ static void uv_heartbeat(unsigned long ignored) static void __cpuinit uv_heartbeat_enable(int cpu) { - if (!uv_cpu_hub_info(cpu)->scir.enabled) { + while (!uv_cpu_hub_info(cpu)->scir.enabled) { struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); @@ -490,11 +490,10 @@ static void __cpuinit uv_heartbeat_enable(int cpu) timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; add_timer_on(timer, cpu); uv_cpu_hub_info(cpu)->scir.enabled = 1; - } - /* check boot cpu */ - if (!uv_cpu_hub_info(0)->scir.enabled) - uv_heartbeat_enable(0); + /* also ensure that boot cpu is enabled */ + cpu = 0; + } } #ifdef CONFIG_HOTPLUG_CPU @@ -634,8 +633,8 @@ void __init uv_system_init(void) } uv_bios_init(); - uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, - &sn_coherency_id, &sn_region_size); + uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id, + &sn_region_size, &system_serial_number); uv_rtc_init(); for_each_present_cpu(cpu) { diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index b0206a211b09..c918ebab52ab 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -15,8 +15,8 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. - * Copyright (c) Russ Anderson + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Russ Anderson <rja@sgi.com> */ #include <linux/efi.h> @@ -30,6 +30,7 @@ static struct uv_systab uv_systab; s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) { struct uv_systab *tab = &uv_systab; + s64 ret; if (!tab->function) /* @@ -37,9 +38,11 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) */ return BIOS_STATUS_UNIMPLEMENTED; - return efi_call6((void *)__va(tab->function), - (u64)which, a1, a2, a3, a4, a5); + ret = efi_call6((void *)__va(tab->function), (u64)which, + a1, a2, a3, a4, a5); + return ret; } +EXPORT_SYMBOL_GPL(uv_bios_call); s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) @@ -73,11 +76,14 @@ long sn_coherency_id; EXPORT_SYMBOL_GPL(sn_coherency_id); long sn_region_size; EXPORT_SYMBOL_GPL(sn_region_size); +long system_serial_number; +EXPORT_SYMBOL_GPL(system_serial_number); int uv_type; +EXPORT_SYMBOL_GPL(uv_type); s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, - long *region) + long *region, long *ssn) { s64 ret; u64 v0, v1; @@ -97,8 +103,11 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, *coher = part.coherence_id; if (region) *region = part.region_size; + if (ssn) + *ssn = v1; return ret; } +EXPORT_SYMBOL_GPL(uv_bios_get_sn_info); int uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, @@ -185,4 +194,3 @@ void uv_bios_init(void) void uv_bios_init(void) { } #endif - diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index fc6c8ef92dcc..db1670f24c58 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -18,6 +18,7 @@ #include <asm/processor.h> #include <linux/smp.h> #include <asm/k8.h> +#include <asm/smp.h> #define LVL_1_INST 1 #define LVL_1_DATA 2 @@ -31,6 +32,8 @@ struct _cache_table { short size; }; +#define MB(x) ((x) * 1024) + /* All the cache descriptor types we care about (no TLB or trace cache entries) */ @@ -44,9 +47,9 @@ static const struct _cache_table __cpuinitconst cache_table[] = { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ @@ -59,16 +62,16 @@ static const struct _cache_table __cpuinitconst cache_table[] = { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ - { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ - { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ - { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ - { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ - { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ - { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ - { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ - { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ - { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ - { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */ + { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */ + { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */ + { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */ + { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */ + { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ + { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */ + { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ + { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */ + { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */ + { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */ { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ @@ -77,34 +80,34 @@ static const struct _cache_table __cpuinitconst cache_table[] = { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ - { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ - { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ - { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ - { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ - { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ - { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ - { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ - { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ - { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ - { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ - { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ - { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ - { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ - { 0xd7, LVL_3, 2048 }, /* 8-way set assoc, 64 byte line size */ - { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ - { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ - { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ - { 0xde, LVL_3, 8192 }, /* 12-way set assoc, 64 byte line size */ - { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ - { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ - { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ - { 0xea, LVL_3, 12288 }, /* 24-way set assoc, 64 byte line size */ - { 0xeb, LVL_3, 18432 }, /* 24-way set assoc, 64 byte line size */ - { 0xec, LVL_3, 24576 }, /* 24-way set assoc, 64 byte line size */ + { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */ + { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */ + { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ + { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ + { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ + { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */ + { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */ + { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ + { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */ + { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ + { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */ + { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */ + { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */ + { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */ + { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */ + { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */ + { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */ + { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */ + { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */ + { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ + { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ + { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */ + { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */ + { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */ { 0x00, 0, 0} }; @@ -150,7 +153,8 @@ struct _cpuid4_info { union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned long size; - unsigned long can_disable; + bool can_disable; + unsigned int l3_indices; DECLARE_BITMAP(shared_cpu_map, NR_CPUS); }; @@ -160,7 +164,8 @@ struct _cpuid4_info_regs { union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned long size; - unsigned long can_disable; + bool can_disable; + unsigned int l3_indices; }; unsigned short num_cache_leaves; @@ -290,6 +295,29 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } +static unsigned int __cpuinit amd_calc_l3_indices(void) +{ + /* + * We're called over smp_call_function_single() and therefore + * are on the correct cpu. + */ + int cpu = smp_processor_id(); + int node = cpu_to_node(cpu); + struct pci_dev *dev = node_to_k8_nb_misc(node); + unsigned int sc0, sc1, sc2, sc3; + u32 val; + + pci_read_config_dword(dev, 0x1C4, &val); + + /* calculate subcache sizes */ + sc0 = !(val & BIT(0)); + sc1 = !(val & BIT(4)); + sc2 = !(val & BIT(8)) + !(val & BIT(9)); + sc3 = !(val & BIT(12)) + !(val & BIT(13)); + + return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; +} + static void __cpuinit amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) { @@ -299,11 +327,14 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) if (boot_cpu_data.x86 == 0x11) return; - /* see erratum #382 */ - if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8)) + /* see errata #382 and #388 */ + if ((boot_cpu_data.x86 == 0x10) && + ((boot_cpu_data.x86_model < 0x9) || + (boot_cpu_data.x86_mask < 0x1))) return; - this_leaf->can_disable = 1; + this_leaf->can_disable = true; + this_leaf->l3_indices = amd_calc_l3_indices(); } static int @@ -726,12 +757,12 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, return -EINVAL; pci_read_config_dword(dev, 0x1BC + index * 4, ®); - return sprintf(buf, "%x\n", reg); + return sprintf(buf, "0x%08x\n", reg); } #define SHOW_CACHE_DISABLE(index) \ static ssize_t \ -show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ +show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ { \ return show_cache_disable(this_leaf, buf, index); \ } @@ -745,7 +776,9 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, int node = cpu_to_node(cpu); struct pci_dev *dev = node_to_k8_nb_misc(node); unsigned long val = 0; - unsigned int scrubber = 0; + +#define SUBCACHE_MASK (3UL << 20) +#define SUBCACHE_INDEX 0xfff if (!this_leaf->can_disable) return -EINVAL; @@ -759,21 +792,25 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, if (strict_strtoul(buf, 10, &val) < 0) return -EINVAL; - val |= 0xc0000000; - - pci_read_config_dword(dev, 0x58, &scrubber); - scrubber &= ~0x1f000000; - pci_write_config_dword(dev, 0x58, scrubber); + /* do not allow writes outside of allowed bits */ + if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || + ((val & SUBCACHE_INDEX) > this_leaf->l3_indices)) + return -EINVAL; - pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); - wbinvd(); + val |= BIT(30); pci_write_config_dword(dev, 0x1BC + index * 4, val); + /* + * We need to WBINVD on a core on the node containing the L3 cache which + * indices we disable therefore a simple wbinvd() is not sufficient. + */ + wbinvd_on_cpu(cpu); + pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31)); return count; } #define STORE_CACHE_DISABLE(index) \ static ssize_t \ -store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ +store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ const char *buf, size_t count) \ { \ return store_cache_disable(this_leaf, buf, count, index); \ @@ -806,16 +843,24 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, show_cache_disable_1, store_cache_disable_1); +#define DEFAULT_SYSFS_CACHE_ATTRS \ + &type.attr, \ + &level.attr, \ + &coherency_line_size.attr, \ + &physical_line_partition.attr, \ + &ways_of_associativity.attr, \ + &number_of_sets.attr, \ + &size.attr, \ + &shared_cpu_map.attr, \ + &shared_cpu_list.attr + static struct attribute *default_attrs[] = { - &type.attr, - &level.attr, - &coherency_line_size.attr, - &physical_line_partition.attr, - &ways_of_associativity.attr, - &number_of_sets.attr, - &size.attr, - &shared_cpu_map.attr, - &shared_cpu_list.attr, + DEFAULT_SYSFS_CACHE_ATTRS, + NULL +}; + +static struct attribute *default_l3_attrs[] = { + DEFAULT_SYSFS_CACHE_ATTRS, &cache_disable_0.attr, &cache_disable_1.attr, NULL @@ -908,6 +953,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) unsigned int cpu = sys_dev->id; unsigned long i, j; struct _index_kobject *this_object; + struct _cpuid4_info *this_leaf; int retval; retval = cpuid4_cache_sysfs_init(cpu); @@ -926,6 +972,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) this_object = INDEX_KOBJECT_PTR(cpu, i); this_object->cpu = cpu; this_object->index = i; + + this_leaf = CPUID4_INFO_IDX(cpu, i); + + if (this_leaf->can_disable) + ktype_cache.default_attrs = default_l3_attrs; + else + ktype_cache.default_attrs = default_attrs; + retval = kobject_init_and_add(&(this_object->kobj), &ktype_cache, per_cpu(ici_cache_kobject, cpu), diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index 33af14110dfd..92ba9cd31c9a 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c @@ -108,7 +108,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) return 0; } -static struct mtrr_ops amd_mtrr_ops = { +static const struct mtrr_ops amd_mtrr_ops = { .vendor = X86_VENDOR_AMD, .set = amd_set_mtrr, .get = amd_get_mtrr, diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index de89f14eff3a..316fe3e60a97 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -110,7 +110,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t return 0; } -static struct mtrr_ops centaur_mtrr_ops = { +static const struct mtrr_ops centaur_mtrr_ops = { .vendor = X86_VENDOR_CENTAUR, .set = centaur_set_mcr, .get = centaur_get_mcr, diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 228d982ce09c..68a3343e5798 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -265,7 +265,7 @@ static void cyrix_set_all(void) post_set(); } -static struct mtrr_ops cyrix_mtrr_ops = { +static const struct mtrr_ops cyrix_mtrr_ops = { .vendor = X86_VENDOR_CYRIX, .set_all = cyrix_set_all, .set = cyrix_set_arr, diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 55da0c5f68dd..4d755846fee6 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -752,7 +752,7 @@ int positive_have_wrcomb(void) /* * Generic structure... */ -struct mtrr_ops generic_mtrr_ops = { +const struct mtrr_ops generic_mtrr_ops = { .use_intel_if = 1, .set_all = generic_set_all, .get = generic_get_mtrr, diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 84e83de54575..fe4622e8c837 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -60,14 +60,14 @@ static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; static bool mtrr_aps_delayed_init; -static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; +static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; -struct mtrr_ops *mtrr_if; +const struct mtrr_ops *mtrr_if; static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); -void set_mtrr_ops(struct mtrr_ops *ops) +void set_mtrr_ops(const struct mtrr_ops *ops) { if (ops->vendor && ops->vendor < X86_VENDOR_NUM) mtrr_ops[ops->vendor] = ops; diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index a501dee9a87a..df5e41f31a27 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -32,7 +32,7 @@ extern int generic_get_free_region(unsigned long base, unsigned long size, extern int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type); -extern struct mtrr_ops generic_mtrr_ops; +extern const struct mtrr_ops generic_mtrr_ops; extern int positive_have_wrcomb(void); @@ -53,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); void get_mtrr_state(void); -extern void set_mtrr_ops(struct mtrr_ops *ops); +extern void set_mtrr_ops(const struct mtrr_ops *ops); extern u64 size_or_mask, size_and_mask; -extern struct mtrr_ops *mtrr_if; +extern const struct mtrr_ops *mtrr_if; #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8c1c07073ccc..1846ead0576b 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -7,6 +7,7 @@ * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> + * Copyright (C) 2009 Google, Inc., Stephane Eranian * * For licencing details see kernel-base/COPYING */ @@ -22,6 +23,7 @@ #include <linux/uaccess.h> #include <linux/highmem.h> #include <linux/cpu.h> +#include <linux/bitops.h> #include <asm/apic.h> #include <asm/stacktrace.h> @@ -68,26 +70,47 @@ struct debug_store { u64 pebs_event_reset[MAX_PEBS_EVENTS]; }; +struct event_constraint { + union { + unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + u64 idxmsk64[1]; + }; + int code; + int cmask; + int weight; +}; + struct cpu_hw_events { - struct perf_event *events[X86_PMC_IDX_MAX]; - unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; int enabled; struct debug_store *ds; -}; -struct event_constraint { - unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - int code; + int n_events; + int n_added; + int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ + struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; -#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } -#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } +#define EVENT_CONSTRAINT(c, n, m) { \ + { .idxmsk64[0] = (n) }, \ + .code = (c), \ + .cmask = (m), \ + .weight = HWEIGHT64((u64)(n)), \ +} + +#define INTEL_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) + +#define FIXED_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) -#define for_each_event_constraint(e, c) \ - for ((e) = (c); (e)->idxmsk[0]; (e)++) +#define EVENT_CONSTRAINT_END \ + EVENT_CONSTRAINT(0, 0, 0) +#define for_each_event_constraint(e, c) \ + for ((e) = (c); (e)->cmask; (e)++) /* * struct x86_pmu - generic x86 pmu @@ -114,8 +137,14 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); - int (*get_event_idx)(struct cpu_hw_events *cpuc, - struct hw_perf_event *hwc); + + struct event_constraint * + (*get_event_constraints)(struct cpu_hw_events *cpuc, + struct perf_event *event); + + void (*put_event_constraints)(struct cpu_hw_events *cpuc, + struct perf_event *event); + struct event_constraint *event_constraints; }; static struct x86_pmu x86_pmu __read_mostly; @@ -124,7 +153,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; -static const struct event_constraint *event_constraints; +static int x86_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx); /* * Not sure about some of these @@ -171,14 +201,14 @@ static u64 p6_pmu_raw_event(u64 hw_event) return hw_event & P6_EVNTSEL_MASK; } -static const struct event_constraint intel_p6_event_constraints[] = +static struct event_constraint intel_p6_event_constraints[] = { - EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ - EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ - EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ - EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ - EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ - EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ + INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ EVENT_CONSTRAINT_END }; @@ -196,32 +226,51 @@ static const u64 intel_perfmon_event_map[] = [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; -static const struct event_constraint intel_core_event_constraints[] = -{ - EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ - EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ - EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ - EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ - EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ - EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ - EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ - EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ - EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ +static struct event_constraint intel_core_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ + INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_nehalem_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ + INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ + INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ + INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ + INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ + INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ EVENT_CONSTRAINT_END }; -static const struct event_constraint intel_nehalem_event_constraints[] = -{ - EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ - EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ - EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ - EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ - EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ - EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ - EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ - EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ - EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ - EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ +static struct event_constraint intel_westmere_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ + INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_gen_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ EVENT_CONSTRAINT_END }; @@ -245,6 +294,97 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; +static __initconst u64 westmere_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ + [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ + [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ + [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ + [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ + [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + static __initconst u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -527,11 +667,11 @@ static u64 intel_pmu_raw_event(u64 hw_event) #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL #define CORE_EVNTSEL_MASK \ - (CORE_EVNTSEL_EVENT_MASK | \ - CORE_EVNTSEL_UNIT_MASK | \ - CORE_EVNTSEL_EDGE_MASK | \ - CORE_EVNTSEL_INV_MASK | \ - CORE_EVNTSEL_REG_MASK) + (INTEL_ARCH_EVTSEL_MASK | \ + INTEL_ARCH_UNIT_MASK | \ + INTEL_ARCH_EDGE_MASK | \ + INTEL_ARCH_INV_MASK | \ + INTEL_ARCH_CNT_MASK) return hw_event & CORE_EVNTSEL_MASK; } @@ -1058,15 +1198,8 @@ static int __hw_perf_event_init(struct perf_event *event) static void p6_pmu_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - barrier(); - /* p6 only has one enable register */ rdmsrl(MSR_P6_EVNTSEL0, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -1077,12 +1210,6 @@ static void intel_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - barrier(); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) @@ -1094,17 +1221,6 @@ static void amd_pmu_disable_all(void) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - /* - * ensure we write the disable before we start disabling the - * events proper, so that amd_pmu_enable_event() does the - * right thing. - */ - barrier(); - for (idx = 0; idx < x86_pmu.num_events; idx++) { u64 val; @@ -1120,22 +1236,25 @@ static void amd_pmu_disable_all(void) void hw_perf_disable(void) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + if (!x86_pmu_initialized()) return; - return x86_pmu.disable_all(); + + if (!cpuc->enabled) + return; + + cpuc->n_added = 0; + cpuc->enabled = 0; + barrier(); + + x86_pmu.disable_all(); } static void p6_pmu_enable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); unsigned long val; - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - /* p6 only has one enable register */ rdmsrl(MSR_P6_EVNTSEL0, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -1146,12 +1265,6 @@ static void intel_pmu_enable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { @@ -1170,12 +1283,6 @@ static void amd_pmu_enable_all(void) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - for (idx = 0; idx < x86_pmu.num_events; idx++) { struct perf_event *event = cpuc->events[idx]; u64 val; @@ -1189,10 +1296,239 @@ static void amd_pmu_enable_all(void) } } +static const struct pmu pmu; + +static inline int is_x86_event(struct perf_event *event) +{ + return event->pmu == &pmu; +} + +static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) +{ + struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; + unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + int i, j, w, wmax, num = 0; + struct hw_perf_event *hwc; + + bitmap_zero(used_mask, X86_PMC_IDX_MAX); + + for (i = 0; i < n; i++) { + constraints[i] = + x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); + } + + /* + * fastpath, try to reuse previous register + */ + for (i = 0; i < n; i++) { + hwc = &cpuc->event_list[i]->hw; + c = constraints[i]; + + /* never assigned */ + if (hwc->idx == -1) + break; + + /* constraint still honored */ + if (!test_bit(hwc->idx, c->idxmsk)) + break; + + /* not already used */ + if (test_bit(hwc->idx, used_mask)) + break; + + set_bit(hwc->idx, used_mask); + if (assign) + assign[i] = hwc->idx; + } + if (i == n) + goto done; + + /* + * begin slow path + */ + + bitmap_zero(used_mask, X86_PMC_IDX_MAX); + + /* + * weight = number of possible counters + * + * 1 = most constrained, only works on one counter + * wmax = least constrained, works on any counter + * + * assign events to counters starting with most + * constrained events. + */ + wmax = x86_pmu.num_events; + + /* + * when fixed event counters are present, + * wmax is incremented by 1 to account + * for one more choice + */ + if (x86_pmu.num_events_fixed) + wmax++; + + for (w = 1, num = n; num && w <= wmax; w++) { + /* for each event */ + for (i = 0; num && i < n; i++) { + c = constraints[i]; + hwc = &cpuc->event_list[i]->hw; + + if (c->weight != w) + continue; + + for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { + if (!test_bit(j, used_mask)) + break; + } + + if (j == X86_PMC_IDX_MAX) + break; + + set_bit(j, used_mask); + + if (assign) + assign[i] = j; + num--; + } + } +done: + /* + * scheduling failed or is just a simulation, + * free resources if necessary + */ + if (!assign || num) { + for (i = 0; i < n; i++) { + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); + } + } + return num ? -ENOSPC : 0; +} + +/* + * dogrp: true if must collect siblings events (group) + * returns total number of events and error code + */ +static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) +{ + struct perf_event *event; + int n, max_count; + + max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; + + /* current number of events already accepted */ + n = cpuc->n_events; + + if (is_x86_event(leader)) { + if (n >= max_count) + return -ENOSPC; + cpuc->event_list[n] = leader; + n++; + } + if (!dogrp) + return n; + + list_for_each_entry(event, &leader->sibling_list, group_entry) { + if (!is_x86_event(event) || + event->state <= PERF_EVENT_STATE_OFF) + continue; + + if (n >= max_count) + return -ENOSPC; + + cpuc->event_list[n] = event; + n++; + } + return n; +} + + +static inline void x86_assign_hw_event(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + hwc->idx = idx; + + if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { + hwc->config_base = 0; + hwc->event_base = 0; + } else if (hwc->idx >= X86_PMC_IDX_FIXED) { + hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; + /* + * We set it so that event_base + idx in wrmsr/rdmsr maps to + * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: + */ + hwc->event_base = + MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; + } else { + hwc->config_base = x86_pmu.eventsel; + hwc->event_base = x86_pmu.perfctr; + } +} + +static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc); + void hw_perf_enable(void) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + struct hw_perf_event *hwc; + int i; + if (!x86_pmu_initialized()) return; + + if (cpuc->enabled) + return; + + if (cpuc->n_added) { + /* + * apply assignment obtained either from + * hw_perf_group_sched_in() or x86_pmu_enable() + * + * step1: save events moving to new counters + * step2: reprogram moved events into new counters + */ + for (i = 0; i < cpuc->n_events; i++) { + + event = cpuc->event_list[i]; + hwc = &event->hw; + + if (hwc->idx == -1 || hwc->idx == cpuc->assign[i]) + continue; + + __x86_pmu_disable(event, cpuc); + + hwc->idx = -1; + } + + for (i = 0; i < cpuc->n_events; i++) { + + event = cpuc->event_list[i]; + hwc = &event->hw; + + if (hwc->idx == -1) { + x86_assign_hw_event(event, hwc, cpuc->assign[i]); + x86_perf_event_set_period(event, hwc, hwc->idx); + } + /* + * need to mark as active because x86_pmu_disable() + * clear active_mask and eventsp[] yet it preserves + * idx + */ + set_bit(hwc->idx, cpuc->active_mask); + cpuc->events[hwc->idx] = event; + + x86_pmu.enable(hwc, hwc->idx); + perf_event_update_userpage(event); + } + cpuc->n_added = 0; + perf_events_lapic_init(); + } + + cpuc->enabled = 1; + barrier(); + x86_pmu.enable_all(); } @@ -1398,148 +1734,40 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) x86_pmu_enable_event(hwc, idx); } -static int fixed_mode_idx(struct hw_perf_event *hwc) -{ - unsigned int hw_event; - - hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; - - if (unlikely((hw_event == - x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && - (hwc->sample_period == 1))) - return X86_PMC_IDX_FIXED_BTS; - - if (!x86_pmu.num_events_fixed) - return -1; - - /* - * fixed counters do not take all possible filters - */ - if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) - return -1; - - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) - return X86_PMC_IDX_FIXED_INSTRUCTIONS; - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) - return X86_PMC_IDX_FIXED_CPU_CYCLES; - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) - return X86_PMC_IDX_FIXED_BUS_CYCLES; - - return -1; -} - -/* - * generic counter allocator: get next free counter - */ -static int -gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) -{ - int idx; - - idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); - return idx == x86_pmu.num_events ? -1 : idx; -} - /* - * intel-specific counter allocator: check event constraints - */ -static int -intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) -{ - const struct event_constraint *event_constraint; - int i, code; - - if (!event_constraints) - goto skip; - - code = hwc->config & CORE_EVNTSEL_EVENT_MASK; - - for_each_event_constraint(event_constraint, event_constraints) { - if (code == event_constraint->code) { - for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { - if (!test_and_set_bit(i, cpuc->used_mask)) - return i; - } - return -1; - } - } -skip: - return gen_get_event_idx(cpuc, hwc); -} - -static int -x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) -{ - int idx; - - idx = fixed_mode_idx(hwc); - if (idx == X86_PMC_IDX_FIXED_BTS) { - /* BTS is already occupied. */ - if (test_and_set_bit(idx, cpuc->used_mask)) - return -EAGAIN; - - hwc->config_base = 0; - hwc->event_base = 0; - hwc->idx = idx; - } else if (idx >= 0) { - /* - * Try to get the fixed event, if that is already taken - * then try to get a generic event: - */ - if (test_and_set_bit(idx, cpuc->used_mask)) - goto try_generic; - - hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - /* - * We set it so that event_base + idx in wrmsr/rdmsr maps to - * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: - */ - hwc->event_base = - MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; - hwc->idx = idx; - } else { - idx = hwc->idx; - /* Try to get the previous generic event again */ - if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { -try_generic: - idx = x86_pmu.get_event_idx(cpuc, hwc); - if (idx == -1) - return -EAGAIN; - - set_bit(idx, cpuc->used_mask); - hwc->idx = idx; - } - hwc->config_base = x86_pmu.eventsel; - hwc->event_base = x86_pmu.perfctr; - } - - return idx; -} - -/* - * Find a PMC slot for the freshly enabled / scheduled in event: + * activate a single event + * + * The event is added to the group of enabled events + * but only if it can be scehduled with existing events. + * + * Called with PMU disabled. If successful and return value 1, + * then guaranteed to call perf_enable() and hw_perf_enable() */ static int x86_pmu_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - int idx; - - idx = x86_schedule_event(cpuc, hwc); - if (idx < 0) - return idx; - - perf_events_lapic_init(); + struct hw_perf_event *hwc; + int assign[X86_PMC_IDX_MAX]; + int n, n0, ret; - x86_pmu.disable(hwc, idx); + hwc = &event->hw; - cpuc->events[idx] = event; - set_bit(idx, cpuc->active_mask); + n0 = cpuc->n_events; + n = collect_events(cpuc, event, false); + if (n < 0) + return n; - x86_perf_event_set_period(event, hwc, idx); - x86_pmu.enable(hwc, idx); + ret = x86_schedule_events(cpuc, n, assign); + if (ret) + return ret; + /* + * copy new assignment, now we know it is possible + * will be used by hw_perf_enable() + */ + memcpy(cpuc->assign, assign, n*sizeof(int)); - perf_event_update_userpage(event); + cpuc->n_events = n; + cpuc->n_added = n - n0; return 0; } @@ -1583,7 +1811,7 @@ void perf_event_print_debug(void) pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); } - pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); + pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); for (idx = 0; idx < x86_pmu.num_events; idx++) { rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); @@ -1667,9 +1895,8 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) event->pending_kill = POLL_IN; } -static void x86_pmu_disable(struct perf_event *event) +static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -1681,12 +1908,6 @@ static void x86_pmu_disable(struct perf_event *event) x86_pmu.disable(hwc, idx); /* - * Make sure the cleared pointer becomes visible before we - * (potentially) free the event: - */ - barrier(); - - /* * Drain the remaining delta count out of a event * that we are disabling: */ @@ -1697,8 +1918,28 @@ static void x86_pmu_disable(struct perf_event *event) intel_pmu_drain_bts_buffer(cpuc); cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); +} + +static void x86_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int i; + + __x86_pmu_disable(event, cpuc); + + for (i = 0; i < cpuc->n_events; i++) { + if (event == cpuc->event_list[i]) { + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(cpuc, event); + + while (++i < cpuc->n_events) + cpuc->event_list[i-1] = cpuc->event_list[i]; + + --cpuc->n_events; + break; + } + } perf_event_update_userpage(event); } @@ -1969,6 +2210,162 @@ perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_STOP; } +static struct event_constraint unconstrained; + +static struct event_constraint bts_constraint = + EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); + +static struct event_constraint * +intel_special_constraints(struct perf_event *event) +{ + unsigned int hw_event; + + hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; + + if (unlikely((hw_event == + x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && + (event->hw.sample_period == 1))) { + + return &bts_constraint; + } + return NULL; +} + +static struct event_constraint * +intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct event_constraint *c; + + c = intel_special_constraints(event); + if (c) + return c; + + if (x86_pmu.event_constraints) { + for_each_event_constraint(c, x86_pmu.event_constraints) { + if ((event->hw.config & c->cmask) == c->code) + return c; + } + } + + return &unconstrained; +} + +static struct event_constraint * +amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + return &unconstrained; +} + +static int x86_event_sched_in(struct perf_event *event, + struct perf_cpu_context *cpuctx, int cpu) +{ + int ret = 0; + + event->state = PERF_EVENT_STATE_ACTIVE; + event->oncpu = cpu; + event->tstamp_running += event->ctx->time - event->tstamp_stopped; + + if (!is_x86_event(event)) + ret = event->pmu->enable(event); + + if (!ret && !is_software_event(event)) + cpuctx->active_oncpu++; + + if (!ret && event->attr.exclusive) + cpuctx->exclusive = 1; + + return ret; +} + +static void x86_event_sched_out(struct perf_event *event, + struct perf_cpu_context *cpuctx, int cpu) +{ + event->state = PERF_EVENT_STATE_INACTIVE; + event->oncpu = -1; + + if (!is_x86_event(event)) + event->pmu->disable(event); + + event->tstamp_running -= event->ctx->time - event->tstamp_stopped; + + if (!is_software_event(event)) + cpuctx->active_oncpu--; + + if (event->attr.exclusive || !cpuctx->active_oncpu) + cpuctx->exclusive = 0; +} + +/* + * Called to enable a whole group of events. + * Returns 1 if the group was enabled, or -EAGAIN if it could not be. + * Assumes the caller has disabled interrupts and has + * frozen the PMU with hw_perf_save_disable. + * + * called with PMU disabled. If successful and return value 1, + * then guaranteed to call perf_enable() and hw_perf_enable() + */ +int hw_perf_group_sched_in(struct perf_event *leader, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct perf_event *sub; + int assign[X86_PMC_IDX_MAX]; + int n0, n1, ret; + + /* n0 = total number of events */ + n0 = collect_events(cpuc, leader, true); + if (n0 < 0) + return n0; + + ret = x86_schedule_events(cpuc, n0, assign); + if (ret) + return ret; + + ret = x86_event_sched_in(leader, cpuctx, cpu); + if (ret) + return ret; + + n1 = 1; + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + if (sub->state > PERF_EVENT_STATE_OFF) { + ret = x86_event_sched_in(sub, cpuctx, cpu); + if (ret) + goto undo; + ++n1; + } + } + /* + * copy new assignment, now we know it is possible + * will be used by hw_perf_enable() + */ + memcpy(cpuc->assign, assign, n0*sizeof(int)); + + cpuc->n_events = n0; + cpuc->n_added = n1; + ctx->nr_active += n1; + + /* + * 1 means successful and events are active + * This is not quite true because we defer + * actual activation until hw_perf_enable() but + * this way we* ensure caller won't try to enable + * individual events + */ + return 1; +undo: + x86_event_sched_out(leader, cpuctx, cpu); + n0 = 1; + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + if (sub->state == PERF_EVENT_STATE_ACTIVE) { + x86_event_sched_out(sub, cpuctx, cpu); + if (++n0 == n1) + break; + } + } + return ret; +} + static __read_mostly struct notifier_block perf_event_nmi_notifier = { .notifier_call = perf_event_nmi_handler, .next = NULL, @@ -2000,7 +2397,8 @@ static __initconst struct x86_pmu p6_pmu = { */ .event_bits = 32, .event_mask = (1ULL << 32) - 1, - .get_event_idx = intel_get_event_idx, + .get_event_constraints = intel_get_event_constraints, + .event_constraints = intel_p6_event_constraints }; static __initconst struct x86_pmu intel_pmu = { @@ -2024,7 +2422,7 @@ static __initconst struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .enable_bts = intel_pmu_enable_bts, .disable_bts = intel_pmu_disable_bts, - .get_event_idx = intel_get_event_idx, + .get_event_constraints = intel_get_event_constraints }; static __initconst struct x86_pmu amd_pmu = { @@ -2045,7 +2443,7 @@ static __initconst struct x86_pmu amd_pmu = { .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, - .get_event_idx = gen_get_event_idx, + .get_event_constraints = amd_get_event_constraints }; static __init int p6_pmu_init(void) @@ -2058,12 +2456,9 @@ static __init int p6_pmu_init(void) case 7: case 8: case 11: /* Pentium III */ - event_constraints = intel_p6_event_constraints; - break; case 9: case 13: /* Pentium M */ - event_constraints = intel_p6_event_constraints; break; default: pr_cont("unsupported p6 CPU model %d ", @@ -2128,23 +2523,40 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + x86_pmu.event_constraints = intel_core_event_constraints; pr_cont("Core2 events, "); - event_constraints = intel_core_event_constraints; break; - default: - case 26: + + case 26: /* 45 nm nehalem, "Bloomfield" */ + case 30: /* 45 nm nehalem, "Lynnfield" */ memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - event_constraints = intel_nehalem_event_constraints; + x86_pmu.event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; case 28: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + x86_pmu.event_constraints = intel_gen_event_constraints; pr_cont("Atom events, "); break; + + case 37: /* 32 nm nehalem, "Clarkdale" */ + case 44: /* 32 nm nehalem, "Gulftown" */ + memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + x86_pmu.event_constraints = intel_westmere_event_constraints; + pr_cont("Westmere events, "); + break; + default: + /* + * default constraints for v2 and up + */ + x86_pmu.event_constraints = intel_gen_event_constraints; + pr_cont("generic architected perfmon, "); } return 0; } @@ -2220,6 +2632,9 @@ void __init init_hw_perf_events(void) perf_events_lapic_init(); register_die_notifier(&perf_event_nmi_notifier); + unconstrained = (struct event_constraint) + EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0); + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.event_bits); pr_info("... generic registers: %d\n", x86_pmu.num_events); @@ -2241,46 +2656,73 @@ static const struct pmu pmu = { .unthrottle = x86_pmu_unthrottle, }; -static int -validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) -{ - struct hw_perf_event fake_event = event->hw; - - if (event->pmu && event->pmu != &pmu) - return 0; - - return x86_schedule_event(cpuc, &fake_event) >= 0; -} - +/* + * validate a single event group + * + * validation include: + * - check events are compatible which each other + * - events do not compete for the same counter + * - number of events <= number of counters + * + * validation ensures the group can be loaded onto the + * PMU if it was the only group available. + */ static int validate_group(struct perf_event *event) { - struct perf_event *sibling, *leader = event->group_leader; - struct cpu_hw_events fake_pmu; + struct perf_event *leader = event->group_leader; + struct cpu_hw_events *fake_cpuc; + int ret, n; - memset(&fake_pmu, 0, sizeof(fake_pmu)); + ret = -ENOMEM; + fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); + if (!fake_cpuc) + goto out; - if (!validate_event(&fake_pmu, leader)) - return -ENOSPC; + /* + * the event is not yet connected with its + * siblings therefore we must first collect + * existing siblings, then add the new event + * before we can simulate the scheduling + */ + ret = -ENOSPC; + n = collect_events(fake_cpuc, leader, true); + if (n < 0) + goto out_free; - list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(&fake_pmu, sibling)) - return -ENOSPC; - } + fake_cpuc->n_events = n; + n = collect_events(fake_cpuc, event, false); + if (n < 0) + goto out_free; - if (!validate_event(&fake_pmu, event)) - return -ENOSPC; + fake_cpuc->n_events = n; - return 0; + ret = x86_schedule_events(fake_cpuc, n, NULL); + +out_free: + kfree(fake_cpuc); +out: + return ret; } const struct pmu *hw_perf_event_init(struct perf_event *event) { + const struct pmu *tmp; int err; err = __hw_perf_event_init(event); if (!err) { + /* + * we temporarily connect event to its pmu + * such that validate_group() can classify + * it as an x86 event using is_x86_event() + */ + tmp = event->pmu; + event->pmu = &pmu; + if (event->group_leader != event) err = validate_group(event); + + event->pmu = tmp; } if (err) { if (event->destroy) @@ -2304,7 +2746,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); -static DEFINE_PER_CPU(int, in_ignored_frame); static void @@ -2320,10 +2761,6 @@ static void backtrace_warning(void *data, char *msg) static int backtrace_stack(void *data, char *name) { - per_cpu(in_ignored_frame, smp_processor_id()) = - x86_is_stack_id(NMI_STACK, name) || - x86_is_stack_id(DEBUG_STACK, name); - return 0; } @@ -2331,9 +2768,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - if (per_cpu(in_ignored_frame, smp_processor_id())) - return; - if (reliable) callchain_store(entry, addr); } @@ -2440,9 +2874,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) is_user = user_mode(regs); - if (!current || current->pid == 0) - return; - if (is_user && current->state != TASK_RUNNING) return; diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 898df9719afb..74f4e85a5727 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) return !test_bit(counter, perfctr_nmi_owner); } - -/* checks the an msr for availability */ -int avail_to_resrv_perfctr_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - return !test_bit(counter, perfctr_nmi_owner); -} EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); int reserve_perfctr_nmi(unsigned int msr) diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index ae775ca47b25..11540a189d93 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -18,11 +18,6 @@ #include "dumpstack.h" -/* Just a stub for now */ -int x86_is_stack_id(int id, char *name) -{ - return 0; -} void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 0ad9597073f5..676bc051252e 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -33,11 +33,6 @@ static char x86_stack_ids[][8] = { #endif }; -int x86_is_stack_id(int id, char *name) -{ - return x86_stack_ids[id - 1] == name; -} - static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, unsigned *usedp, char **idp) { diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a1a7876cadcb..a966b753e496 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -517,11 +517,19 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, int checktype) { int i; + u64 end; u64 real_removed_size = 0; if (size > (ULLONG_MAX - start)) size = ULLONG_MAX - start; + end = start + size; + printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", + (unsigned long long) start, + (unsigned long long) end); + e820_print_type(old_type); + printk(KERN_CONT "\n"); + for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; u64 final_start, final_end; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 05d5fec64a94..ae90b4739435 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -502,8 +502,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) rcu_read_lock(); bp = per_cpu(bp_per_reg[i], cpu); - if (bp) - rc = NOTIFY_DONE; /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling @@ -522,7 +520,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) rcu_read_unlock(); } - if (dr6 & (~DR_TRAP_BITS)) + /* + * Further processing in do_debug() is needed for a) user-space + * breakpoints (to generate signals) and b) when the system has + * taken exception due to multiple causes + */ + if ((current->thread.debugreg6 & DR_TRAP_BITS) || + (dr6 & (~DR_TRAP_BITS))) rc = NOTIFY_DONE; set_debugreg(dr7, 7); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f2f8540a7f3d..dffdf91c8370 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -224,6 +224,85 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, return ret; } +int xstateregs_active(struct task_struct *target, + const struct user_regset *regset) +{ + return (cpu_has_xsave && tsk_used_math(target)) ? xstate_size : 0; +} + +int xstateregs_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_xsave) + return -ENODEV; + + ret = init_fpu(target); + if (ret) + return ret; + + /* + * First copy the fxsave bytes 0..463 + */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.xstate->xsave, 0, + (sizeof(struct i387_fxsave_struct) - + sizeof(xstate_fx_sw_bytes))); + /* + * Copy the 48bytes defined by software + */ + ret |= user_regset_copyout(&pos, &count, &kbuf, &ubuf, + xstate_fx_sw_bytes, + (sizeof(struct i387_fxsave_struct) - + sizeof(xstate_fx_sw_bytes)), + sizeof(struct i387_fxsave_struct)); + /* + * Copy the rest of xstate memory layout + */ + ret |= user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.xstate->xsave.xsave_hdr, + sizeof(struct i387_fxsave_struct), -1); + return ret; +} + +int xstateregs_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct xsave_hdr_struct *xsave_hdr = + ¤t->thread.xstate->xsave.xsave_hdr; + + + if (!cpu_has_xsave) + return -ENODEV; + + ret = init_fpu(target); + if (ret) + return ret; + + set_stopped_child_used_math(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.xstate->xsave, 0, -1); + + /* + * mxcsr reserved bits must be masked to zero for security reasons. + */ + target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; + + xsave_hdr->xstate_bv &= pcntxt_mask; + /* + * These bits must be zero. + */ + xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + + + return ret; +} + #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION /* diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index d5932226614f..fce55d532631 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -84,24 +84,7 @@ static struct irqaction irq2 = { }; DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... IRQ0_VECTOR - 1] = -1, - [IRQ0_VECTOR] = 0, - [IRQ1_VECTOR] = 1, - [IRQ2_VECTOR] = 2, - [IRQ3_VECTOR] = 3, - [IRQ4_VECTOR] = 4, - [IRQ5_VECTOR] = 5, - [IRQ6_VECTOR] = 6, - [IRQ7_VECTOR] = 7, - [IRQ8_VECTOR] = 8, - [IRQ9_VECTOR] = 9, - [IRQ10_VECTOR] = 10, - [IRQ11_VECTOR] = 11, - [IRQ12_VECTOR] = 12, - [IRQ13_VECTOR] = 13, - [IRQ14_VECTOR] = 14, - [IRQ15_VECTOR] = 15, - [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 + [0 ... NR_VECTORS - 1] = -1, }; int vector_used_by_percpu_irq(unsigned int vector) @@ -116,6 +99,9 @@ int vector_used_by_percpu_irq(unsigned int vector) return 0; } +/* Number of legacy interrupts */ +int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; + void __init init_ISA_irqs(void) { int i; @@ -142,6 +128,19 @@ void __init init_ISA_irqs(void) void __init init_IRQ(void) { + int i; + + /* + * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. + * If these IRQ's are handled by legacy interrupt-controllers like PIC, + * then this configuration will likely be static after the boot. If + * these IRQ's are handled by more mordern controllers like IO-APIC, + * then this vector space can be freed and re-used dynamically as the + * irq's migrate etc. + */ + for (i = 0; i < nr_legacy_irqs; i++) + per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; + x86_init.irqs.intr_init(); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c9b3522b6b46..02d678065d7d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -92,6 +92,13 @@ void exit_thread(void) } } +void show_regs(struct pt_regs *regs) +{ + show_registers(regs); + show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), + regs->bp); +} + void show_regs_common(void) { const char *board, *product; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 37ad1e046aae..f6c62667e30c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -174,12 +174,6 @@ void __show_regs(struct pt_regs *regs, int all) d6, d7); } -void show_regs(struct pt_regs *regs) -{ - show_registers(regs); - show_trace(NULL, regs, ®s->sp, regs->bp); -} - void release_thread(struct task_struct *dead_task) { BUG_ON(dead_task->mm); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 41a26a82470a..fc6ad4b95c6b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -211,12 +211,6 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); } -void show_regs(struct pt_regs *regs) -{ - show_registers(regs); - show_trace(NULL, regs, (void *)(regs + 1), regs->bp); -} - void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 118428085ea2..aeac66324895 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -48,6 +48,7 @@ enum x86_regset { REGSET_FP, REGSET_XFP, REGSET_IOPERM64 = REGSET_XFP, + REGSET_XSTATE, REGSET_TLS, REGSET_IOPERM32, }; @@ -140,30 +141,6 @@ static const int arg_offs_table[] = { #endif }; -/** - * regs_get_argument_nth() - get Nth argument at function call - * @regs: pt_regs which contains registers at function entry. - * @n: argument number. - * - * regs_get_argument_nth() returns @n th argument of a function call. - * Since usually the kernel stack will be changed right after function entry, - * you must use this at function entry. If the @n th entry is NOT in the - * kernel stack or pt_regs, this returns 0. - */ -unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) -{ - if (n < ARRAY_SIZE(arg_offs_table)) - return *(unsigned long *)((char *)regs + arg_offs_table[n]); - else { - /* - * The typical case: arg n is on the stack. - * (Note: stack[0] = return address, so skip it) - */ - n -= ARRAY_SIZE(arg_offs_table); - return regs_get_kernel_stack_nth(regs, 1 + n); - } -} - /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -1232,6 +1209,20 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) 0, sizeof(struct user_i387_struct), datap); + case PTRACE_GETXSTATEREGS: /* Get the child extended state. */ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_XSTATE, + 0, xstate_size, + datap); + + case PTRACE_SETXSTATEREGS: /* Set the child extended state. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_XSTATE, + 0, xstate_size, + datap); + #ifdef CONFIG_X86_32 case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ return copy_regset_to_user(child, &user_x86_32_view, @@ -1561,6 +1552,16 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, sizeof(struct user32_fxsr_struct), datap); + case PTRACE_GETXSTATEREGS: /* Get the child extended state. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_XSTATE, 0, xstate_size, + datap); + + case PTRACE_SETXSTATEREGS: /* Set the child extended state. */ + return copy_regset_from_user(child, &user_x86_32_view, + REGSET_XSTATE, 0, xstate_size, + datap); + case PTRACE_GET_THREAD_AREA: case PTRACE_SET_THREAD_AREA: #ifdef CONFIG_X86_PTRACE_BTS @@ -1584,7 +1585,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, #ifdef CONFIG_X86_64 -static const struct user_regset x86_64_regsets[] = { +static struct user_regset x86_64_regsets[] __read_mostly = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, .n = sizeof(struct user_regs_struct) / sizeof(long), @@ -1597,6 +1598,12 @@ static const struct user_regset x86_64_regsets[] = { .size = sizeof(long), .align = sizeof(long), .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set }, + [REGSET_XSTATE] = { + .core_note_type = NT_X86_XSTATE, + .size = sizeof(long), .align = sizeof(long), + .active = xstateregs_active, .get = xstateregs_get, + .set = xstateregs_set + }, [REGSET_IOPERM64] = { .core_note_type = NT_386_IOPERM, .n = IO_BITMAP_LONGS, @@ -1622,7 +1629,7 @@ static const struct user_regset_view user_x86_64_view = { #endif /* CONFIG_X86_64 */ #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION -static const struct user_regset x86_32_regsets[] = { +static struct user_regset x86_32_regsets[] __read_mostly = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, .n = sizeof(struct user_regs_struct32) / sizeof(u32), @@ -1641,6 +1648,12 @@ static const struct user_regset x86_32_regsets[] = { .size = sizeof(u32), .align = sizeof(u32), .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set }, + [REGSET_XSTATE] = { + .core_note_type = NT_X86_XSTATE, + .size = sizeof(u32), .align = sizeof(u32), + .active = xstateregs_active, .get = xstateregs_get, + .set = xstateregs_set + }, [REGSET_TLS] = { .core_note_type = NT_386_TLS, .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, @@ -1663,6 +1676,18 @@ static const struct user_regset_view user_x86_32_view = { }; #endif +u64 xstate_fx_sw_bytes[6]; +void update_regset_xstate_info(unsigned int size, u64 xstate_mask) +{ +#ifdef CONFIG_X86_64 + x86_64_regsets[REGSET_XSTATE].n = size / sizeof(long); +#endif +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u32); +#endif + xstate_fx_sw_bytes[0] = xstate_mask; +} + const struct user_regset_view *task_user_regset_view(struct task_struct *task) { #ifdef CONFIG_IA32_EMULATION diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5d9e40c58628..3499b4fabc94 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -667,6 +667,23 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { {} }; +static void __init trim_bios_range(void) +{ + /* + * A special case is the first 4Kb of memory; + * This is a BIOS owned area, not kernel ram, but generally + * not listed as such in the E820 table. + */ + e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED); + /* + * special case: Some BIOSen report the PC BIOS + * area (640->1Mb) as ram even though it is not. + * take them out. + */ + e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); +} + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -830,7 +847,7 @@ void __init setup_arch(char **cmdline_p) insert_resource(&iomem_resource, &data_resource); insert_resource(&iomem_resource, &bss_resource); - + trim_bios_range(); #ifdef CONFIG_X86_32 if (ppro_with_ram_bug()) { e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 678d0b8c26f3..b2ebcba729d9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -241,6 +241,11 @@ static void __cpuinit smp_callin(void) map_cpu_to_logical_apicid(); notify_cpu_starting(cpuid); + + /* + * Need to setup vector mappings before we enable interrupts. + */ + __setup_vector_irq(smp_processor_id()); /* * Get our bogomips. * @@ -315,7 +320,6 @@ notrace static void __cpuinit start_secondary(void *unused) */ ipi_call_lock(); lock_vector_lock(); - __setup_vector_irq(smp_processor_id()); set_cpu_online(smp_processor_id(), true); unlock_vector_lock(); ipi_call_unlock(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6ae2122467dc..7296bd231490 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -540,6 +540,9 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) get_debugreg(dr6, 6); + /* Filter out all the reserved bits which are preset to 1 */ + dr6 &= ~DR6_RESERVED; + /* Catch kmemcheck conditions first of all! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) return; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 597683aa5ba0..23066ecf12fa 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -806,7 +806,7 @@ static void __init check_system_tsc_reliable(void) unsigned long res_low, res_high; rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */ + /* Geode_LX - the OLPC CPU has a very reliable TSC */ if (res_low & RTSC_SUSP) tsc_clocksource_reliable = 1; #endif diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 25bbb9bfc312..5e1ff66ecd73 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -79,11 +79,7 @@ unsigned long vmi_tsc_khz(void) static inline unsigned int vmi_get_timer_vector(void) { -#ifdef CONFIG_X86_IO_APIC - return FIRST_DEVICE_VECTOR; -#else - return FIRST_EXTERNAL_VECTOR; -#endif + return IRQ0_VECTOR; } /** vmi clockchip */ diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 619f7f88b8cc..693920b22496 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -26,7 +26,8 @@ EXPORT_SYMBOL(__put_user_2); EXPORT_SYMBOL(__put_user_4); EXPORT_SYMBOL(__put_user_8); -EXPORT_SYMBOL(copy_user_generic); +EXPORT_SYMBOL(copy_user_generic_string); +EXPORT_SYMBOL(copy_user_generic_unrolled); EXPORT_SYMBOL(__copy_user_nocache); EXPORT_SYMBOL(_copy_from_user); EXPORT_SYMBOL(_copy_to_user); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index c5ee17e8c6d9..782c3a362ec6 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -337,6 +337,7 @@ void __ref xsave_cntxt_init(void) cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); xstate_size = ebx; + update_regset_xstate_info(xstate_size, pcntxt_mask); prepare_fx_sw_frame(); setup_xstate_init(); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index cffd754f3039..ddef409c2816 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -14,7 +14,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c clean-files := inat-tables.c -obj-$(CONFIG_SMP) += msr-smp.o +obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o lib-y := delay.o lib-y += thunk_$(BITS).o @@ -39,4 +39,5 @@ else lib-y += thunk_64.o clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o + lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o endif diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c new file mode 100644 index 000000000000..a3c668875038 --- /dev/null +++ b/arch/x86/lib/cache-smp.c @@ -0,0 +1,19 @@ +#include <linux/smp.h> +#include <linux/module.h> + +static void __wbinvd(void *dummy) +{ + wbinvd(); +} + +void wbinvd_on_cpu(int cpu) +{ + smp_call_function_single(cpu, __wbinvd, NULL, 1); +} +EXPORT_SYMBOL(wbinvd_on_cpu); + +int wbinvd_on_all_cpus(void) +{ + return on_each_cpu(__wbinvd, NULL, 1); +} +EXPORT_SYMBOL(wbinvd_on_all_cpus); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index cf889d4e076a..71100c98e337 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -90,12 +90,6 @@ ENTRY(_copy_from_user) CFI_ENDPROC ENDPROC(_copy_from_user) -ENTRY(copy_user_generic) - CFI_STARTPROC - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string - CFI_ENDPROC -ENDPROC(copy_user_generic) - .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index ad5441ed1b57..f82e884928af 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -20,12 +20,11 @@ /* * memcpy_c() - fast string ops (REP MOVSQ) based variant. * - * Calls to this get patched into the kernel image via the + * This gets patched over the unrolled variant (below) via the * alternative instructions framework: */ - ALIGN -memcpy_c: - CFI_STARTPROC + .section .altinstr_replacement, "ax", @progbits +.Lmemcpy_c: movq %rdi, %rax movl %edx, %ecx @@ -35,8 +34,8 @@ memcpy_c: movl %edx, %ecx rep movsb ret - CFI_ENDPROC -ENDPROC(memcpy_c) +.Lmemcpy_e: + .previous ENTRY(__memcpy) ENTRY(memcpy) @@ -128,16 +127,10 @@ ENDPROC(__memcpy) * It is also a lot simpler. Use this when possible: */ - .section .altinstr_replacement, "ax" -1: .byte 0xeb /* jmp <disp8> */ - .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ -2: - .previous - .section .altinstructions, "a" .align 8 .quad memcpy - .quad 1b + .quad .Lmemcpy_c .byte X86_FEATURE_REP_GOOD /* @@ -145,6 +138,6 @@ ENDPROC(__memcpy) * so it is silly to overwrite itself with nops - reboot is the * only outcome... */ - .byte 2b - 1b - .byte 2b - 1b + .byte .Lmemcpy_e - .Lmemcpy_c + .byte .Lmemcpy_e - .Lmemcpy_c .previous diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 2c5948116bd2..e88d3b81644a 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -12,9 +12,8 @@ * * rax original destination */ - ALIGN -memset_c: - CFI_STARTPROC + .section .altinstr_replacement, "ax", @progbits +.Lmemset_c: movq %rdi,%r9 movl %edx,%r8d andl $7,%r8d @@ -29,8 +28,8 @@ memset_c: rep stosb movq %r9,%rax ret - CFI_ENDPROC -ENDPROC(memset_c) +.Lmemset_e: + .previous ENTRY(memset) ENTRY(__memset) @@ -118,16 +117,11 @@ ENDPROC(__memset) #include <asm/cpufeature.h> - .section .altinstr_replacement,"ax" -1: .byte 0xeb /* jmp <disp8> */ - .byte (memset_c - memset) - (2f - 1b) /* offset */ -2: - .previous .section .altinstructions,"a" .align 8 .quad memset - .quad 1b + .quad .Lmemset_c .byte X86_FEATURE_REP_GOOD .byte .Lfinal - memset - .byte 2b - 1b + .byte .Lmemset_e - .Lmemset_c .previous diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S new file mode 100644 index 000000000000..15acecf0d7aa --- /dev/null +++ b/arch/x86/lib/rwsem_64.S @@ -0,0 +1,81 @@ +/* + * x86-64 rwsem wrappers + * + * This interfaces the inline asm code to the slow-path + * C routines. We need to save the call-clobbered regs + * that the asm does not mark as clobbered, and move the + * argument from %rax to %rdi. + * + * NOTE! We don't need to save %rax, because the functions + * will always return the semaphore pointer in %rax (which + * is also the input argument to these helpers) + * + * The following can clobber %rdx because the asm clobbers it: + * call_rwsem_down_write_failed + * call_rwsem_wake + * but %rdi, %rsi, %rcx, %r8-r11 always need saving. + */ + +#include <linux/linkage.h> +#include <asm/rwlock.h> +#include <asm/alternative-asm.h> +#include <asm/frame.h> +#include <asm/dwarf2.h> + +#define save_common_regs \ + pushq %rdi; \ + pushq %rsi; \ + pushq %rcx; \ + pushq %r8; \ + pushq %r9; \ + pushq %r10; \ + pushq %r11 + +#define restore_common_regs \ + popq %r11; \ + popq %r10; \ + popq %r9; \ + popq %r8; \ + popq %rcx; \ + popq %rsi; \ + popq %rdi + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_down_read_failed) + save_common_regs + pushq %rdx + movq %rax,%rdi + call rwsem_down_read_failed + popq %rdx + restore_common_regs + ret + ENDPROC(call_rwsem_down_read_failed) + +ENTRY(call_rwsem_down_write_failed) + save_common_regs + movq %rax,%rdi + call rwsem_down_write_failed + restore_common_regs + ret + ENDPROC(call_rwsem_down_write_failed) + +ENTRY(call_rwsem_wake) + decw %dx /* do nothing if still outstanding active readers */ + jnz 1f + save_common_regs + movq %rax,%rdi + call rwsem_wake + restore_common_regs +1: ret + ENDPROC(call_rwsem_wake) + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_downgrade_wake) + save_common_regs + pushq %rdx + movq %rax,%rdi + call rwsem_downgrade_wake + popq %rdx + restore_common_regs + ret + ENDPROC(call_rwsem_downgrade_wake) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 71da1bca13cb..738e6593799d 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -18,7 +18,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) #else /* * With get_user_pages_fast, we walk down the pagetables without taking - * any locks. For this we would like to load the pointers atoimcally, + * any locks. For this we would like to load the pointers atomically, * but that is not possible (without expensive cmpxchg8b) on PAE. What * we do have is the guarantee that a pte will only either go from not * present to present, or present to not present or both -- it will not diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index c246d259822d..5eb1ba74a3a9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -24,43 +24,6 @@ #include "physaddr.h" -int page_is_ram(unsigned long pagenr) -{ - resource_size_t addr, end; - int i; - - /* - * A special case is the first 4Kb of memory; - * This is a BIOS owned area, not kernel ram, but generally - * not listed as such in the E820 table. - */ - if (pagenr == 0) - return 0; - - /* - * Second special case: Some BIOSen report the PC BIOS - * area (640->1Mb) as ram even though it is not. - */ - if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) && - pagenr < (BIOS_END >> PAGE_SHIFT)) - return 0; - - for (i = 0; i < e820.nr_map; i++) { - /* - * Not usable memory: - */ - if (e820.map[i].type != E820_RAM) - continue; - addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT; - end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT; - - - if ((pagenr >= addr) && (pagenr < end)) - return 1; - } - return 0; -} - /* * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. @@ -422,6 +385,10 @@ void __init early_ioremap_init(void) * The boot-ioremap range spans multiple pmds, for which * we are not prepared: */ +#define __FIXADDR_TOP (-PAGE_SIZE) + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); +#undef __FIXADDR_TOP if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { WARN_ON(1); printk(KERN_WARNING "pmd %p != %p\n", diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index c8191defc38a..1dab5194fd9d 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -71,7 +71,7 @@ static int mmap_is_legacy(void) if (current->personality & ADDR_COMPAT_LAYOUT) return 1; - if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) return 1; return sysctl_legacy_va_layout; @@ -96,7 +96,7 @@ static unsigned long mmap_rnd(void) static unsigned long mmap_base(void) { - unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + unsigned long gap = rlimit(RLIMIT_STACK); if (gap < MIN_GAP) gap = MIN_GAP; |