diff options
Diffstat (limited to 'include')
38 files changed, 1216 insertions, 596 deletions
diff --git a/include/asm-alpha/thread_info.h b/include/asm-alpha/thread_info.h index fb3185196298..fde5f56cd770 100644 --- a/include/asm-alpha/thread_info.h +++ b/include/asm-alpha/thread_info.h @@ -57,7 +57,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #endif /* __ASSEMBLY__ */ -#define PREEMPT_ACTIVE 0x40000000 +#define PREEMPT_ACTIVE 0x10000000 /* * Thread information flags: diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h index 07049f6c0d41..6ae2a825c0d0 100644 --- a/include/asm-avr32/thread_info.h +++ b/include/asm-avr32/thread_info.h @@ -70,7 +70,7 @@ static inline struct thread_info *current_thread_info(void) #endif /* !__ASSEMBLY__ */ -#define PREEMPT_ACTIVE 0x40000000 +#define PREEMPT_ACTIVE 0x10000000 /* * Thread information flags diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 759f8f17df75..a683ecb6b25e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -52,7 +52,10 @@ . = ALIGN(8); \ VMLINUX_SYMBOL(__start___markers) = .; \ *(__markers) \ - VMLINUX_SYMBOL(__stop___markers) = .; + VMLINUX_SYMBOL(__stop___markers) = .; \ + VMLINUX_SYMBOL(__start___imv) = .; \ + *(__imv) /* Immediate values: pointers */ \ + VMLINUX_SYMBOL(__stop___imv) = .; #define RO_DATA(align) \ . = ALIGN((align)); \ @@ -60,6 +63,10 @@ VMLINUX_SYMBOL(__start_rodata) = .; \ *(.rodata) *(.rodata.*) \ *(__vermagic) /* Kernel version magic */ \ + . = ALIGN(8); \ + VMLINUX_SYMBOL(__start___imv_cond_end) = .; \ + *(__imv_cond_end) /* Immediate condition end pointers */\ + VMLINUX_SYMBOL(__stop___imv_cond_end) = .; \ *(__markers_strings) /* Markers: strings */ \ } \ \ diff --git a/include/asm-powerpc/cacheflush.h b/include/asm-powerpc/cacheflush.h index ba667a383b8c..a52a565ce29a 100644 --- a/include/asm-powerpc/cacheflush.h +++ b/include/asm-powerpc/cacheflush.h @@ -63,7 +63,9 @@ extern void flush_dcache_phys_range(unsigned long start, unsigned long stop); #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ memcpy(dst, src, len) - +#define text_poke memcpy +#define text_poke_early text_poke +#define sync_core() #ifdef CONFIG_DEBUG_PAGEALLOC /* internal debugging function */ diff --git a/include/asm-powerpc/immediate.h b/include/asm-powerpc/immediate.h new file mode 100644 index 000000000000..252206bc9c4e --- /dev/null +++ b/include/asm-powerpc/immediate.h @@ -0,0 +1,76 @@ +#ifndef _ASM_POWERPC_IMMEDIATE_H +#define _ASM_POWERPC_IMMEDIATE_H + +/* + * Immediate values. PowerPC architecture optimizations. + * + * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + */ + +#include <asm/asm-compat.h> + +struct __imv { + unsigned long var; /* Identifier variable of the immediate value */ + unsigned long imv; /* + * Pointer to the memory location that holds + * the immediate value within the load immediate + * instruction. + */ + unsigned char size; /* Type size. */ +} __attribute__ ((packed)); + +/** + * imv_read - read immediate variable + * @name: immediate value name + * + * Reads the value of @name. + * Optimized version of the immediate. + * Do not use in __init and __exit functions. Use _imv_read() instead. + * Makes sure the 2 bytes update will be atomic by aligning the immediate + * value. Use a normal memory read for the 4 bytes immediate because there is no + * way to atomically update it without using a seqlock read side, which would + * cost more in term of total i-cache and d-cache space than a simple memory + * read. + */ +#define imv_read(name) \ + ({ \ + __typeof__(name##__imv) value; \ + BUILD_BUG_ON(sizeof(value) > 8); \ + switch (sizeof(value)) { \ + case 1: \ + asm(".section __imv,\"aw\",@progbits\n\t" \ + PPC_LONG "%c1, ((1f)-1)\n\t" \ + ".byte 1\n\t" \ + ".previous\n\t" \ + "li %0,0\n\t" \ + "1:\n\t" \ + : "=r" (value) \ + : "i" (&name##__imv)); \ + break; \ + case 2: \ + asm(".section __imv,\"aw\",@progbits\n\t" \ + PPC_LONG "%c1, ((1f)-2)\n\t" \ + ".byte 2\n\t" \ + ".previous\n\t" \ + ".align 2\n\t" \ + "li %0,0\n\t" \ + "1:\n\t" \ + : "=r" (value) \ + : "i" (&name##__imv)); \ + break; \ + case 4: \ + case 8: value = name##__imv; \ + break; \ + }; \ + value; \ + }) + +#define imv_cond(name) imv_read(name) +#define imv_cond_end() + +extern int arch_imv_update(const struct __imv *imv, int early); + +#endif /* _ASM_POWERPC_IMMEDIATE_H */ diff --git a/include/asm-x86/immediate.h b/include/asm-x86/immediate.h new file mode 100644 index 000000000000..1700a76089d2 --- /dev/null +++ b/include/asm-x86/immediate.h @@ -0,0 +1,169 @@ +#ifndef _ASM_X86_IMMEDIATE_H +#define _ASM_X86_IMMEDIATE_H + +/* + * Immediate values. x86 architecture optimizations. + * + * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + */ + +#include <asm/asm.h> + +struct __imv { + unsigned long var; /* Pointer to the identifier variable of the + * immediate value + */ + unsigned long imv; /* + * Pointer to the memory location of the + * immediate value within the instruction. + */ + unsigned char var_size; /* Type size of variable. */ + unsigned char size; /* Type size of immediate value. */ + unsigned char insn_size;/* Instruction size. */ +} __attribute__ ((packed)); + +/** + * imv_read - read immediate variable + * @name: immediate value name + * + * Reads the value of @name. + * Optimized version of the immediate. + * Do not use in __init and __exit functions. Use _imv_read() instead. + * If size is bigger than the architecture long size, fall back on a memory + * read. + * + * Make sure to populate the initial static 64 bits opcode with a value + * what will generate an instruction with 8 bytes immediate value (not the REX.W + * prefixed one that loads a sign extended 32 bits immediate value in a r64 + * register). + * + * Create the instruction in a discarded section to calculate its size. This is + * how we can align the beginning of the instruction on an address that will + * permit atomic modification of the immediate value without knowing the size of + * the opcode used by the compiler. The operand size is known in advance. + */ +#define imv_read(name) \ + ({ \ + __typeof__(name##__imv) value; \ + BUILD_BUG_ON(sizeof(value) > 8); \ + switch (sizeof(value)) { \ + case 1: \ + asm(".section __discard,\"\",@progbits\n\t" \ + "1:\n\t" \ + "mov $0,%0\n\t" \ + "2:\n\t" \ + ".previous\n\t" \ + ".section __imv,\"aw\",@progbits\n\t" \ + _ASM_PTR "%c1, (3f)-%c2\n\t" \ + ".byte %c2, %c2, (2b-1b)\n\t" \ + ".previous\n\t" \ + "mov $0,%0\n\t" \ + "3:\n\t" \ + : "=q" (value) \ + : "i" (&name##__imv), \ + "i" (sizeof(value))); \ + break; \ + case 2: \ + case 4: \ + asm(".section __discard,\"\",@progbits\n\t" \ + "1:\n\t" \ + "mov $0,%0\n\t" \ + "2:\n\t" \ + ".previous\n\t" \ + ".section __imv,\"aw\",@progbits\n\t" \ + _ASM_PTR "%c1, (3f)-%c2\n\t" \ + ".byte %c2, %c2, (2b-1b)\n\t" \ + ".previous\n\t" \ + ".org . + ((-.-(2b-1b)) & (%c2-1)), 0x90\n\t" \ + "mov $0,%0\n\t" \ + "3:\n\t" \ + : "=r" (value) \ + : "i" (&name##__imv), \ + "i" (sizeof(value))); \ + break; \ + case 8: \ + if (sizeof(long) < 8) { \ + value = name##__imv; \ + break; \ + } \ + asm(".section __discard,\"\",@progbits\n\t" \ + "1:\n\t" \ + "mov $0xFEFEFEFE01010101,%0\n\t" \ + "2:\n\t" \ + ".previous\n\t" \ + ".section __imv,\"aw\",@progbits\n\t" \ + _ASM_PTR "%c1, (3f)-%c2\n\t" \ + ".byte %c2, %c2, (2b-1b)\n\t" \ + ".previous\n\t" \ + ".org . + ((-.-(2b-1b)) & (%c2-1)), 0x90\n\t" \ + "mov $0xFEFEFEFE01010101,%0\n\t" \ + "3:\n\t" \ + : "=r" (value) \ + : "i" (&name##__imv), \ + "i" (sizeof(value))); \ + break; \ + }; \ + value; \ + }) + +/* + * Uses %eax. + * immediate value size is declared as 0. + * Use in + * if (unlikely(imv_cond(var))) { + * imv_cond_end(); + * ... + * } else { + * imv_cond_end(); + * ... + * } + * Given a char as argument. + * If the expected code pattern insuring correct liveliness of ZF and %eax isn't + * met, fallback on standard immediate value. + * patches the 5 bytes mov for a e9 XX XX XX XX (near jump) + * Note : Patching the the 4 bytes immediate value with 1 byte variable + * on fallback. + */ +#define imv_cond(name) \ + ({ \ + uint32_t value; \ + BUILD_BUG_ON(sizeof(__typeof__(name##__imv)) > 1); \ + asm (".section __discard,\"\",@progbits\n\t" \ + "1:\n\t" \ + "mov $0,%0\n\t" \ + "2:\n\t" \ + ".previous\n\t" \ + ".section __imv,\"aw\",@progbits\n\t" \ + _ASM_PTR "%c1, (3f)-%c2\n\t" \ + ".byte %c3, 0, (2b-1b)\n\t" \ + ".previous\n\t" \ + "mov $0,%0\n\t" \ + "3:\n\t" \ + : "=a" (value) \ + : "i" (&name##__imv), \ + "i" (sizeof(value)), \ + "i" (sizeof(__typeof__(name##__imv)))); \ + value; \ + }) + +/* + * Make sure the %eax register and ZF are not live anymore at the current + * address, which is declared in the __imv_cond_end section. + * All asm statements clobbers the flags, but add "cc" clobber just to be sure. + * Clobbers %eax. + */ +#define imv_cond_end() \ + do { \ + asm (".section __imv_cond_end,\"a\",@progbits\n\t" \ + _ASM_PTR "1f\n\t" \ + ".previous\n\t" \ + "1:\n\t" \ + : : : "eax", "cc"); \ + } while (0) + +extern int arch_imv_update(struct __imv *imv, int early); + +#endif /* _ASM_X86_IMMEDIATE_H */ diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index ecc80f341f37..5f7310aa3efd 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -121,7 +121,7 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) * - mbligh */ local_irq_save(flags); - for_each_cpu_mask(query_cpu, mask) { + for_each_cpu_mask_nr(query_cpu, mask) { __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h index 24d71b1eb189..c3009fd85a47 100644 --- a/include/asm-x86/irqflags.h +++ b/include/asm-x86/irqflags.h @@ -51,6 +51,61 @@ static inline void native_halt(void) #endif +#ifdef CONFIG_X86_64 +/* + * Only returns from a trap or exception to a NMI context (intra-privilege + * level near return) to the same SS and CS segments. Should be used + * upon trap or exception return when nested over a NMI context so no iret is + * issued. It takes care of modifying the eflags, rsp and returning to the + * previous function. + * + * The stack, at that point, looks like : + * + * 0(rsp) RIP + * 8(rsp) CS + * 16(rsp) EFLAGS + * 24(rsp) RSP + * 32(rsp) SS + * + * Upon execution : + * Copy EIP to the top of the return stack + * Update top of return stack address + * Pop eflags into the eflags register + * Make the return stack current + * Near return (popping the return address from the return stack) + */ +#define NATIVE_INTERRUPT_RETURN_NMI_SAFE pushq %rax; \ + movq %rsp, %rax; \ + movq 24+8(%rax), %rsp; \ + pushq 0+8(%rax); \ + pushq 16+8(%rax); \ + movq (%rax), %rax; \ + popfq; \ + ret +#else +/* + * Protected mode only, no V8086. Implies that protected mode must + * be entered before NMIs or MCEs are enabled. Only returns from a trap or + * exception to a NMI context (intra-privilege level far return). Should be used + * upon trap or exception return when nested over a NMI context so no iret is + * issued. + * + * The stack, at that point, looks like : + * + * 0(esp) EIP + * 4(esp) CS + * 8(esp) EFLAGS + * + * Upon execution : + * Copy the stack eflags to top of stack + * Pop eflags into the eflags register + * Far return: pop EIP and CS into their register, and additionally pop EFLAGS. + */ +#define NATIVE_INTERRUPT_RETURN_NMI_SAFE pushl 8(%esp); \ + popfl; \ + lret $4 +#endif + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else @@ -109,6 +164,7 @@ static inline unsigned long __raw_local_irq_save(void) #define ENABLE_INTERRUPTS(x) sti #define DISABLE_INTERRUPTS(x) cli +#define INTERRUPT_RETURN_NMI_SAFE NATIVE_INTERRUPT_RETURN_NMI_SAFE #ifdef CONFIG_X86_64 #define INTERRUPT_RETURN iretq diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index fe1fbdec1e1c..fa382ed7e2b2 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -3,6 +3,9 @@ #include <linux/notifier.h> +#include <linux/ptrace.h> +#include <asm/system.h> + struct pt_regs; /* Grossly misnamed. */ @@ -34,4 +37,13 @@ extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); +/* trap3/1 are intr gates for kprobes. So, restore the status of IF, + * if necessary, before executing the original int3/1 (trap) handler. + */ +static inline void restore_interrupts(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); +} + #endif diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h index 54980b0b3892..93a7f49d3e25 100644 --- a/include/asm-x86/kprobes.h +++ b/include/asm-x86/kprobes.h @@ -82,15 +82,6 @@ struct kprobe_ctlblk { struct prev_kprobe prev_kprobe; }; -/* trap3/1 are intr gates for kprobes. So, restore the status of IF, - * if necessary, before executing the original int3/1 (trap) handler. - */ -static inline void restore_interrupts(struct pt_regs *regs) -{ - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); -} - extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h index 22e87c9f6a80..b510daf4f4d8 100644 --- a/include/asm-x86/numa_64.h +++ b/include/asm-x86/numa_64.h @@ -14,11 +14,9 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks, #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) -extern void numa_add_cpu(int cpu); extern void numa_init_array(void); extern int numa_off; -extern void numa_set_node(int cpu, int node); extern void srat_reserve_add_area(int nodeid); extern int hotadd_percent; @@ -31,15 +29,16 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, #ifdef CONFIG_NUMA extern void __init init_cpu_to_node(void); - -static inline void clear_node_cpumask(int cpu) -{ - clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]); -} - +extern void __cpuinit numa_set_node(int cpu, int node); +extern void __cpuinit numa_clear_node(int cpu); +extern void __cpuinit numa_add_cpu(int cpu); +extern void __cpuinit numa_remove_cpu(int cpu); #else -#define init_cpu_to_node() do {} while (0) -#define clear_node_cpumask(cpu) do {} while (0) +static inline void init_cpu_to_node(void) { } +static inline void numa_set_node(int cpu, int node) { } +static inline void numa_clear_node(int cpu) { } +static inline void numa_add_cpu(int cpu, int node) { } +static inline void numa_remove_cpu(int cpu) { } #endif #endif diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 0f13b945e240..d5087e041117 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -141,9 +141,10 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); unsigned long long (*read_tscp)(unsigned int *aux); - /* These two are jmp to, not actually called. */ + /* These three are jmp to, not actually called. */ void (*irq_enable_syscall_ret)(void); void (*iret)(void); + void (*nmi_return)(void); void (*swapgs)(void); @@ -1385,6 +1386,10 @@ static inline unsigned long __raw_local_irq_save(void) PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ jmp *%cs:pv_cpu_ops+PV_CPU_iret) +#define INTERRUPT_RETURN_NMI_SAFE \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_nmi_return), CLBR_NONE, \ + jmp *%cs:pv_cpu_ops+PV_CPU_nmi_return) + #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ PV_SAVE_REGS; \ diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index 62b734986a44..385ffbe4c169 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -20,6 +20,8 @@ struct x8664_pda { /* gcc-ABI: this canary MUST be at offset 40!!! */ char *irqstackptr; + short nodenumber; /* number of current node (32k max) */ + short in_bootmem; /* pda lives in bootmem */ unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ short mmu_state; @@ -35,8 +37,7 @@ struct x8664_pda { unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; -extern struct x8664_pda *_cpu_pda[]; -extern struct x8664_pda boot_cpu_pda[]; +extern struct x8664_pda **_cpu_pda; extern void pda_init(int); #define cpu_pda(i) (_cpu_pda[i]) diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h index 736fc3bb8e1e..912a3a17b9db 100644 --- a/include/asm-x86/percpu.h +++ b/include/asm-x86/percpu.h @@ -143,4 +143,50 @@ do { \ #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) #endif /* !__ASSEMBLY__ */ #endif /* !CONFIG_X86_64 */ + +#ifdef CONFIG_SMP + +/* + * Define the "EARLY_PER_CPU" macros. These are used for some per_cpu + * variables that are initialized and accessed before there are per_cpu + * areas allocated. + */ + +#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ + DEFINE_PER_CPU(_type, _name) = _initvalue; \ + __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \ + { [0 ... NR_CPUS-1] = _initvalue }; \ + __typeof__(_type) *_name##_early_ptr = _name##_early_map + +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ + EXPORT_PER_CPU_SYMBOL(_name) + +#define DECLARE_EARLY_PER_CPU(_type, _name) \ + DECLARE_PER_CPU(_type, _name); \ + extern __typeof__(_type) *_name##_early_ptr; \ + extern __typeof__(_type) _name##_early_map[] + +#define early_per_cpu_ptr(_name) (_name##_early_ptr) +#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) +#define early_per_cpu(_name, _cpu) \ + (early_per_cpu_ptr(_name) ? \ + early_per_cpu_ptr(_name)[_cpu] : \ + per_cpu(_name, _cpu)) + +#else /* !CONFIG_SMP */ +#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ + DEFINE_PER_CPU(_type, _name) = _initvalue + +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ + EXPORT_PER_CPU_SYMBOL(_name) + +#define DECLARE_EARLY_PER_CPU(_type, _name) \ + DECLARE_PER_CPU(_type, _name) + +#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) +#define early_per_cpu_ptr(_name) NULL +/* no early_per_cpu_map() */ + +#endif /* !CONFIG_SMP */ + #endif /* _ASM_X86_PERCPU_H_ */ diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 1ebaa5cd3112..ec841639fb44 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -29,21 +29,12 @@ extern int smp_num_siblings; extern unsigned int num_processors; extern cpumask_t cpu_initialized; -#ifdef CONFIG_SMP -extern u16 x86_cpu_to_apicid_init[]; -extern u16 x86_bios_cpu_apicid_init[]; -extern void *x86_cpu_to_apicid_early_ptr; -extern void *x86_bios_cpu_apicid_early_ptr; -#else -#define x86_cpu_to_apicid_early_ptr NULL -#define x86_bios_cpu_apicid_early_ptr NULL -#endif - DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); -DECLARE_PER_CPU(u16, x86_cpu_to_apicid); -DECLARE_PER_CPU(u16, x86_bios_cpu_apicid); + +DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); /* Static state in head.S used to set up a CPU */ extern struct { diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index dcf3f8131d6b..1f97758de4ab 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -35,79 +35,88 @@ # endif #endif +/* Node not present */ +#define NUMA_NO_NODE (-1) + #ifdef CONFIG_NUMA #include <linux/cpumask.h> #include <asm/mpspec.h> -/* Mappings between logical cpu number and node number */ #ifdef CONFIG_X86_32 -extern int cpu_to_node_map[]; -#else -/* Returns the number of the current Node. */ -#define numa_node_id() (early_cpu_to_node(raw_smp_processor_id())) -#endif - -DECLARE_PER_CPU(int, x86_cpu_to_node_map); - -#ifdef CONFIG_SMP -extern int x86_cpu_to_node_map_init[]; -extern void *x86_cpu_to_node_map_early_ptr; -#else -#define x86_cpu_to_node_map_early_ptr NULL -#endif +/* Mappings between node number and cpus on that node. */ extern cpumask_t node_to_cpumask_map[]; -#define NUMA_NO_NODE (-1) +/* Mappings between logical cpu number and node number */ +extern int cpu_to_node_map[]; /* Returns the number of the node containing CPU 'cpu' */ -#ifdef CONFIG_X86_32 -#define early_cpu_to_node(cpu) cpu_to_node(cpu) static inline int cpu_to_node(int cpu) { return cpu_to_node_map[cpu]; } +#define early_cpu_to_node(cpu) cpu_to_node(cpu) -#else /* CONFIG_X86_64 */ - -#ifdef CONFIG_SMP -static inline int early_cpu_to_node(int cpu) +/* Returns a bitmask of CPUs on Node 'node'. */ +static inline cpumask_t node_to_cpumask(int node) { - int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr; - - if (cpu_to_node_map) - return cpu_to_node_map[cpu]; - else if (per_cpu_offset(cpu)) - return per_cpu(x86_cpu_to_node_map, cpu); - else - return NUMA_NO_NODE; + return node_to_cpumask_map[node]; } -#else -#define early_cpu_to_node(cpu) cpu_to_node(cpu) -#endif +#else /* CONFIG_X86_64 */ + +/* Mappings between node number and cpus on that node. */ +extern cpumask_t *node_to_cpumask_map; + +/* Mappings between logical cpu number and node number */ +DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); + +/* Returns the number of the current Node. */ +#define numa_node_id() read_pda(nodenumber) + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +extern int cpu_to_node(int cpu); +extern int early_cpu_to_node(int cpu); +extern cpumask_t *_node_to_cpumask_ptr(int node); +extern cpumask_t node_to_cpumask(int node); + +#else /* !CONFIG_DEBUG_PER_CPU_MAPS */ + +/* Returns the number of the node containing CPU 'cpu' */ static inline int cpu_to_node(int cpu) { -#ifdef CONFIG_DEBUG_PER_CPU_MAPS - if (x86_cpu_to_node_map_early_ptr) { - printk("KERN_NOTICE cpu_to_node(%d): usage too early!\n", - (int)cpu); - dump_stack(); - return ((int *)x86_cpu_to_node_map_early_ptr)[cpu]; - } -#endif return per_cpu(x86_cpu_to_node_map, cpu); } -#ifdef CONFIG_NUMA +/* Same function but used if called before per_cpu areas are setup */ +static inline int early_cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + + return per_cpu(x86_cpu_to_node_map, cpu); +} /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ +static inline cpumask_t *_node_to_cpumask_ptr(int node) +{ + return &node_to_cpumask_map[node]; +} + +/* Returns a bitmask of CPUs on Node 'node'. */ +static inline cpumask_t node_to_cpumask(int node) +{ + return node_to_cpumask_map[node]; +} + +#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ + +/* Replace default node_to_cpumask_ptr with optimized version */ #define node_to_cpumask_ptr(v, node) \ - cpumask_t *v = &(node_to_cpumask_map[node]) + cpumask_t *v = _node_to_cpumask_ptr(node) #define node_to_cpumask_ptr_next(v, node) \ - v = &(node_to_cpumask_map[node]) -#endif + v = _node_to_cpumask_ptr(node) #endif /* CONFIG_X86_64 */ @@ -117,20 +126,6 @@ static inline int cpu_to_node(int cpu) */ #define parent_node(node) (node) -/* Returns a bitmask of CPUs on Node 'node'. */ -static inline cpumask_t node_to_cpumask(int node) -{ - return node_to_cpumask_map[node]; -} - -/* Returns the number of the first CPU on Node 'node'. */ -static inline int node_to_first_cpu(int node) -{ - cpumask_t mask = node_to_cpumask(node); - - return first_cpu(mask); -} - #define pcibus_to_node(bus) __pcibus_to_node(bus) #define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus) @@ -180,12 +175,44 @@ extern int __node_distance(int, int); #define node_distance(a, b) __node_distance(a, b) #endif -#else /* CONFIG_NUMA */ +#else /* !CONFIG_NUMA */ +#define numa_node_id() 0 +#define cpu_to_node(cpu) 0 +#define early_cpu_to_node(cpu) 0 + +static inline cpumask_t *_node_to_cpumask_ptr(int node) +{ + return &cpu_online_map; +} +static inline cpumask_t node_to_cpumask(int node) +{ + return cpu_online_map; +} +static inline int node_to_first_cpu(int node) +{ + return first_cpu(cpu_online_map); +} + +/* Replace default node_to_cpumask_ptr with optimized version */ +#define node_to_cpumask_ptr(v, node) \ + cpumask_t *v = _node_to_cpumask_ptr(node) + +#define node_to_cpumask_ptr_next(v, node) \ + v = _node_to_cpumask_ptr(node) #endif #include <asm-generic/topology.h> +#ifdef CONFIG_NUMA +/* Returns the number of the first CPU on Node 'node'. */ +static inline int node_to_first_cpu(int node) +{ + node_to_cpumask_ptr(mask, node); + return first_cpu(*mask); +} +#endif + extern cpumask_t cpu_coregroup_map(int cpu); #ifdef ENABLE_TOPO_DEFINES @@ -193,6 +220,9 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) + +/* indicates that pointers to the topology cpumask_t maps are valid */ +#define arch_provides_topology_pointers yes #endif static inline void arch_fix_phys_package_id(int num, u32 slot) @@ -220,4 +250,4 @@ static inline void set_mp_bus_to_node(int busnum, int node) } #endif -#endif +#endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9650806fe2ea..2174d39e39d1 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -17,6 +17,20 @@ * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c. * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c. * + * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + * Note: The alternate operations with the suffix "_nr" are used + * to limit the range of the loop to nr_cpu_ids instead of + * NR_CPUS when NR_CPUS > 64 for performance reasons. + * If NR_CPUS is <= 64 then most assembler bitmask + * operators execute faster with a constant range, so + * the operator will continue to use NR_CPUS. + * + * Another consideration is that nr_cpu_ids is initialized + * to NR_CPUS and isn't lowered until the possible cpus are + * discovered (including any disabled cpus). So early uses + * will span the entire range of NR_CPUS. + * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . + * * The available cpumask operations are: * * void cpu_set(cpu, mask) turn on bit 'cpu' in mask @@ -38,12 +52,14 @@ * int cpus_empty(mask) Is mask empty (no bits sets)? * int cpus_full(mask) Is mask full (all bits sets)? * int cpus_weight(mask) Hamming weigh - number of set bits + * int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS * * void cpus_shift_right(dst, src, n) Shift right * void cpus_shift_left(dst, src, n) Shift left * * int first_cpu(mask) Number lowest set bit, or NR_CPUS * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS + * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids * * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set * CPU_MASK_ALL Initializer - all bits set @@ -59,7 +75,8 @@ * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz * - * for_each_cpu_mask(cpu, mask) for-loop cpu over mask + * for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS + * for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids * * int num_online_cpus() Number of online CPUs * int num_possible_cpus() Number of all possible CPUs @@ -216,15 +233,6 @@ static inline void __cpus_shift_left(cpumask_t *dstp, bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); } -#ifdef CONFIG_SMP -int __first_cpu(const cpumask_t *srcp); -#define first_cpu(src) __first_cpu(&(src)) -int __next_cpu(int n, const cpumask_t *srcp); -#define next_cpu(n, src) __next_cpu((n), &(src)) -#else -#define first_cpu(src) ({ (void)(src); 0; }) -#define next_cpu(n, src) ({ (void)(src); 1; }) -#endif #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP extern cpumask_t *cpumask_of_cpu_map; @@ -350,15 +358,48 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, bitmap_fold(dstp->bits, origp->bits, sz, nbits); } -#if NR_CPUS > 1 +#if NR_CPUS == 1 + +#define nr_cpu_ids 1 +#define first_cpu(src) ({ (void)(src); 0; }) +#define next_cpu(n, src) ({ (void)(src); 1; }) +#define any_online_cpu(mask) 0 +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) + +#else /* NR_CPUS > 1 */ + +extern int nr_cpu_ids; +int __first_cpu(const cpumask_t *srcp); +int __next_cpu(int n, const cpumask_t *srcp); +int __any_online_cpu(const cpumask_t *mask); + +#define first_cpu(src) __first_cpu(&(src)) +#define next_cpu(n, src) __next_cpu((n), &(src)) +#define any_online_cpu(mask) __any_online_cpu(&(mask)) #define for_each_cpu_mask(cpu, mask) \ for ((cpu) = first_cpu(mask); \ (cpu) < NR_CPUS; \ (cpu) = next_cpu((cpu), (mask))) -#else /* NR_CPUS == 1 */ -#define for_each_cpu_mask(cpu, mask) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) -#endif /* NR_CPUS */ +#endif + +#if NR_CPUS <= 64 + +#define next_cpu_nr(n, src) next_cpu(n, src) +#define cpus_weight_nr(cpumask) cpus_weight(cpumask) +#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) + +#else /* NR_CPUS > 64 */ + +int __next_cpu_nr(int n, const cpumask_t *srcp); +#define next_cpu_nr(n, src) __next_cpu_nr((n), &(src)) +#define cpus_weight_nr(cpumask) __cpus_weight(&(cpumask), nr_cpu_ids) +#define for_each_cpu_mask_nr(cpu, mask) \ + for ((cpu) = first_cpu(mask); \ + (cpu) < nr_cpu_ids; \ + (cpu) = next_cpu_nr((cpu), (mask))) + +#endif /* NR_CPUS > 64 */ /* * The following particular system cpumasks and operations manage @@ -419,14 +460,16 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, extern cpumask_t cpu_possible_map; extern cpumask_t cpu_online_map; extern cpumask_t cpu_present_map; +extern cpumask_t cpu_system_map; #if NR_CPUS > 1 -#define num_online_cpus() cpus_weight(cpu_online_map) -#define num_possible_cpus() cpus_weight(cpu_possible_map) -#define num_present_cpus() cpus_weight(cpu_present_map) +#define num_online_cpus() cpus_weight_nr(cpu_online_map) +#define num_possible_cpus() cpus_weight_nr(cpu_possible_map) +#define num_present_cpus() cpus_weight_nr(cpu_present_map) #define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) #define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) #define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) +#define cpu_system(cpu) cpu_isset((cpu), cpu_system_map) #else #define num_online_cpus() 1 #define num_possible_cpus() 1 @@ -434,21 +477,15 @@ extern cpumask_t cpu_present_map; #define cpu_online(cpu) ((cpu) == 0) #define cpu_possible(cpu) ((cpu) == 0) #define cpu_present(cpu) ((cpu) == 0) +#define cpu_system(cpu) ((cpu) == 0) #endif -#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) +extern int cpus_match_system(cpumask_t mask); -#ifdef CONFIG_SMP -extern int nr_cpu_ids; -#define any_online_cpu(mask) __any_online_cpu(&(mask)) -int __any_online_cpu(const cpumask_t *mask); -#else -#define nr_cpu_ids 1 -#define any_online_cpu(mask) 0 -#endif +#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) -#define for_each_possible_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) -#define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) -#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) +#define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_possible_map) +#define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_online_map) +#define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_present_map) #endif /* __LINUX_CPUMASK_H */ diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 038578362b47..712f25c16b8d 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -78,6 +78,8 @@ extern void cpuset_track_online_nodes(void); extern int current_cpuset_is_being_rebound(void); +extern struct blocking_notifier_head system_map_notifier; + #else /* !CONFIG_CPUSETS */ static inline int cpuset_init_early(void) { return 0; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2a6639407c80..1f5cebf10a23 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -3,6 +3,7 @@ #include <asm/atomic.h> #include <linux/list.h> +#include <linux/rculist.h> #include <linux/spinlock.h> #include <linux/cache.h> #include <linux/rcupdate.h> diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 897f723bd222..e15b0ea6073b 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -22,10 +22,13 @@ * PREEMPT_MASK: 0x000000ff * SOFTIRQ_MASK: 0x0000ff00 * HARDIRQ_MASK: 0x0fff0000 + * HARDNMI_MASK: 0x40000000 */ #define PREEMPT_BITS 8 #define SOFTIRQ_BITS 8 +#define HARDNMI_BITS 1 + #ifndef HARDIRQ_BITS #define HARDIRQ_BITS 12 @@ -45,16 +48,19 @@ #define PREEMPT_SHIFT 0 #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) +#define HARDNMI_SHIFT (30) #define __IRQ_MASK(x) ((1UL << (x))-1) #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) +#define HARDNMI_MASK (__IRQ_MASK(HARDNMI_BITS) << HARDNMI_SHIFT) #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) +#define HARDNMI_OFFSET (1UL << HARDNMI_SHIFT) #if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS)) #error PREEMPT_ACTIVE is too low! @@ -62,7 +68,9 @@ #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #define softirq_count() (preempt_count() & SOFTIRQ_MASK) -#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK)) +#define irq_count() \ + (preempt_count() & (HARDNMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK)) +#define hardnmi_count() (preempt_count() & HARDNMI_MASK) /* * Are we doing bottom half or hardware interrupt processing? @@ -71,6 +79,7 @@ #define in_irq() (hardirq_count()) #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) +#define in_nmi() (hardnmi_count()) /* * Are we running in atomic context? WARNING: this macro cannot @@ -159,7 +168,19 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) +#define nmi_enter() \ + do { \ + lockdep_off(); \ + BUG_ON(hardnmi_count()); \ + add_preempt_count(HARDNMI_OFFSET); \ + __irq_enter(); \ + } while (0) + +#define nmi_exit() \ + do { \ + __irq_exit(); \ + sub_preempt_count(HARDNMI_OFFSET); \ + lockdep_on(); \ + } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/immediate.h b/include/linux/immediate.h new file mode 100644 index 000000000000..3754e846eaa0 --- /dev/null +++ b/include/linux/immediate.h @@ -0,0 +1,97 @@ +#ifndef _LINUX_IMMEDIATE_H +#define _LINUX_IMMEDIATE_H + +/* + * Immediate values, can be updated at runtime and save cache lines. + * + * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + */ + +#ifdef CONFIG_IMMEDIATE + +#include <asm/immediate.h> + +/** + * imv_set - set immediate variable (with locking) + * @name: immediate value name + * @i: required value + * + * Sets the value of @name, taking the module_mutex if required by + * the architecture. + */ +#define imv_set(name, i) \ + do { \ + name##__imv = (i); \ + core_imv_update(); \ + module_imv_update(); \ + } while (0) + +/* + * Internal update functions. + */ +extern void core_imv_update(void); +extern void imv_update_range(struct __imv *begin, struct __imv *end); +extern void imv_unref_core_init(void); +extern void imv_unref(struct __imv *begin, struct __imv *end, void *start, + unsigned long size); +extern int _is_imv_cond_end(unsigned long *begin, unsigned long *end, + unsigned long addr1, unsigned long addr2); +extern int is_imv_cond_end(unsigned long addr1, unsigned long addr2); + +#else + +/* + * Generic immediate values: a simple, standard, memory load. + */ + +/** + * imv_read - read immediate variable + * @name: immediate value name + * + * Reads the value of @name. + */ +#define imv_read(name) _imv_read(name) + +/** + * imv_cond - read immediate variable use as condition for if() + * @name: immediate value name + * + * Reads the value of @name. + */ +#define imv_cond(name) _imv_read(name) +#define imv_cond_end() + +/** + * imv_set - set immediate variable (with locking) + * @name: immediate value name + * @i: required value + * + * Sets the value of @name, taking the module_mutex if required by + * the architecture. + */ +#define imv_set(name, i) (name##__imv = (i)) + +static inline void core_imv_update(void) { } +static inline void imv_unref_core_init(void) { } + +#endif + +#define DECLARE_IMV(type, name) extern __typeof__(type) name##__imv +#define DEFINE_IMV(type, name) __typeof__(type) name##__imv + +#define EXPORT_IMV_SYMBOL(name) EXPORT_SYMBOL(name##__imv) +#define EXPORT_IMV_SYMBOL_GPL(name) EXPORT_SYMBOL_GPL(name##__imv) + +/** + * _imv_read - Read immediate value with standard memory load. + * @name: immediate value name + * + * Force a data read of the immediate value instead of the immediate value + * based mechanism. Useful for __init and __exit section data read. + */ +#define _imv_read(name) (name##__imv) + +#endif diff --git a/include/linux/irq.h b/include/linux/irq.h index 552e0ec269c9..f8d5dc08c53a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -244,14 +244,7 @@ static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) } #endif -#ifdef CONFIG_AUTO_IRQ_AFFINITY extern int select_smp_affinity(unsigned int irq); -#else -static inline int select_smp_affinity(unsigned int irq) -{ - return 1; -} -#endif extern int no_irq_affinity; diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7cb7ea58232f..a1ecdb8cc4f8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -184,9 +184,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) __attribute__ ((format (printf, 1, 0))); asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern int log_buf_get_len(void); -extern int log_buf_read(int idx); -extern int log_buf_copy(char *dest, int idx, int len); extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; @@ -202,9 +199,6 @@ static inline int vprintk(const char *s, va_list args) { return 0; } static inline int printk(const char *s, ...) __attribute__ ((format (printf, 1, 2))); static inline int __cold printk(const char *s, ...) { return 0; } -static inline int log_buf_get_len(void) { return 0; } -static inline int log_buf_read(int idx) { return 0; } -static inline int log_buf_copy(char *dest, int idx, int len) { return 0; } static inline int printk_ratelimit(void) { return 0; } static inline int __printk_ratelimit(int ratelimit_jiffies, \ int ratelimit_burst) { return 0; } diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 1036631ff4fa..56d2d410415c 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -35,7 +35,6 @@ #include <linux/percpu.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> -#include <linux/mutex.h> #ifdef CONFIG_KPROBES #include <asm/kprobes.h> @@ -202,7 +201,6 @@ static inline int init_test_probes(void) #endif /* CONFIG_KPROBES_SANITY_TEST */ extern spinlock_t kretprobe_lock; -extern struct mutex kprobe_mutex; extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); diff --git a/include/linux/list.h b/include/linux/list.h index 08cf4f651889..139ec41d9c2e 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -85,65 +85,6 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head) } /* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add_rcu(struct list_head * new, - struct list_head * prev, struct list_head * next) -{ - new->next = next; - new->prev = prev; - smp_wmb(); - next->prev = new; - prev->next = new; -} - -/** - * list_add_rcu - add a new entry to rcu-protected list - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_add_rcu() - * or list_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - */ -static inline void list_add_rcu(struct list_head *new, struct list_head *head) -{ - __list_add_rcu(new, head, head->next); -} - -/** - * list_add_tail_rcu - add a new entry to rcu-protected list - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_add_tail_rcu() - * or list_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - */ -static inline void list_add_tail_rcu(struct list_head *new, - struct list_head *head) -{ - __list_add_rcu(new, head->prev, head); -} - -/* * Delete a list entry by making the prev/next entries * point to each other. * @@ -174,36 +115,6 @@ extern void list_del(struct list_head *entry); #endif /** - * list_del_rcu - deletes entry from list without re-initialization - * @entry: the element to delete from the list. - * - * Note: list_empty() on entry does not return true after this, - * the entry is in an undefined state. It is useful for RCU based - * lockfree traversal. - * - * In particular, it means that we can not poison the forward - * pointers that may still be used for walking the list. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_del_rcu() - * or list_add_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - * - * Note that the caller is not permitted to immediately free - * the newly deleted entry. Instead, either synchronize_rcu() - * or call_rcu() must be used to defer freeing until an RCU - * grace period has elapsed. - */ -static inline void list_del_rcu(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->prev = LIST_POISON2; -} - -/** * list_replace - replace old entry by new one * @old : the element to be replaced * @new : the new element to insert @@ -227,25 +138,6 @@ static inline void list_replace_init(struct list_head *old, } /** - * list_replace_rcu - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * - * The @old entry will be replaced with the @new entry atomically. - * Note: @old should not be empty. - */ -static inline void list_replace_rcu(struct list_head *old, - struct list_head *new) -{ - new->next = old->next; - new->prev = old->prev; - smp_wmb(); - new->next->prev = new; - new->prev->next = new; - old->prev = LIST_POISON2; -} - -/** * list_del_init - deletes entry from list and reinitialize it. * @entry: the element to delete from the list. */ @@ -369,62 +261,6 @@ static inline void list_splice_init(struct list_head *list, } /** - * list_splice_init_rcu - splice an RCU-protected list into an existing list. - * @list: the RCU-protected list to splice - * @head: the place in the list to splice the first list into - * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... - * - * @head can be RCU-read traversed concurrently with this function. - * - * Note that this function blocks. - * - * Important note: the caller must take whatever action is necessary to - * prevent any other updates to @head. In principle, it is possible - * to modify the list as soon as sync() begins execution. - * If this sort of thing becomes necessary, an alternative version - * based on call_rcu() could be created. But only if -really- - * needed -- there is no shortage of RCU API members. - */ -static inline void list_splice_init_rcu(struct list_head *list, - struct list_head *head, - void (*sync)(void)) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - if (list_empty(head)) - return; - - /* "first" and "last" tracking list, so initialize it. */ - - INIT_LIST_HEAD(list); - - /* - * At this point, the list body still points to the source list. - * Wait for any readers to finish using the list before splicing - * the list body into the new list. Any new readers will see - * an empty list. - */ - - sync(); - - /* - * Readers are finished with the source list, so perform splice. - * The order is important if the new list is global and accessible - * to concurrent RCU readers. Note that RCU readers are not - * permitted to traverse the prev pointers without excluding - * this function. - */ - - last->next = at; - smp_wmb(); - head->next = first; - first->prev = head; - at->prev = last; -} - -/** * list_entry - get the struct for this entry * @ptr: the &struct list_head pointer. * @type: the type of the struct this is embedded in. @@ -629,57 +465,6 @@ static inline void list_splice_init_rcu(struct list_head *list, &pos->member != (head); \ pos = n, n = list_entry(n->member.prev, typeof(*n), member)) -/** - * list_for_each_rcu - iterate over an rcu-protected list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - prefetch(pos->next), pos != (head); \ - pos = rcu_dereference(pos->next)) - -#define __list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - pos != (head); \ - pos = rcu_dereference(pos->next)) - -/** - * list_for_each_entry_rcu - iterate over rcu list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) - - -/** - * list_for_each_continue_rcu - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * Iterate over an rcu-protected list, continuing after current point. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference((pos)->next); \ - prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference((pos)->next)) - /* * Double linked lists with a single pointer list head. * Mostly useful for hash tables where the two pointer list head is @@ -730,31 +515,6 @@ static inline void hlist_del(struct hlist_node *n) n->pprev = LIST_POISON2; } -/** - * hlist_del_rcu - deletes entry from hash list without re-initialization - * @n: the element to delete from the hash list. - * - * Note: list_unhashed() on entry does not return true after this, - * the entry is in an undefined state. It is useful for RCU based - * lockfree traversal. - * - * In particular, it means that we can not poison the forward - * pointers that may still be used for walking the hash list. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry(). - */ -static inline void hlist_del_rcu(struct hlist_node *n) -{ - __hlist_del(n); - n->pprev = LIST_POISON2; -} - static inline void hlist_del_init(struct hlist_node *n) { if (!hlist_unhashed(n)) { @@ -763,27 +523,6 @@ static inline void hlist_del_init(struct hlist_node *n) } } -/** - * hlist_replace_rcu - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * - * The @old entry will be replaced with the @new entry atomically. - */ -static inline void hlist_replace_rcu(struct hlist_node *old, - struct hlist_node *new) -{ - struct hlist_node *next = old->next; - - new->next = next; - new->pprev = old->pprev; - smp_wmb(); - if (next) - new->next->pprev = &new->next; - *new->pprev = new; - old->pprev = LIST_POISON2; -} - static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; @@ -794,38 +533,6 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) n->pprev = &h->first; } - -/** - * hlist_add_head_rcu - * @n: the element to add to the hash list. - * @h: the list to add to. - * - * Description: - * Adds the specified element to the specified hlist, - * while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. Regardless of the type of CPU, the - * list-traversal primitive must be guarded by rcu_read_lock(). - */ -static inline void hlist_add_head_rcu(struct hlist_node *n, - struct hlist_head *h) -{ - struct hlist_node *first = h->first; - n->next = first; - n->pprev = &h->first; - smp_wmb(); - if (first) - first->pprev = &n->next; - h->first = n; -} - /* next must be != NULL */ static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next) @@ -847,63 +554,6 @@ static inline void hlist_add_after(struct hlist_node *n, next->next->pprev = &next->next; } -/** - * hlist_add_before_rcu - * @n: the new element to add to the hash list. - * @next: the existing element to add the new element before. - * - * Description: - * Adds the specified element to the specified hlist - * before the specified node while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. - */ -static inline void hlist_add_before_rcu(struct hlist_node *n, - struct hlist_node *next) -{ - n->pprev = next->pprev; - n->next = next; - smp_wmb(); - next->pprev = &n->next; - *(n->pprev) = n; -} - -/** - * hlist_add_after_rcu - * @prev: the existing element to add the new element after. - * @n: the new element to add to the hash list. - * - * Description: - * Adds the specified element to the specified hlist - * after the specified node while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. - */ -static inline void hlist_add_after_rcu(struct hlist_node *prev, - struct hlist_node *n) -{ - n->next = prev->next; - n->pprev = &prev->next; - smp_wmb(); - prev->next = n; - if (n->next) - n->next->pprev = &n->next; -} - #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ @@ -964,21 +614,4 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = n) -/** - * hlist_for_each_entry_rcu - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as hlist_add_head_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = rcu_dereference(pos->next)) - #endif diff --git a/include/linux/marker.h b/include/linux/marker.h index 430f6adf9762..33f3be747d8d 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -12,6 +12,7 @@ * See the file COPYING for more details. */ +#include <linux/immediate.h> #include <linux/types.h> struct module; @@ -42,10 +43,10 @@ struct marker { const char *format; /* Marker format string, describing the * variable argument list. */ - char state; /* Marker state. */ + DEFINE_IMV(char, state);/* Immediate value state. */ char ptype; /* probe type : 0 : single, 1 : multi */ - void (*call)(const struct marker *mdata, /* Probe wrapper */ - void *call_private, const char *fmt, ...); + /* Probe wrapper */ + void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; } __attribute__((aligned(8))); @@ -58,8 +59,12 @@ struct marker { * Make sure the alignment of the structure in the __markers section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. + * + * The "generic" argument controls which marker enabling mechanism must be used. + * If generic is true, a variable read is used. + * If generic is false, immediate values are used. */ -#define __trace_mark(name, call_private, format, args...) \ +#define __trace_mark(generic, name, call_private, format, args...) \ do { \ static const char __mstrtab_##name[] \ __attribute__((section("__markers_strings"))) \ @@ -70,17 +75,26 @@ struct marker { 0, 0, marker_probe_cb, \ { __mark_empty_function, NULL}, NULL }; \ __mark_check_format(format, ## args); \ - if (unlikely(__mark_##name.state)) { \ - (*__mark_##name.call) \ - (&__mark_##name, call_private, \ - format, ## args); \ + if (!generic) { \ + if (unlikely(imv_cond(__mark_##name.state))) { \ + imv_cond_end(); \ + (*__mark_##name.call) \ + (&__mark_##name, call_private, \ + ## args); \ + } else \ + imv_cond_end(); \ + } else { \ + if (unlikely(_imv_read(__mark_##name.state))) \ + (*__mark_##name.call) \ + (&__mark_##name, call_private, \ + ## args); \ } \ } while (0) extern void marker_update_probe_range(struct marker *begin, struct marker *end); #else /* !CONFIG_MARKERS */ -#define __trace_mark(name, call_private, format, args...) \ +#define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) static inline void marker_update_probe_range(struct marker *begin, struct marker *end) @@ -88,15 +102,30 @@ static inline void marker_update_probe_range(struct marker *begin, #endif /* CONFIG_MARKERS */ /** - * trace_mark - Marker + * trace_mark - Marker using code patching * @name: marker name, not quoted. * @format: format string * @args...: variable argument list * - * Places a marker. + * Places a marker using optimized code patching technique (imv_read()) + * to be enabled when immediate values are present. */ #define trace_mark(name, format, args...) \ - __trace_mark(name, NULL, format, ## args) + __trace_mark(0, name, NULL, format, ## args) + +/** + * _trace_mark - Marker using variable read + * @name: marker name, not quoted. + * @format: format string + * @args...: variable argument list + * + * Places a marker using a standard memory read (_imv_read()) to be + * enabled. Should be used for markers in code paths where instruction + * modification based enabling is not welcome. (__init and __exit functions, + * lockdep, some traps, printk). + */ +#define _trace_mark(name, format, args...) \ + __trace_mark(1, name, NULL, format, ## args) /** * MARK_NOARGS - Format string for a marker with no argument. @@ -117,9 +146,9 @@ static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, - void *call_private, const char *fmt, ...); + void *call_private, ...); extern void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, const char *fmt, ...); + void *call_private, ...); /* * Connect a probe to a marker. diff --git a/include/linux/memory.h b/include/linux/memory.h index 2f5f8a5ef2a0..95379b568050 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -99,4 +99,11 @@ extern int memory_notify(unsigned long val, void *v); #define hotplug_memory_notifier(fn, pri) do { } while (0) #endif +/* + * Take and release the kernel text modification lock, used for code patching. + * Users of this lock can sleep. + */ +extern void kernel_text_lock(void); +extern void kernel_text_unlock(void); + #endif /* _LINUX_MEMORY_H_ */ diff --git a/include/linux/mm.h b/include/linux/mm.h index c31a9cd2a30e..28694deb95f8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1023,6 +1023,7 @@ extern void mem_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); +extern int after_bootmem; #ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); diff --git a/include/linux/module.h b/include/linux/module.h index 3e03b1acbc94..3263812bf9e8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -15,6 +15,7 @@ #include <linux/stringify.h> #include <linux/kobject.h> #include <linux/moduleparam.h> +#include <linux/immediate.h> #include <linux/marker.h> #include <asm/local.h> @@ -338,6 +339,12 @@ struct module /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; +#ifdef CONFIG_IMMEDIATE + struct __imv *immediate; + unsigned int num_immediate; + unsigned long *immediate_cond_end; + unsigned int num_immediate_cond_end; +#endif #ifdef CONFIG_MARKERS struct marker *markers; unsigned int num_markers; @@ -556,6 +563,24 @@ static inline void module_update_markers(void) #endif /* CONFIG_MODULES */ +#if defined(CONFIG_MODULES) && defined(CONFIG_IMMEDIATE) +extern void _module_imv_update(void); +extern void module_imv_update(void); +extern int is_imv_cond_end_module(unsigned long addr1, unsigned long addr2); +#else +static inline void _module_imv_update(void) +{ +} +static inline void module_imv_update(void) +{ +} +static inline int is_imv_cond_end_module(unsigned long addr1, + unsigned long addr2) +{ + return 0; +} +#endif + struct device_driver; #ifdef CONFIG_SYSFS struct module; diff --git a/include/linux/profile.h b/include/linux/profile.h index 05c1cc736937..1397c38b85b0 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -5,10 +5,11 @@ #include <linux/init.h> #include <linux/cpumask.h> #include <linux/cache.h> +#include <linux/immediate.h> #include <asm/errno.h> -extern int prof_on __read_mostly; +DECLARE_IMV(char, prof_on) __read_mostly; #define CPU_PROFILING 1 #define SCHED_PROFILING 2 @@ -36,7 +37,7 @@ static inline void profile_hit(int type, void *ip) /* * Speedup for the common (no profiling enabled) case: */ - if (unlikely(prof_on == type)) + if (unlikely(imv_read(prof_on) == type)) profile_hits(type, ip, 1); } diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index b3aa05baab8a..8c774905dcfe 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -151,7 +151,10 @@ extern struct lockdep_map rcu_lock_map; #define __synchronize_sched() synchronize_rcu() +#define call_rcu_sched(head, func) call_rcu(head, func) + extern void __rcu_init(void); +#define rcu_init_sched() do { } while (0) extern void rcu_check_callbacks(int cpu, int user); extern void rcu_restart_cpu(int cpu); diff --git a/include/linux/rculist.h b/include/linux/rculist.h new file mode 100644 index 000000000000..b0f39be08b6c --- /dev/null +++ b/include/linux/rculist.h @@ -0,0 +1,373 @@ +#ifndef _LINUX_RCULIST_H +#define _LINUX_RCULIST_H + +#ifdef __KERNEL__ + +/* + * RCU-protected list version + */ +#include <linux/list.h> +#include <linux/rcupdate.h> + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next) +{ + new->next = next; + new->prev = prev; + rcu_assign_pointer(prev->next, new); + next->prev = new; +} + +/** + * list_add_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_rcu(struct list_head *new, struct list_head *head) +{ + __list_add_rcu(new, head, head->next); +} + +/** + * list_add_tail_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_tail_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_tail_rcu(struct list_head *new, + struct list_head *head) +{ + __list_add_rcu(new, head->prev, head); +} + +/** + * list_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * Note: list_empty() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_del_rcu() + * or list_add_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + * + * Note that the caller is not permitted to immediately free + * the newly deleted entry. Instead, either synchronize_rcu() + * or call_rcu() must be used to defer freeing until an RCU + * grace period has elapsed. + */ +static inline void list_del_rcu(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->prev = LIST_POISON2; +} + +/** + * list_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + * Note: @old should not be empty. + */ +static inline void list_replace_rcu(struct list_head *old, + struct list_head *new) +{ + new->next = old->next; + new->prev = old->prev; + rcu_assign_pointer(new->prev->next, new); + new->next->prev = new; + old->prev = LIST_POISON2; +} + +/** + * list_splice_init_rcu - splice an RCU-protected list into an existing list. + * @list: the RCU-protected list to splice + * @head: the place in the list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + * + * @head can be RCU-read traversed concurrently with this function. + * + * Note that this function blocks. + * + * Important note: the caller must take whatever action is necessary to + * prevent any other updates to @head. In principle, it is possible + * to modify the list as soon as sync() begins execution. + * If this sort of thing becomes necessary, an alternative version + * based on call_rcu() could be created. But only if -really- + * needed -- there is no shortage of RCU API members. + */ +static inline void list_splice_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + if (list_empty(head)) + return; + + /* "first" and "last" tracking list, so initialize it. */ + + INIT_LIST_HEAD(list); + + /* + * At this point, the list body still points to the source list. + * Wait for any readers to finish using the list before splicing + * the list body into the new list. Any new readers will see + * an empty list. + */ + + sync(); + + /* + * Readers are finished with the source list, so perform splice. + * The order is important if the new list is global and accessible + * to concurrent RCU readers. Note that RCU readers are not + * permitted to traverse the prev pointers without excluding + * this function. + */ + + last->next = at; + rcu_assign_pointer(head->next, first); + first->prev = head; + at->prev = last; +} + +/** + * list_for_each_rcu - iterate over an rcu-protected list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_rcu(pos, head) \ + for (pos = rcu_dereference((head)->next); \ + prefetch(pos->next), pos != (head); \ + pos = rcu_dereference(pos->next)) + +#define __list_for_each_rcu(pos, head) \ + for (pos = rcu_dereference((head)->next); \ + pos != (head); \ + pos = rcu_dereference(pos->next)) + +/** + * list_for_each_entry_rcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_entry_rcu(pos, head, member) \ + for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ + prefetch(pos->member.next), &pos->member != (head); \ + pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) + + +/** + * list_for_each_continue_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * Iterate over an rcu-protected list, continuing after current point. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_continue_rcu(pos, head) \ + for ((pos) = rcu_dereference((pos)->next); \ + prefetch((pos)->next), (pos) != (head); \ + (pos) = rcu_dereference((pos)->next)) + +/** + * hlist_del_rcu - deletes entry from hash list without re-initialization + * @n: the element to delete from the hash list. + * + * Note: list_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry(). + */ +static inline void hlist_del_rcu(struct hlist_node *n) +{ + __hlist_del(n); + n->pprev = LIST_POISON2; +} + +/** + * hlist_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + */ +static inline void hlist_replace_rcu(struct hlist_node *old, + struct hlist_node *new) +{ + struct hlist_node *next = old->next; + + new->next = next; + new->pprev = old->pprev; + rcu_assign_pointer(*new->pprev, new); + if (next) + new->next->pprev = &new->next; + old->pprev = LIST_POISON2; +} + +/** + * hlist_add_head_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_add_head_rcu(struct hlist_node *n, + struct hlist_head *h) +{ + struct hlist_node *first = h->first; + + n->next = first; + n->pprev = &h->first; + rcu_assign_pointer(h->first, n); + if (first) + first->pprev = &n->next; +} + +/** + * hlist_add_before_rcu + * @n: the new element to add to the hash list. + * @next: the existing element to add the new element before. + * + * Description: + * Adds the specified element to the specified hlist + * before the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_before_rcu(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + rcu_assign_pointer(*(n->pprev), n); + next->pprev = &n->next; +} + +/** + * hlist_add_after_rcu + * @prev: the existing element to add the new element after. + * @n: the new element to add to the hash list. + * + * Description: + * Adds the specified element to the specified hlist + * after the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_after_rcu(struct hlist_node *prev, + struct hlist_node *n) +{ + n->next = prev->next; + n->pprev = &prev->next; + rcu_assign_pointer(prev->next, n); + if (n->next) + n->next->pprev = &n->next; +} + +/** + * hlist_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference((head)->first); \ + pos && ({ prefetch(pos->next); 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference(pos->next)) + +#endif /* __KERNEL__ */ +#endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8082d6587a0f..cfeed4db46a7 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,6 +40,7 @@ #include <linux/cpumask.h> #include <linux/seqlock.h> #include <linux/lockdep.h> +#include <linux/completion.h> /** * struct rcu_head - callback structure for use with RCU @@ -180,6 +181,27 @@ struct rcu_head { (p) = (v); \ }) +/* Infrastructure to implement the synchronize_() primitives. */ + +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; + +extern void wakeme_after_rcu(struct rcu_head *head); + +#define synchronize_rcu_xxx(name, func) \ +void name(void) \ +{ \ + struct rcu_synchronize rcu; \ + \ + init_completion(&rcu.completion); \ + /* Will wake me after RCU finished. */ \ + func(&rcu.head, wakeme_after_rcu); \ + /* Wait for it. */ \ + wait_for_completion(&rcu.completion); \ +} + /** * synchronize_sched - block until all CPUs have exited any non-preemptive * kernel code sequences. @@ -236,8 +258,8 @@ extern void call_rcu_bh(struct rcu_head *head, /* Exported common interfaces */ extern void synchronize_rcu(void); extern void rcu_barrier(void); -extern long rcu_batches_completed(void); -extern long rcu_batches_completed_bh(void); +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); /* Internal to kernel */ extern void rcu_init(void); diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 8a05c7e20bc4..f04b64eca636 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -40,10 +40,39 @@ #include <linux/cpumask.h> #include <linux/seqlock.h> -#define rcu_qsctr_inc(cpu) +struct rcu_dyntick_sched { + int dynticks; + int dynticks_snap; + int sched_qs; + int sched_qs_snap; + int sched_dynticks_snap; +}; + +DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); + +static inline void rcu_qsctr_inc(int cpu) +{ + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + rdssp->sched_qs++; +} #define rcu_bh_qsctr_inc(cpu) #define call_rcu_bh(head, rcu) call_rcu(head, rcu) +/** + * call_rcu_sched - Queue RCU callback for invocation after sched grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual update function to be invoked after the grace period + * + * The update function will be invoked some time after a full + * synchronize_sched()-style grace period elapses, in other words after + * all currently executing preempt-disabled sections of code (including + * hardirq handlers, NMI handlers, and local_irq_save() blocks) have + * completed. + */ +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *head)); + extern void __rcu_read_lock(void) __acquires(RCU); extern void __rcu_read_unlock(void) __releases(RCU); extern int rcu_pending(int cpu); @@ -55,6 +84,7 @@ extern int rcu_needs_cpu(int cpu); extern void __synchronize_sched(void); extern void __rcu_init(void); +extern void rcu_init_sched(void); extern void rcu_check_callbacks(int cpu, int user); extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); @@ -81,20 +111,20 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); struct softirq_action; #ifdef CONFIG_NO_HZ -DECLARE_PER_CPU(long, dynticks_progress_counter); +DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); static inline void rcu_enter_nohz(void) { smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - __get_cpu_var(dynticks_progress_counter)++; - WARN_ON(__get_cpu_var(dynticks_progress_counter) & 0x1); + __get_cpu_var(rcu_dyntick_sched).dynticks++; + WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1); } static inline void rcu_exit_nohz(void) { - __get_cpu_var(dynticks_progress_counter)++; smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ - WARN_ON(!(__get_cpu_var(dynticks_progress_counter) & 0x1)); + __get_cpu_var(rcu_dyntick_sched).dynticks++; + WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1)); } #else /* CONFIG_NO_HZ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 41f3c50e29ee..d3248cd56cbf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -134,7 +134,6 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); -extern unsigned long weighted_cpuload(const int cpu); struct seq_file; struct cfs_rq; @@ -296,10 +295,11 @@ extern void softlockup_tick(void); extern void spawn_softlockup_task(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); -extern unsigned long softlockup_thresh; +extern unsigned int softlockup_panic; extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; +extern int softlockup_thresh; #else static inline void softlockup_tick(void) { @@ -826,23 +826,6 @@ extern int arch_reinit_sched_domains(void); #endif /* CONFIG_SMP */ -/* - * A runqueue laden with a single nice 0 task scores a weighted_cpuload of - * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a - * task of nice 0 or enough lower priority tasks to bring up the - * weighted_cpuload - */ -static inline int above_background_load(void) -{ - unsigned long cpu; - - for_each_online_cpu(cpu) { - if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE) - return 1; - } - return 0; -} - struct io_context; /* See blkdev.h */ #define NGROUPS_SMALL 32 #define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) @@ -933,10 +916,6 @@ struct sched_class { int running); void (*prio_changed) (struct rq *this_rq, struct task_struct *task, int oldprio, int running); - -#ifdef CONFIG_FAIR_GROUP_SCHED - void (*moved_group) (struct task_struct *p); -#endif }; struct load_weight { @@ -955,9 +934,15 @@ struct load_weight { */ struct sched_entity { struct load_weight load; /* for load-balancing */ - struct rb_node run_node; - struct list_head group_node; + struct rb_node timeline_node; +#ifdef CONFIG_FAIR_GROUP_SCHED + struct rb_node deadline_node; + u64 deadline; + u64 min_deadline; + unsigned int eligible; +#endif unsigned int on_rq; + struct list_head group_node; u64 exec_start; u64 sum_exec_runtime; @@ -2139,38 +2124,6 @@ __trace_special(void *__tr, void *__data, } #endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER -extern void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); -extern void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr); -extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); -#else -static inline void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) -{ -} -static inline void -sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) -{ -} -static inline void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr) -{ -} -static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) -{ -} -static inline void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} -#endif - extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); diff --git a/include/linux/stringify.h b/include/linux/stringify.h index 0b4388356c87..de04a96c50de 100644 --- a/include/linux/stringify.h +++ b/include/linux/stringify.h @@ -6,7 +6,12 @@ * converts to "bar". */ +#ifdef __STDC__ +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) +#else /* Support gcc -traditional, without commas. */ #define __stringify_1(x) #x #define __stringify(x) __stringify_1(x) +#endif #endif /* !__LINUX_STRINGIFY_H */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 6ec39ab27b4b..fb40dace57ec 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -76,6 +76,14 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry) return __swp_entry_to_pte(arch_entry); } +static inline swp_entry_t page_swp_entry(struct page *page) +{ + swp_entry_t entry; + VM_BUG_ON(!PageSwapCache(page)); + entry.val = page_private(page); + return entry; +} + #ifdef CONFIG_MIGRATION static inline swp_entry_t make_migration_entry(struct page *page, int write) { diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f462439cc288..bd91987c065f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -105,6 +105,8 @@ extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; +extern unsigned long determine_dirtyable_memory(void); + extern int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); |