From dd17c8f72993f9461e9c19250e3f155d6d99df22 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: remove per_cpu__ prefix. Now that the return from alloc_percpu is compatible with the address of per-cpu vars, it makes sense to hand around the address of per-cpu variables. To make this sane, we remove the per_cpu__ prefix we used created to stop people accidentally using these vars directly. Now we have sparse, we can use that (next patch). tj: * Updated to convert stuff which were missed by or added after the original patch. * Kill per_cpu_var() macro. Signed-off-by: Rusty Russell Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter --- arch/blackfin/mach-common/entry.S | 4 ++-- arch/cris/arch-v10/kernel/entry.S | 2 +- arch/cris/arch-v32/mm/mmu.S | 2 +- arch/ia64/include/asm/percpu.h | 4 ++-- arch/ia64/kernel/ia64_ksyms.c | 4 ++-- arch/ia64/mm/discontig.c | 2 +- arch/microblaze/include/asm/entry.h | 2 +- arch/parisc/lib/fixup.S | 8 +++---- arch/powerpc/platforms/pseries/hvCall.S | 2 +- arch/sparc/kernel/nmi.c | 6 +++--- arch/sparc/kernel/rtrap_64.S | 8 +++---- arch/x86/include/asm/percpu.h | 37 +++++++++++++++------------------ arch/x86/include/asm/system.h | 8 +++---- arch/x86/kernel/apic/nmi.c | 6 +++--- arch/x86/kernel/head_32.S | 6 +++--- arch/x86/kernel/vmlinux.lds.S | 4 ++-- arch/x86/xen/xen-asm_32.S | 4 ++-- include/asm-generic/percpu.h | 12 +++++------ include/linux/percpu-defs.h | 18 ++++++---------- include/linux/percpu.h | 5 ++--- include/linux/vmstat.h | 8 +++---- kernel/rcutorture.c | 8 +++---- kernel/trace/trace.c | 6 +++--- kernel/trace/trace_functions_graph.c | 4 ++-- 24 files changed, 80 insertions(+), 90 deletions(-) diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 1e7cac23e25f..a3ea7e9fe43b 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -835,8 +835,8 @@ ENDPROC(_resume) ENTRY(_ret_from_exception) #ifdef CONFIG_IPIPE - p2.l = _per_cpu__ipipe_percpu_domain; - p2.h = _per_cpu__ipipe_percpu_domain; + p2.l = _ipipe_percpu_domain; + p2.h = _ipipe_percpu_domain; r0.l = _ipipe_root; r0.h = _ipipe_root; r2 = [p2]; diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S index 2c18d08cd913..c52bef39e250 100644 --- a/arch/cris/arch-v10/kernel/entry.S +++ b/arch/cris/arch-v10/kernel/entry.S @@ -358,7 +358,7 @@ mmu_bus_fault: 1: btstq 12, $r1 ; Refill? bpl 2f lsrq 24, $r1 ; Get PGD index (bit 24-31) - move.d [per_cpu__current_pgd], $r0 ; PGD for the current process + move.d [current_pgd], $r0 ; PGD for the current process move.d [$r0+$r1.d], $r0 ; Get PMD beq 2f nop diff --git a/arch/cris/arch-v32/mm/mmu.S b/arch/cris/arch-v32/mm/mmu.S index 2238d154bde3..f125d912e140 100644 --- a/arch/cris/arch-v32/mm/mmu.S +++ b/arch/cris/arch-v32/mm/mmu.S @@ -115,7 +115,7 @@ #ifdef CONFIG_SMP move $s7, $acr ; PGD #else - move.d per_cpu__current_pgd, $acr ; PGD + move.d current_pgd, $acr ; PGD #endif ; Look up PMD in PGD lsrq 24, $r0 ; Get PMD index into PGD (bit 24-31) diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h index 30cf46534dd2..f7c00a5e0e2b 100644 --- a/arch/ia64/include/asm/percpu.h +++ b/arch/ia64/include/asm/percpu.h @@ -9,7 +9,7 @@ #define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE #ifdef __ASSEMBLY__ -# define THIS_CPU(var) (per_cpu__##var) /* use this to mark accesses to per-CPU variables... */ +# define THIS_CPU(var) (var) /* use this to mark accesses to per-CPU variables... */ #else /* !__ASSEMBLY__ */ @@ -39,7 +39,7 @@ extern void *per_cpu_init(void); * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly * more efficient. */ -#define __ia64_per_cpu_var(var) per_cpu__##var +#define __ia64_per_cpu_var(var) var #include diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 461b99902bf6..7f4a0ed24152 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -30,9 +30,9 @@ EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic #endif #include -EXPORT_SYMBOL(per_cpu__ia64_cpu_info); +EXPORT_SYMBOL(ia64_cpu_info); #ifdef CONFIG_SMP -EXPORT_SYMBOL(per_cpu__local_per_cpu_offset); +EXPORT_SYMBOL(local_per_cpu_offset); #endif #include diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 19c4b2195dce..8d586d1e2515 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -459,7 +459,7 @@ static void __init initialize_pernode_data(void) cpu = 0; node = node_cpuid[cpu].nid; cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start + - ((char *)&per_cpu__ia64_cpu_info - __per_cpu_start)); + ((char *)&ia64_cpu_info - __per_cpu_start)); cpu0_cpu_info->node_data = mem_data[node].node_data; } #endif /* CONFIG_SMP */ diff --git a/arch/microblaze/include/asm/entry.h b/arch/microblaze/include/asm/entry.h index 61abbd232640..ec89f2ad0fe1 100644 --- a/arch/microblaze/include/asm/entry.h +++ b/arch/microblaze/include/asm/entry.h @@ -21,7 +21,7 @@ * places */ -#define PER_CPU(var) per_cpu__##var +#define PER_CPU(var) var # ifndef __ASSEMBLY__ DECLARE_PER_CPU(unsigned int, KSP); /* Saved kernel stack pointer */ diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S index d172d4245cdc..f8c45cc2947d 100644 --- a/arch/parisc/lib/fixup.S +++ b/arch/parisc/lib/fixup.S @@ -36,8 +36,8 @@ #endif /* t2 = &__per_cpu_offset[smp_processor_id()]; */ LDREGX \t2(\t1),\t2 - addil LT%per_cpu__exception_data,%r27 - LDREG RT%per_cpu__exception_data(%r1),\t1 + addil LT%exception_data,%r27 + LDREG RT%exception_data(%r1),\t1 /* t1 = &__get_cpu_var(exception_data) */ add,l \t1,\t2,\t1 /* t1 = t1->fault_ip */ @@ -46,8 +46,8 @@ #else .macro get_fault_ip t1 t2 /* t1 = &__get_cpu_var(exception_data) */ - addil LT%per_cpu__exception_data,%r27 - LDREG RT%per_cpu__exception_data(%r1),\t2 + addil LT%exception_data,%r27 + LDREG RT%exception_data(%r1),\t2 /* t1 = t2->fault_ip */ LDREG EXCDATA_IP(\t2), \t1 .endm diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index c1427b3634ec..580f789cae7f 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -55,7 +55,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ /* calculate address of stat structure r4 = opcode */ \ srdi r4,r4,2; /* index into array */ \ mulli r4,r4,HCALL_STAT_SIZE; \ - LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \ + LOAD_REG_ADDR(r7, hcall_stats); \ add r4,r4,r7; \ ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \ add r4,r4,r7; \ diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index f30f4a1ead23..2ad288ff99a4 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -112,13 +112,13 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) touched = 1; } if (!touched && __get_cpu_var(last_irq_sum) == sum) { - __this_cpu_inc(per_cpu_var(alert_counter)); - if (__this_cpu_read(per_cpu_var(alert_counter)) == 30 * nmi_hz) + __this_cpu_inc(alert_counter); + if (__this_cpu_read(alert_counter) == 30 * nmi_hz) die_nmi("BUG: NMI Watchdog detected LOCKUP", regs, panic_on_timeout); } else { __get_cpu_var(last_irq_sum) = sum; - __this_cpu_write(per_cpu_var(alert_counter), 0); + __this_cpu_write(alert_counter, 0); } if (__get_cpu_var(wd_enabled)) { write_pic(picl_value(nmi_hz)); diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index fd3cee4d117c..1ddec403f512 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -149,11 +149,11 @@ rtrap_nmi: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 rtrap_irq: rtrap: #ifndef CONFIG_SMP - sethi %hi(per_cpu____cpu_data), %l0 - lduw [%l0 + %lo(per_cpu____cpu_data)], %l1 + sethi %hi(__cpu_data), %l0 + lduw [%l0 + %lo(__cpu_data)], %l1 #else - sethi %hi(per_cpu____cpu_data), %l0 - or %l0, %lo(per_cpu____cpu_data), %l0 + sethi %hi(__cpu_data), %l0 + or %l0, %lo(__cpu_data), %l0 lduw [%l0 + %g5], %l1 #endif cmp %l1, 0 diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0c44196b78ac..4c170ccc72ed 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -25,19 +25,18 @@ */ #ifdef CONFIG_SMP #define PER_CPU(var, reg) \ - __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \ - lea per_cpu__##var(reg), reg -#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var + __percpu_mov_op %__percpu_seg:this_cpu_off, reg; \ + lea var(reg), reg +#define PER_CPU_VAR(var) %__percpu_seg:var #else /* ! SMP */ -#define PER_CPU(var, reg) \ - __percpu_mov_op $per_cpu__##var, reg -#define PER_CPU_VAR(var) per_cpu__##var +#define PER_CPU(var, reg) __percpu_mov_op $var, reg +#define PER_CPU_VAR(var) var #endif /* SMP */ #ifdef CONFIG_X86_64_SMP #define INIT_PER_CPU_VAR(var) init_per_cpu__##var #else -#define INIT_PER_CPU_VAR(var) per_cpu__##var +#define INIT_PER_CPU_VAR(var) var #endif #else /* ...!ASSEMBLY */ @@ -60,12 +59,12 @@ * There also must be an entry in vmlinux_64.lds.S */ #define DECLARE_INIT_PER_CPU(var) \ - extern typeof(per_cpu_var(var)) init_per_cpu_var(var) + extern typeof(var) init_per_cpu_var(var) #ifdef CONFIG_X86_64_SMP #define init_per_cpu_var(var) init_per_cpu__##var #else -#define init_per_cpu_var(var) per_cpu_var(var) +#define init_per_cpu_var(var) var #endif /* For arch-specific code, we can use direct single-insn ops (they @@ -142,16 +141,14 @@ do { \ * per-thread variables implemented as per-cpu variables and thus * stable for the duration of the respective task. */ -#define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \ - "m" (per_cpu__##var)) -#define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \ - "p" (&per_cpu__##var)) -#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) -#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) -#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) -#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) -#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) -#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) +#define percpu_read(var) percpu_from_op("mov", var, "m" (var)) +#define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) +#define percpu_write(var, val) percpu_to_op("mov", var, val) +#define percpu_add(var, val) percpu_to_op("add", var, val) +#define percpu_sub(var, val) percpu_to_op("sub", var, val) +#define percpu_and(var, val) percpu_to_op("and", var, val) +#define percpu_or(var, val) percpu_to_op("or", var, val) +#define percpu_xor(var, val) percpu_to_op("xor", var, val) #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) @@ -236,7 +233,7 @@ do { \ ({ \ int old__; \ asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (per_cpu__##var) \ + : "=r" (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index f08f97374892..de10c19d9558 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -31,7 +31,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, "movl %P[task_canary](%[next]), %%ebx\n\t" \ "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" #define __switch_canary_oparam \ - , [stack_canary] "=m" (per_cpu_var(stack_canary.canary)) + , [stack_canary] "=m" (stack_canary.canary) #define __switch_canary_iparam \ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) #else /* CC_STACKPROTECTOR */ @@ -113,7 +113,7 @@ do { \ "movq %P[task_canary](%%rsi),%%r8\n\t" \ "movq %%r8,"__percpu_arg([gs_canary])"\n\t" #define __switch_canary_oparam \ - , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) + , [gs_canary] "=m" (irq_stack_union.stack_canary) #define __switch_canary_iparam \ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) #else /* CC_STACKPROTECTOR */ @@ -134,7 +134,7 @@ do { \ __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ "movq %%rax,%%rdi\n\t" \ - "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ + "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ "jnz ret_from_fork\n\t" \ RESTORE_CONTEXT \ : "=a" (last) \ @@ -144,7 +144,7 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [_tif_fork] "i" (_TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [current_task] "m" (per_cpu_var(current_task)) \ + [current_task] "m" (current_task) \ __switch_canary_iparam \ : "memory", "cc" __EXTRA_CLOBBER) #endif diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index e631cc4416f7..45404379d173 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -437,8 +437,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ - __this_cpu_inc(per_cpu_var(alert_counter)); - if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz) + __this_cpu_inc(alert_counter); + if (__this_cpu_read(alert_counter) == 5 * nmi_hz) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ @@ -446,7 +446,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) regs, panic_on_timeout); } else { __get_cpu_var(last_irq_sum) = sum; - __this_cpu_write(per_cpu_var(alert_counter), 0); + __this_cpu_write(alert_counter, 0); } /* see if the nmi watchdog went off */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 050c278481b1..fd39eaf83b84 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -438,8 +438,8 @@ is386: movl $2,%ecx # set MP */ cmpb $0,ready jne 1f - movl $per_cpu__gdt_page,%eax - movl $per_cpu__stack_canary,%ecx + movl $gdt_page,%eax + movl $stack_canary,%ecx movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) shrl $16, %ecx movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) @@ -702,7 +702,7 @@ idt_descr: .word 0 # 32 bit align gdt_desc.address ENTRY(early_gdt_descr) .word GDT_ENTRIES*8-1 - .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ + .long gdt_page /* Overwritten for secondary CPUs */ /* * The boot_gdt must mirror the equivalent in setup.S and is diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 92929fb3f9fa..ecb92717c412 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -312,7 +312,7 @@ SECTIONS * Per-cpu symbols which need to be offset from __per_cpu_load * for the boot processor. */ -#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load +#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load INIT_PER_CPU(gdt_page); INIT_PER_CPU(irq_stack_union); @@ -323,7 +323,7 @@ INIT_PER_CPU(irq_stack_union); "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -. = ASSERT((per_cpu__irq_stack_union == 0), +. = ASSERT((irq_stack_union == 0), "irq_stack_union is not at start of per-cpu area"); #endif diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 88e15deb8b82..22a2093b5862 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -90,9 +90,9 @@ ENTRY(xen_iret) GET_THREAD_INFO(%eax) movl TI_cpu(%eax), %eax movl __per_cpu_offset(,%eax,4), %eax - mov per_cpu__xen_vcpu(%eax), %eax + mov xen_vcpu(%eax), %eax #else - movl per_cpu__xen_vcpu, %eax + movl xen_vcpu, %eax #endif /* check IF state we're restoring */ diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 8087b90d4673..ca6f0491412b 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -50,11 +50,11 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; * offset. */ #define per_cpu(var, cpu) \ - (*SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu))) + (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) #define __get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset)) + (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset)) #define __raw_get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset)) + (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) #define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) @@ -66,9 +66,9 @@ extern void setup_per_cpu_areas(void); #else /* ! SMP */ -#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var))) -#define __get_cpu_var(var) per_cpu_var(var) -#define __raw_get_cpu_var(var) per_cpu_var(var) +#define per_cpu(var, cpu) (*((void)(cpu), &(var))) +#define __get_cpu_var(var) (var) +#define __raw_get_cpu_var(var) (var) #define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) #define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 5a5d6ce4bd55..ee99f6c2cdcd 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -1,12 +1,6 @@ #ifndef _LINUX_PERCPU_DEFS_H #define _LINUX_PERCPU_DEFS_H -/* - * Determine the real variable name from the name visible in the - * kernel sources. - */ -#define per_cpu_var(var) per_cpu__##var - /* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the @@ -56,24 +50,24 @@ */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ - extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name + extern __PCPU_ATTRS(sec) __typeof__(type) name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak \ - __typeof__(type) per_cpu__##name + __typeof__(type) name #else /* * Normal declaration and definition macros. */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ - extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name + extern __PCPU_ATTRS(sec) __typeof__(type) name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES \ - __typeof__(type) per_cpu__##name + __typeof__(type) name #endif /* @@ -137,8 +131,8 @@ /* * Intermodule exports for per-CPU variables. */ -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var) #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 522f421ec213..e12410e55e05 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -182,7 +182,7 @@ static inline void *pcpu_lpage_remapped(void *kaddr) #ifndef percpu_read # define percpu_read(var) \ ({ \ - typeof(per_cpu_var(var)) __tmp_var__; \ + typeof(var) __tmp_var__; \ __tmp_var__ = get_cpu_var(var); \ put_cpu_var(var); \ __tmp_var__; \ @@ -253,8 +253,7 @@ do { \ /* * Optimized manipulation for memory allocated through the per cpu - * allocator or for addresses of per cpu variables (can be determined - * using per_cpu_var(xx). + * allocator or for addresses of per cpu variables. * * These operation guarantee exclusivity of access for other operations * on the *same* processor. The assumption is that per cpu data is only diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index d85889710f9b..3e489fda11a1 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -76,22 +76,22 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); static inline void __count_vm_event(enum vm_event_item item) { - __this_cpu_inc(per_cpu_var(vm_event_states).event[item]); + __this_cpu_inc(vm_event_states.event[item]); } static inline void count_vm_event(enum vm_event_item item) { - this_cpu_inc(per_cpu_var(vm_event_states).event[item]); + this_cpu_inc(vm_event_states.event[item]); } static inline void __count_vm_events(enum vm_event_item item, long delta) { - __this_cpu_add(per_cpu_var(vm_event_states).event[item], delta); + __this_cpu_add(vm_event_states.event[item], delta); } static inline void count_vm_events(enum vm_event_item item, long delta) { - this_cpu_add(per_cpu_var(vm_event_states).event[item], delta); + this_cpu_add(vm_event_states.event[item], delta); } extern void all_vm_events(unsigned long *); diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 178967b6434e..e339ab349121 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -731,13 +731,13 @@ static void rcu_torture_timer(unsigned long unused) /* Should not happen, but... */ pipe_count = RCU_TORTURE_PIPE_LEN; } - __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]); + __this_cpu_inc(rcu_torture_count[pipe_count]); completed = cur_ops->completed() - completed; if (completed > RCU_TORTURE_PIPE_LEN) { /* Should not happen, but... */ completed = RCU_TORTURE_PIPE_LEN; } - __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]); + __this_cpu_inc(rcu_torture_batch[completed]); preempt_enable(); cur_ops->readunlock(idx); } @@ -786,13 +786,13 @@ rcu_torture_reader(void *arg) /* Should not happen, but... */ pipe_count = RCU_TORTURE_PIPE_LEN; } - __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]); + __this_cpu_inc(rcu_torture_count[pipe_count]); completed = cur_ops->completed() - completed; if (completed > RCU_TORTURE_PIPE_LEN) { /* Should not happen, but... */ completed = RCU_TORTURE_PIPE_LEN; } - __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]); + __this_cpu_inc(rcu_torture_batch[completed]); preempt_enable(); cur_ops->readunlock(idx); schedule(); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 85a5ed70b5b2..b808177af816 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -91,12 +91,12 @@ DEFINE_PER_CPU(int, ftrace_cpu_disabled); static inline void ftrace_disable_cpu(void) { preempt_disable(); - __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled)); + __this_cpu_inc(ftrace_cpu_disabled); } static inline void ftrace_enable_cpu(void) { - __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled)); + __this_cpu_dec(ftrace_cpu_disabled); preempt_enable(); } @@ -1085,7 +1085,7 @@ trace_function(struct trace_array *tr, struct ftrace_entry *entry; /* If we are reading the ring buffer, don't trace */ - if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) + if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) return; event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 90a6daa10962..8614e3241ff8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -176,7 +176,7 @@ static int __trace_graph_entry(struct trace_array *tr, struct ring_buffer *buffer = tr->buffer; struct ftrace_graph_ent_entry *entry; - if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) + if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) return 0; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, @@ -240,7 +240,7 @@ static void __trace_graph_return(struct trace_array *tr, struct ring_buffer *buffer = tr->buffer; struct ftrace_graph_ret_entry *entry; - if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) + if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) return; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, -- cgit v1.2.3 From f7b64fe806029e0a0454df132eec3c5ab576102c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: make access macros universal Now that per_cpu__ prefix is gone, there's no distinction between static and dynamic percpu variables. Make get_cpu_var() take dynamic percpu variables and ensure that all macros have parentheses around the parameter evaluation and evaluate the variable parameter only once such that any expression which evaluates to percpu address can be used safely. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e12410e55e05..f965f833a643 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -27,10 +27,13 @@ * we force a syntax error here if it isn't. */ #define get_cpu_var(var) (*({ \ - extern int simple_identifier_##var(void); \ preempt_disable(); \ &__get_cpu_var(var); })) -#define put_cpu_var(var) preempt_enable() + +#define put_cpu_var(var) do { \ + (void)(var); \ + preempt_enable(); \ +} while (0) #ifdef CONFIG_SMP @@ -182,17 +185,19 @@ static inline void *pcpu_lpage_remapped(void *kaddr) #ifndef percpu_read # define percpu_read(var) \ ({ \ - typeof(var) __tmp_var__; \ - __tmp_var__ = get_cpu_var(var); \ - put_cpu_var(var); \ - __tmp_var__; \ + typeof(var) *pr_ptr__ = &(var); \ + typeof(var) pr_ret__; \ + pr_ret__ = get_cpu_var(*pr_ptr__); \ + put_cpu_var(*pr_ptr__); \ + pr_ret__; \ }) #endif #define __percpu_generic_to_op(var, val, op) \ do { \ - get_cpu_var(var) op val; \ - put_cpu_var(var); \ + typeof(var) *pgto_ptr__ = &(var); \ + get_cpu_var(*pgto_ptr__) op val; \ + put_cpu_var(*pgto_ptr__); \ } while (0) #ifndef percpu_write @@ -304,7 +309,7 @@ do { \ #define _this_cpu_generic_to_op(pcp, val, op) \ do { \ preempt_disable(); \ - *__this_cpu_ptr(&pcp) op val; \ + *__this_cpu_ptr(&(pcp)) op val; \ preempt_enable(); \ } while (0) -- cgit v1.2.3 From e0fdb0e050eae331046385643618f12452aa7e73 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: add __percpu for sparse. We have to make __kernel "__attribute__((address_space(0)))" so we can cast to it. tj: * put_cpu_var() update. * Annotations added to dynamic allocator interface. Signed-off-by: Rusty Russell Cc: Al Viro Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 4 +++- include/linux/compiler.h | 4 +++- include/linux/percpu-defs.h | 2 +- include/linux/percpu.h | 18 +++++++++++------- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index ca6f0491412b..fded453fd25c 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -41,7 +41,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; * Only S390 provides its own means of moving the pointer. */ #ifndef SHIFT_PERCPU_PTR -#define SHIFT_PERCPU_PTR(__p, __offset) RELOC_HIDE((__p), (__offset)) +/* Weird cast keeps both GCC and sparse happy. */ +#define SHIFT_PERCPU_PTR(__p, __offset) \ + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) #endif /* diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb5135b4e1..abba8045c6ef 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -5,7 +5,7 @@ #ifdef __CHECKER__ # define __user __attribute__((noderef, address_space(1))) -# define __kernel /* default address space */ +# define __kernel __attribute__((address_space(0))) # define __safe __attribute__((safe)) # define __force __attribute__((force)) # define __nocast __attribute__((nocast)) @@ -15,6 +15,7 @@ # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) +# define __percpu __attribute__((noderef, address_space(3))) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else @@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __acquire(x) (void)0 # define __release(x) (void)0 # define __cond_lock(x,c) (c) +# define __percpu #endif #ifdef __KERNEL__ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ee99f6c2cdcd..0fa0cb524250 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -12,7 +12,7 @@ * that section. */ #define __PCPU_ATTRS(sec) \ - __attribute__((section(PER_CPU_BASE_SECTION sec))) \ + __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \ PER_CPU_ATTRIBUTES #define __PCPU_DUMMY_ATTRS \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f965f833a643..2c0d31a3f6b6 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -30,8 +30,12 @@ preempt_disable(); \ &__get_cpu_var(var); })) +/* + * The weird & is necessary because sparse considers (void)(var) to be + * a direct dereference of percpu variable (var). + */ #define put_cpu_var(var) do { \ - (void)(var); \ + (void)&(var); \ preempt_enable(); \ } while (0) @@ -130,9 +134,9 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, */ #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) -extern void *__alloc_reserved_percpu(size_t size, size_t align); -extern void *__alloc_percpu(size_t size, size_t align); -extern void free_percpu(void *__pdata); +extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); +extern void __percpu *__alloc_percpu(size_t size, size_t align); +extern void free_percpu(void __percpu *__pdata); #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void __init setup_per_cpu_areas(void); @@ -142,7 +146,7 @@ extern void __init setup_per_cpu_areas(void); #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) -static inline void *__alloc_percpu(size_t size, size_t align) +static inline void __percpu *__alloc_percpu(size_t size, size_t align) { /* * Can't easily make larger alignment work with kmalloc. WARN @@ -153,7 +157,7 @@ static inline void *__alloc_percpu(size_t size, size_t align) return kzalloc(size, GFP_KERNEL); } -static inline void free_percpu(void *p) +static inline void free_percpu(void __percpu *p) { kfree(p); } @@ -168,7 +172,7 @@ static inline void *pcpu_lpage_remapped(void *kaddr) #endif /* CONFIG_SMP */ #define alloc_percpu(type) \ - (typeof(type) *)__alloc_percpu(sizeof(type), __alignof__(type)) + (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type)) /* * Optional methods for optimized non-lvalue per-cpu variable access. -- cgit v1.2.3 From 545695fb41da117928ab946067a42d9e15fd009d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: make accessors check for percpu pointer in sparse The previous patch made sparse warn about percpu variables being used directly without going through percpu accessors. This patch implements the other half - checking whether non percpu variable is passed into percpu accessors. Signed-off-by: Tejun Heo Cc: Rusty Russell Cc: Al Viro --- include/asm-generic/percpu.h | 6 ++++-- include/linux/percpu-defs.h | 20 ++++++++++++++++++-- include/linux/percpu.h | 2 ++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index fded453fd25c..04f91c2d3f7b 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -42,8 +42,10 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; */ #ifndef SHIFT_PERCPU_PTR /* Weird cast keeps both GCC and sparse happy. */ -#define SHIFT_PERCPU_PTR(__p, __offset) \ - RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) +#define SHIFT_PERCPU_PTR(__p, __offset) ({ \ + __verify_pcpu_ptr((__p)); \ + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ +}) #endif /* diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 0fa0cb524250..1fa36eb54b6a 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -18,6 +18,16 @@ #define __PCPU_DUMMY_ATTRS \ __attribute__((section(".discard"), unused)) +/* + * Macro which verifies @ptr is a percpu pointer without evaluating + * @ptr. This is to be used in percpu accessors to verify that the + * input parameter is a percpu pointer. + */ +#define __verify_pcpu_ptr(ptr) do { \ + void __percpu *__vpp_verify = (typeof(ptr))NULL; \ + (void)__vpp_verify; \ +} while (0) + /* * s390 and alpha modules require percpu variables to be defined as * weak to force the compiler to generate GOT based external @@ -129,10 +139,16 @@ __aligned(PAGE_SIZE) /* - * Intermodule exports for per-CPU variables. + * Intermodule exports for per-CPU variables. sparse forgets about + * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to + * noop if __CHECKER__. */ +#ifndef __CHECKER__ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var) - +#else +#define EXPORT_PER_CPU_SYMBOL(var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) +#endif #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 2c0d31a3f6b6..42878f0cd0e2 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -237,6 +237,7 @@ extern void __bad_size_call_parameter(void); #define __pcpu_size_call_return(stem, variable) \ ({ typeof(variable) pscr_ret__; \ + __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: pscr_ret__ = stem##1(variable);break; \ case 2: pscr_ret__ = stem##2(variable);break; \ @@ -250,6 +251,7 @@ extern void __bad_size_call_parameter(void); #define __pcpu_size_call(stem, variable, ...) \ do { \ + __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: stem##1(variable, __VA_ARGS__);break; \ case 2: stem##2(variable, __VA_ARGS__);break; \ -- cgit v1.2.3 From 85438592f179c126ad4cb9a280046d4f0a501e6d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 Nov 2009 17:53:21 +0900 Subject: percpu: remove compile warnings caused by __verify_pcpu_ptr() If percpu pointer is const, __verify_pcpu_ptr() triggers warnings like the following. drivers/net/loopback.c: In function 'loopback_get_stats': drivers/net/loopback.c:109: warning: initialization discards qualifiers from pointer target type Fix it by adding const to the verification target pointer used in __verify_pcpu_ptr(). Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell --- include/linux/percpu-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 1fa36eb54b6a..68567c0b3a5d 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -24,7 +24,7 @@ * input parameter is a percpu pointer. */ #define __verify_pcpu_ptr(ptr) do { \ - void __percpu *__vpp_verify = (typeof(ptr))NULL; \ + const void __percpu *__vpp_verify = (typeof(ptr))NULL; \ (void)__vpp_verify; \ } while (0) -- cgit v1.2.3 From 22b737f4c75197372d64afc6ed1bccd58c00e549 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 1 Dec 2009 23:28:10 +0900 Subject: percpu: refactor the code in pcpu_[de]populate_chunk() Using break statement at the end of a for loop is confusing, refactor it by replacing the for loop. Signed-off-by: WANG Cong Signed-off-by: Tejun Heo --- mm/percpu.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index e2e80fc78601..77c6f7994a46 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -886,11 +886,10 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) int rs, re; /* quick path, check whether it's empty already */ - pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { - if (rs == page_start && re == page_end) - return; - break; - } + rs = page_start; + pcpu_next_unpop(chunk, &rs, &re, page_end); + if (rs == page_start && re == page_end) + return; /* immutable chunks can't be depopulated */ WARN_ON(chunk->immutable); @@ -941,11 +940,10 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) int rs, re, rc; /* quick path, check whether all pages are already there */ - pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) { - if (rs == page_start && re == page_end) - goto clear; - break; - } + rs = page_start; + pcpu_next_pop(chunk, &rs, &re, page_end); + if (rs == page_start && re == page_end) + goto clear; /* need to allocate and map pages, this chunk can't be immutable */ WARN_ON(chunk->immutable); -- cgit v1.2.3 From 38b7827fcdd660f591d645bd3ae6644456a4773c Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:49 +0900 Subject: local_t: Remove cpu_local_xx macros These macros have not been used for awhile now. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/alpha/include/asm/local.h | 17 ----------------- arch/m32r/include/asm/local.h | 25 ------------------------- arch/mips/include/asm/local.h | 25 ------------------------- arch/powerpc/include/asm/local.h | 25 ------------------------- arch/x86/include/asm/local.h | 37 ------------------------------------- include/asm-generic/local.h | 19 ------------------- 6 files changed, 148 deletions(-) diff --git a/arch/alpha/include/asm/local.h b/arch/alpha/include/asm/local.h index 6ad3ea696421..b9e3e3318371 100644 --- a/arch/alpha/include/asm/local.h +++ b/arch/alpha/include/asm/local.h @@ -98,21 +98,4 @@ static __inline__ long local_sub_return(long i, local_t * l) #define __local_add(i,l) ((l)->a.counter+=(i)) #define __local_sub(i,l) ((l)->a.counter-=(i)) -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - */ -#define cpu_local_read(l) local_read(&__get_cpu_var(l)) -#define cpu_local_set(l, i) local_set(&__get_cpu_var(l), (i)) - -#define cpu_local_inc(l) local_inc(&__get_cpu_var(l)) -#define cpu_local_dec(l) local_dec(&__get_cpu_var(l)) -#define cpu_local_add(i, l) local_add((i), &__get_cpu_var(l)) -#define cpu_local_sub(i, l) local_sub((i), &__get_cpu_var(l)) - -#define __cpu_local_inc(l) __local_inc(&__get_cpu_var(l)) -#define __cpu_local_dec(l) __local_dec(&__get_cpu_var(l)) -#define __cpu_local_add(i, l) __local_add((i), &__get_cpu_var(l)) -#define __cpu_local_sub(i, l) __local_sub((i), &__get_cpu_var(l)) - #endif /* _ALPHA_LOCAL_H */ diff --git a/arch/m32r/include/asm/local.h b/arch/m32r/include/asm/local.h index 22256d138630..734bca87018a 100644 --- a/arch/m32r/include/asm/local.h +++ b/arch/m32r/include/asm/local.h @@ -338,29 +338,4 @@ static inline void local_set_mask(unsigned long mask, local_t *addr) * a variable, not an address. */ -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non local way. */ -#define cpu_local_wrap_v(l) \ - ({ local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; }) -#define cpu_local_wrap(l) \ - ({ preempt_disable(); \ - l; \ - preempt_enable(); }) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) - -#define __cpu_local_inc(l) cpu_local_inc(l) -#define __cpu_local_dec(l) cpu_local_dec(l) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - #endif /* __M32R_LOCAL_H */ diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h index 361f4f16c30c..bdcdef02d147 100644 --- a/arch/mips/include/asm/local.h +++ b/arch/mips/include/asm/local.h @@ -193,29 +193,4 @@ static __inline__ long local_sub_return(long i, local_t * l) #define __local_add(i, l) ((l)->a.counter+=(i)) #define __local_sub(i, l) ((l)->a.counter-=(i)) -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non atomic way. */ -#define cpu_local_wrap_v(l) \ - ({ local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; }) -#define cpu_local_wrap(l) \ - ({ preempt_disable(); \ - l; \ - preempt_enable(); }) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) - -#define __cpu_local_inc(l) cpu_local_inc(l) -#define __cpu_local_dec(l) cpu_local_dec(l) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - #endif /* _ARCH_MIPS_LOCAL_H */ diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h index 84b457a3c1bc..227753d288f6 100644 --- a/arch/powerpc/include/asm/local.h +++ b/arch/powerpc/include/asm/local.h @@ -172,29 +172,4 @@ static __inline__ long local_dec_if_positive(local_t *l) #define __local_add(i,l) ((l)->a.counter+=(i)) #define __local_sub(i,l) ((l)->a.counter-=(i)) -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non atomic way. */ -#define cpu_local_wrap_v(l) \ - ({ local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; }) -#define cpu_local_wrap(l) \ - ({ preempt_disable(); \ - l; \ - preempt_enable(); }) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) - -#define __cpu_local_inc(l) cpu_local_inc(l) -#define __cpu_local_dec(l) cpu_local_dec(l) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - #endif /* _ARCH_POWERPC_LOCAL_H */ diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 47b9b6f19057..2e9972468a5d 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -195,41 +195,4 @@ static inline long local_sub_return(long i, local_t *l) #define __local_add(i, l) local_add((i), (l)) #define __local_sub(i, l) local_sub((i), (l)) -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - * - * X86_64: This could be done better if we moved the per cpu data directly - * after GS. - */ - -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non atomic way. */ -#define cpu_local_wrap_v(l) \ -({ \ - local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; \ -}) -#define cpu_local_wrap(l) \ -({ \ - preempt_disable(); \ - (l); \ - preempt_enable(); \ -}) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var((l)))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var((l)), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var((l)))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var((l)))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var((l)))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var((l)))) - -#define __cpu_local_inc(l) cpu_local_inc((l)) -#define __cpu_local_dec(l) cpu_local_dec((l)) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - #endif /* _ASM_X86_LOCAL_H */ diff --git a/include/asm-generic/local.h b/include/asm-generic/local.h index fc218444e315..c8a5d68541d7 100644 --- a/include/asm-generic/local.h +++ b/include/asm-generic/local.h @@ -52,23 +52,4 @@ typedef struct #define __local_add(i,l) local_set((l), local_read(l) + (i)) #define __local_sub(i,l) local_set((l), local_read(l) - (i)) -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable (eg. mystruct.foo), not an address. - */ -#define cpu_local_read(l) local_read(&__get_cpu_var(l)) -#define cpu_local_set(l, i) local_set(&__get_cpu_var(l), (i)) -#define cpu_local_inc(l) local_inc(&__get_cpu_var(l)) -#define cpu_local_dec(l) local_dec(&__get_cpu_var(l)) -#define cpu_local_add(i, l) local_add((i), &__get_cpu_var(l)) -#define cpu_local_sub(i, l) local_sub((i), &__get_cpu_var(l)) - -/* Non-atomic increments, ie. preemption disabled and won't be touched - * in interrupt, etc. Some archs can optimize this case well. - */ -#define __cpu_local_inc(l) __local_inc(&__get_cpu_var(l)) -#define __cpu_local_dec(l) __local_dec(&__get_cpu_var(l)) -#define __cpu_local_add(i, l) __local_add((i), &__get_cpu_var(l)) -#define __cpu_local_sub(i, l) __local_sub((i), &__get_cpu_var(l)) - #endif /* _ASM_GENERIC_LOCAL_H */ -- cgit v1.2.3 From e1783a240f491fb233f04edc042e16b18a7a79ba Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: module: Use this_cpu_xx to dynamically allocate counters Use cpu ops to deal with the per cpu data instead of a local_t. Reduces memory requirements, cache footprint and decreases cycle counts. The this_cpu_xx operations are also used for !SMP mode. Otherwise we could not drop the use of __module_ref_addr() which would make per cpu data handling complicated. this_cpu_xx operations have their own fallback for !SMP. V8-V9: - Leave include asm/module.h since ringbuffer.c depends on it. Nothing else does though. Another patch will deal with that. - Remove spurious free. Signed-off-by: Christoph Lameter Acked-by: Rusty Russell Signed-off-by: Tejun Heo --- include/linux/module.h | 36 ++++++++++++++---------------------- kernel/module.c | 29 +++++++++++++++-------------- 2 files changed, 29 insertions(+), 36 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 6cb1a3cab5d3..2302f09ea2d9 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -363,11 +364,9 @@ struct module /* Destruction function. */ void (*exit)(void); -#ifdef CONFIG_SMP - char *refptr; -#else - local_t ref; -#endif + struct module_ref { + int count; + } *refptr; #endif #ifdef CONFIG_CONSTRUCTORS @@ -454,25 +453,16 @@ void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) void symbol_put_addr(void *addr); -static inline local_t *__module_ref_addr(struct module *mod, int cpu) -{ -#ifdef CONFIG_SMP - return (local_t *) (mod->refptr + per_cpu_offset(cpu)); -#else - return &mod->ref; -#endif -} - /* Sometimes we know we already have a refcount, and it's easier not to handle the error case (which only happens with rmmod --wait). */ static inline void __module_get(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_inc(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); - put_cpu(); + __this_cpu_read(module->refptr->count)); + preempt_enable(); } } @@ -481,15 +471,17 @@ static inline int try_module_get(struct module *module) int ret = 1; if (module) { - unsigned int cpu = get_cpu(); + preempt_disable(); + if (likely(module_is_live(module))) { - local_inc(__module_ref_addr(module, cpu)); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); } else ret = 0; - put_cpu(); + + preempt_enable(); } return ret; } diff --git a/kernel/module.c b/kernel/module.c index e96b8ed1cb6a..9bf228052ec5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -474,9 +474,10 @@ static void module_unload_init(struct module *mod) INIT_LIST_HEAD(&mod->modules_which_use_me); for_each_possible_cpu(cpu) - local_set(__module_ref_addr(mod, cpu), 0); + per_cpu_ptr(mod->refptr, cpu)->count = 0; + /* Hold reference count during initialization. */ - local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1); + __this_cpu_write(mod->refptr->count, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -619,7 +620,7 @@ unsigned int module_refcount(struct module *mod) int cpu; for_each_possible_cpu(cpu) - total += local_read(__module_ref_addr(mod, cpu)); + total += per_cpu_ptr(mod->refptr, cpu)->count; return total; } EXPORT_SYMBOL(module_refcount); @@ -796,14 +797,15 @@ static struct module_attribute refcnt = { void module_put(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_dec(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_dec(module->refptr->count); + trace_module_put(module, _RET_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); - put_cpu(); + preempt_enable(); } } EXPORT_SYMBOL(module_put); @@ -1394,9 +1396,9 @@ static void free_module(struct module *mod) kfree(mod->args); if (mod->percpu) percpu_modfree(mod->percpu); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) +#if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) - percpu_modfree(mod->refptr); + free_percpu(mod->refptr); #endif /* Free lock-classes: */ lockdep_free_key_range(mod->module_core, mod->core_size); @@ -2159,9 +2161,8 @@ static noinline struct module *load_module(void __user *umod, mod = (void *)sechdrs[modindex].sh_addr; kmemleak_load_module(mod, hdr, sechdrs, secstrings); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), - mod->name); +#if defined(CONFIG_MODULE_UNLOAD) + mod->refptr = alloc_percpu(struct module_ref); if (!mod->refptr) { err = -ENOMEM; goto free_init; @@ -2393,8 +2394,8 @@ static noinline struct module *load_module(void __user *umod, kobject_put(&mod->mkobj.kobj); free_unload: module_unload_free(mod); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - percpu_modfree(mod->refptr); +#if defined(CONFIG_MODULE_UNLOAD) + free_percpu(mod->refptr); free_init: #endif module_free(mod, mod->module_init); -- cgit v1.2.3 From 79615760f380ec86cd58204744e774c33fab9211 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: local_t: Move local.h include to ringbuffer.c and ring_buffer_benchmark.c ringbuffer*.c are the last users of local.h. Remove the include from modules.h and add it to ringbuffer files. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/module.h | 1 - kernel/trace/ring_buffer.c | 1 + kernel/trace/ring_buffer_benchmark.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/module.h b/include/linux/module.h index 2302f09ea2d9..7e74ae0051cc 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2326b04c95c4..eb6c8988c31a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -20,6 +20,7 @@ #include #include +#include #include "trace.h" /* diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index b2477caf09c2..df74c7982255 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -8,6 +8,7 @@ #include #include #include +#include struct rb_page { u64 ts; -- cgit v1.2.3 From 5917dae83cb02dfe74c9167b79e86e6d65183fa3 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: percpu, x86: Generic inc / dec percpu instructions Optimize code generated for percpu access by checking for increment and decrements. tj: fix incorrect usage of __builtin_constant_p() and restructure percpu_add_op() macro. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 86 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 72 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 4c170ccc72ed..66a272dfd8b8 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -103,6 +103,64 @@ do { \ } \ } while (0) +/* + * Generate a percpu add to memory instruction and optimize code + * if a one is added or subtracted. + */ +#define percpu_add_op(var, val) \ +do { \ + typedef typeof(var) pao_T__; \ + const int pao_ID__ = (__builtin_constant_p(val) && \ + ((val) == 1 || (val) == -1)) ? (val) : 0; \ + if (0) { \ + pao_T__ pao_tmp__; \ + pao_tmp__ = (val); \ + } \ + switch (sizeof(var)) { \ + case 1: \ + if (pao_ID__ == 1) \ + asm("incb "__percpu_arg(0) : "+m" (var)); \ + else if (pao_ID__ == -1) \ + asm("decb "__percpu_arg(0) : "+m" (var)); \ + else \ + asm("addb %1, "__percpu_arg(0) \ + : "+m" (var) \ + : "qi" ((pao_T__)(val))); \ + break; \ + case 2: \ + if (pao_ID__ == 1) \ + asm("incw "__percpu_arg(0) : "+m" (var)); \ + else if (pao_ID__ == -1) \ + asm("decw "__percpu_arg(0) : "+m" (var)); \ + else \ + asm("addw %1, "__percpu_arg(0) \ + : "+m" (var) \ + : "ri" ((pao_T__)(val))); \ + break; \ + case 4: \ + if (pao_ID__ == 1) \ + asm("incl "__percpu_arg(0) : "+m" (var)); \ + else if (pao_ID__ == -1) \ + asm("decl "__percpu_arg(0) : "+m" (var)); \ + else \ + asm("addl %1, "__percpu_arg(0) \ + : "+m" (var) \ + : "ri" ((pao_T__)(val))); \ + break; \ + case 8: \ + if (pao_ID__ == 1) \ + asm("incq "__percpu_arg(0) : "+m" (var)); \ + else if (pao_ID__ == -1) \ + asm("decq "__percpu_arg(0) : "+m" (var)); \ + else \ + asm("addq %1, "__percpu_arg(0) \ + : "+m" (var) \ + : "re" ((pao_T__)(val))); \ + break; \ + default: __bad_percpu_size(); \ + } \ +} while (0) + #define percpu_from_op(op, var, constraint) \ ({ \ typeof(var) pfo_ret__; \ @@ -144,8 +202,8 @@ do { \ #define percpu_read(var) percpu_from_op("mov", var, "m" (var)) #define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) #define percpu_write(var, val) percpu_to_op("mov", var, val) -#define percpu_add(var, val) percpu_to_op("add", var, val) -#define percpu_sub(var, val) percpu_to_op("sub", var, val) +#define percpu_add(var, val) percpu_add_op(var, val) +#define percpu_sub(var, val) percpu_add_op(var, -(val)) #define percpu_and(var, val) percpu_to_op("and", var, val) #define percpu_or(var, val) percpu_to_op("or", var, val) #define percpu_xor(var, val) percpu_to_op("xor", var, val) @@ -157,9 +215,9 @@ do { \ #define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) #define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) #define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) -#define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) -#define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) +#define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) +#define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) #define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) #define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) #define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) @@ -176,9 +234,9 @@ do { \ #define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) #define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) #define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) -#define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) -#define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) +#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) +#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) #define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) #define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) #define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) @@ -189,9 +247,9 @@ do { \ #define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) #define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) -#define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) -#define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) -#define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) +#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) +#define irqsafe_cpu_add_4(pcp, val) percpu_add_op((pcp), val) #define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) #define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) #define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) @@ -209,19 +267,19 @@ do { \ #ifdef CONFIG_X86_64 #define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) #define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) #define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) -#define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) -- cgit v1.2.3 From 99dcc3e5a94ed491fbef402831d8c0bbb267f995 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:51 +0900 Subject: this_cpu: Page allocator conversion Use the per cpu allocator functionality to avoid per cpu arrays in struct zone. This drastically reduces the size of struct zone for systems with large amounts of processors and allows placement of critical variables of struct zone in one cacheline even on very large systems. Another effect is that the pagesets of one processor are placed near one another. If multiple pagesets from different zones fit into one cacheline then additional cacheline fetches can be avoided on the hot paths when allocating memory from multiple zones. Bootstrap becomes simpler if we use the same scheme for UP, SMP, NUMA. #ifdefs are reduced and we can drop the zone_pcp macro. Hotplug handling is also simplified since cpu alloc can bring up and shut down cpu areas for a specific cpu as a whole. So there is no need to allocate or free individual pagesets. V7-V8: - Explain chicken egg dilemmna with percpu allocator. V4-V5: - Fix up cases where per_cpu_ptr is called before irq disable - Integrate the bootstrap logic that was separate before. tj: Build failure in pageset_cpuup_callback() due to missing ret variable fixed. Reviewed-by: Mel Gorman Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/mm.h | 4 - include/linux/mmzone.h | 12 +-- mm/page_alloc.c | 202 +++++++++++++++++-------------------------------- mm/vmstat.c | 14 ++-- 4 files changed, 81 insertions(+), 151 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 2265f28eb47a..554fa395aac9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1079,11 +1079,7 @@ extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern int after_bootmem; -#ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); -#else -static inline void setup_per_cpu_pageset(void) {} -#endif extern void zone_pcp_update(struct zone *zone); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 30fe668c2542..7874201a3556 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -184,13 +184,7 @@ struct per_cpu_pageset { s8 stat_threshold; s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; #endif -} ____cacheline_aligned_in_smp; - -#ifdef CONFIG_NUMA -#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)]) -#else -#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) -#endif +}; #endif /* !__GENERATING_BOUNDS.H */ @@ -306,10 +300,8 @@ struct zone { */ unsigned long min_unmapped_pages; unsigned long min_slab_pages; - struct per_cpu_pageset *pageset[NR_CPUS]; -#else - struct per_cpu_pageset pageset[NR_CPUS]; #endif + struct per_cpu_pageset *pageset; /* * free areas of different sizes */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e9f5cc5fb59..6849e870de54 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1008,10 +1008,10 @@ static void drain_pages(unsigned int cpu) struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; - pset = zone_pcp(zone, cpu); + local_irq_save(flags); + pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; - local_irq_save(flags); free_pcppages_bulk(zone, pcp->count, pcp); pcp->count = 0; local_irq_restore(flags); @@ -1095,7 +1095,6 @@ static void free_hot_cold_page(struct page *page, int cold) arch_free_page(page, 0); kernel_map_pages(page, 1, 0); - pcp = &zone_pcp(zone, get_cpu())->pcp; migratetype = get_pageblock_migratetype(page); set_page_private(page, migratetype); local_irq_save(flags); @@ -1118,6 +1117,7 @@ static void free_hot_cold_page(struct page *page, int cold) migratetype = MIGRATE_MOVABLE; } + pcp = &this_cpu_ptr(zone->pageset)->pcp; if (cold) list_add_tail(&page->lru, &pcp->lists[migratetype]); else @@ -1130,7 +1130,6 @@ static void free_hot_cold_page(struct page *page, int cold) out: local_irq_restore(flags); - put_cpu(); } void free_hot_page(struct page *page) @@ -1180,17 +1179,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, unsigned long flags; struct page *page; int cold = !!(gfp_flags & __GFP_COLD); - int cpu; again: - cpu = get_cpu(); if (likely(order == 0)) { struct per_cpu_pages *pcp; struct list_head *list; - pcp = &zone_pcp(zone, cpu)->pcp; - list = &pcp->lists[migratetype]; local_irq_save(flags); + pcp = &this_cpu_ptr(zone->pageset)->pcp; + list = &pcp->lists[migratetype]; if (list_empty(list)) { pcp->count += rmqueue_bulk(zone, 0, pcp->batch, list, @@ -1231,7 +1228,6 @@ again: __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone); local_irq_restore(flags); - put_cpu(); VM_BUG_ON(bad_range(zone, page)); if (prep_new_page(page, order, gfp_flags)) @@ -1240,7 +1236,6 @@ again: failed: local_irq_restore(flags); - put_cpu(); return NULL; } @@ -2179,7 +2174,7 @@ void show_free_areas(void) for_each_online_cpu(cpu) { struct per_cpu_pageset *pageset; - pageset = zone_pcp(zone, cpu); + pageset = per_cpu_ptr(zone->pageset, cpu); printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", cpu, pageset->pcp.high, @@ -2744,10 +2739,29 @@ static void build_zonelist_cache(pg_data_t *pgdat) #endif /* CONFIG_NUMA */ +/* + * Boot pageset table. One per cpu which is going to be used for all + * zones and all nodes. The parameters will be set in such a way + * that an item put on a list will immediately be handed over to + * the buddy list. This is safe since pageset manipulation is done + * with interrupts disabled. + * + * The boot_pagesets must be kept even after bootup is complete for + * unused processors and/or zones. They do play a role for bootstrapping + * hotplugged processors. + * + * zoneinfo_show() and maybe other functions do + * not check if the processor is online before following the pageset pointer. + * Other parts of the kernel may not check if the zone is available. + */ +static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch); +static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); + /* return values int ....just for stop_machine() */ static int __build_all_zonelists(void *dummy) { int nid; + int cpu; #ifdef CONFIG_NUMA memset(node_load, 0, sizeof(node_load)); @@ -2758,6 +2772,23 @@ static int __build_all_zonelists(void *dummy) build_zonelists(pgdat); build_zonelist_cache(pgdat); } + + /* + * Initialize the boot_pagesets that are going to be used + * for bootstrapping processors. The real pagesets for + * each zone will be allocated later when the per cpu + * allocator is available. + * + * boot_pagesets are used also for bootstrapping offline + * cpus if the system is already booted because the pagesets + * are needed to initialize allocators on a specific cpu too. + * F.e. the percpu allocator needs the page allocator which + * needs the percpu allocator in order to allocate its pagesets + * (a chicken-egg dilemma). + */ + for_each_possible_cpu(cpu) + setup_pageset(&per_cpu(boot_pageset, cpu), 0); + return 0; } @@ -3095,121 +3126,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p, pcp->batch = PAGE_SHIFT * 8; } - -#ifdef CONFIG_NUMA -/* - * Boot pageset table. One per cpu which is going to be used for all - * zones and all nodes. The parameters will be set in such a way - * that an item put on a list will immediately be handed over to - * the buddy list. This is safe since pageset manipulation is done - * with interrupts disabled. - * - * Some NUMA counter updates may also be caught by the boot pagesets. - * - * The boot_pagesets must be kept even after bootup is complete for - * unused processors and/or zones. They do play a role for bootstrapping - * hotplugged processors. - * - * zoneinfo_show() and maybe other functions do - * not check if the processor is online before following the pageset pointer. - * Other parts of the kernel may not check if the zone is available. - */ -static struct per_cpu_pageset boot_pageset[NR_CPUS]; - /* - * Dynamically allocate memory for the - * per cpu pageset array in struct zone. + * Allocate per cpu pagesets and initialize them. + * Before this call only boot pagesets were available. + * Boot pagesets will no longer be used by this processorr + * after setup_per_cpu_pageset(). */ -static int __cpuinit process_zones(int cpu) +void __init setup_per_cpu_pageset(void) { - struct zone *zone, *dzone; - int node = cpu_to_node(cpu); - - node_set_state(node, N_CPU); /* this node has a cpu */ + struct zone *zone; + int cpu; for_each_populated_zone(zone) { - zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), - GFP_KERNEL, node); - if (!zone_pcp(zone, cpu)) - goto bad; - - setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone)); - - if (percpu_pagelist_fraction) - setup_pagelist_highmark(zone_pcp(zone, cpu), - (zone->present_pages / percpu_pagelist_fraction)); - } - - return 0; -bad: - for_each_zone(dzone) { - if (!populated_zone(dzone)) - continue; - if (dzone == zone) - break; - kfree(zone_pcp(dzone, cpu)); - zone_pcp(dzone, cpu) = &boot_pageset[cpu]; - } - return -ENOMEM; -} + zone->pageset = alloc_percpu(struct per_cpu_pageset); -static inline void free_zone_pagesets(int cpu) -{ - struct zone *zone; - - for_each_zone(zone) { - struct per_cpu_pageset *pset = zone_pcp(zone, cpu); + for_each_possible_cpu(cpu) { + struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu); - /* Free per_cpu_pageset if it is slab allocated */ - if (pset != &boot_pageset[cpu]) - kfree(pset); - zone_pcp(zone, cpu) = &boot_pageset[cpu]; - } -} + setup_pageset(pcp, zone_batchsize(zone)); -static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - int cpu = (long)hcpu; - int ret = NOTIFY_OK; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - if (process_zones(cpu)) - ret = NOTIFY_BAD; - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - free_zone_pagesets(cpu); - break; - default: - break; + if (percpu_pagelist_fraction) + setup_pagelist_highmark(pcp, + (zone->present_pages / + percpu_pagelist_fraction)); + } } - return ret; } -static struct notifier_block __cpuinitdata pageset_notifier = - { &pageset_cpuup_callback, NULL, 0 }; - -void __init setup_per_cpu_pageset(void) -{ - int err; - - /* Initialize per_cpu_pageset for cpu 0. - * A cpuup callback will do this for every cpu - * as it comes online - */ - err = process_zones(smp_processor_id()); - BUG_ON(err); - register_cpu_notifier(&pageset_notifier); -} - -#endif - static noinline __init_refok int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) { @@ -3263,7 +3206,7 @@ static int __zone_pcp_update(void *data) struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; - pset = zone_pcp(zone, cpu); + pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; local_irq_save(flags); @@ -3281,21 +3224,17 @@ void zone_pcp_update(struct zone *zone) static __meminit void zone_pcp_init(struct zone *zone) { - int cpu; - unsigned long batch = zone_batchsize(zone); + /* + * per cpu subsystem is not up at this point. The following code + * relies on the ability of the linker to provide the + * offset of a (static) per cpu variable into the per cpu area. + */ + zone->pageset = &boot_pageset; - for (cpu = 0; cpu < NR_CPUS; cpu++) { -#ifdef CONFIG_NUMA - /* Early boot. Slab allocator not functional yet */ - zone_pcp(zone, cpu) = &boot_pageset[cpu]; - setup_pageset(&boot_pageset[cpu],0); -#else - setup_pageset(zone_pcp(zone,cpu), batch); -#endif - } if (zone->present_pages) - printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", - zone->name, zone->present_pages, batch); + printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n", + zone->name, zone->present_pages, + zone_batchsize(zone)); } __meminit int init_currently_empty_zone(struct zone *zone, @@ -4809,10 +4748,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, if (!write || (ret == -EINVAL)) return ret; for_each_populated_zone(zone) { - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { unsigned long high; high = zone->present_pages / percpu_pagelist_fraction; - setup_pagelist_highmark(zone_pcp(zone, cpu), high); + setup_pagelist_highmark( + per_cpu_ptr(zone->pageset, cpu), high); } } return 0; diff --git a/mm/vmstat.c b/mm/vmstat.c index 6051fbab67ba..1ba0bb7ad043 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -139,7 +139,8 @@ static void refresh_zone_stat_thresholds(void) threshold = calculate_threshold(zone); for_each_online_cpu(cpu) - zone_pcp(zone, cpu)->stat_threshold = threshold; + per_cpu_ptr(zone->pageset, cpu)->stat_threshold + = threshold; } } @@ -149,7 +150,8 @@ static void refresh_zone_stat_thresholds(void) void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, int delta) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); + s8 *p = pcp->vm_stat_diff + item; long x; @@ -202,7 +204,7 @@ EXPORT_SYMBOL(mod_zone_page_state); */ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); s8 *p = pcp->vm_stat_diff + item; (*p)++; @@ -223,7 +225,7 @@ EXPORT_SYMBOL(__inc_zone_page_state); void __dec_zone_state(struct zone *zone, enum zone_stat_item item) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); s8 *p = pcp->vm_stat_diff + item; (*p)--; @@ -300,7 +302,7 @@ void refresh_cpu_vm_stats(int cpu) for_each_populated_zone(zone) { struct per_cpu_pageset *p; - p = zone_pcp(zone, cpu); + p = per_cpu_ptr(zone->pageset, cpu); for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) if (p->vm_stat_diff[i]) { @@ -741,7 +743,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, for_each_online_cpu(i) { struct per_cpu_pageset *pageset; - pageset = zone_pcp(zone, i); + pageset = per_cpu_ptr(zone->pageset, i); seq_printf(m, "\n cpu: %i" "\n count: %i" -- cgit v1.2.3 From ad596925eaf9a48ed61bc9210088828f1f8e0552 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:51 +0900 Subject: this_cpu: Remove pageset_notifier Remove the pageset notifier since it only marks that a processor exists on a specific node. Move that code into the vmstat notifier. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- mm/vmstat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/vmstat.c b/mm/vmstat.c index 1ba0bb7ad043..fc5aa183bc45 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -908,6 +908,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, case CPU_ONLINE: case CPU_ONLINE_FROZEN: start_cpu_timer(cpu); + node_set_state(cpu_to_node(cpu), N_CPU); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: -- cgit v1.2.3 From dbfc196a3cc1a2514ad0737a82f764de23bd65e6 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 4 Jan 2010 16:34:46 -0600 Subject: local_t: Remove leftover local.h Somehow the local.h was not removed when taking out the local_t usage during the 2.6.32 merge. CC: David Miller Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/sparc/kernel/nmi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 2ad288ff99a4..9b9f5b4e2994 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -21,7 +21,6 @@ #include #include -#include #include /* We don't have a real NMI on sparc64, but we can fake one -- cgit v1.2.3