author		Stephen Rothwell <sfr@canb.auug.org.au>	2008-11-06 14:49:05 +1100
committer	Stephen Rothwell <sfr@canb.auug.org.au>	2008-11-06 14:49:05 +1100
commit		aa214b2c4e8376b2762c864dea592c8c9b61157b (patch)
tree		bfc251c21aece7b9bf0636a29bee791319bc5d3b
parent		28cff3a69b0cb843c533f72506c431b808f677a2 (diff)
parent		5bac7c164167e3425d8115a8f938cbd76c6e2f7e (diff)

Merge commit 'cpu_alloc/cpu_alloc'

Conflicts:
	kernel/lockdep.c
	kernel/module.c
-rw-r--r--	Documentation/kernel-parameters.txt	7
-rw-r--r--	arch/ia64/include/asm/percpu.h	2
-rw-r--r--	arch/powerpc/kernel/setup_64.c	4
-rw-r--r--	arch/sparc64/kernel/smp.c	2
-rw-r--r--	arch/x86/kernel/setup_percpu.c	2
-rw-r--r--	include/linux/module.h	1
-rw-r--r--	include/linux/percpu.h	67
-rw-r--r--	include/linux/vmstat.h	2
-rw-r--r--	init/main.c	18
-rw-r--r--	kernel/lockdep.c	2
-rw-r--r--	kernel/module.c	173
-rw-r--r--	mm/Makefile	2
-rw-r--r--	mm/cpu_alloc.c	203
-rw-r--r--	mm/vmstat.c	1
14 files changed, 306 insertions, 180 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b641c1736a6e..59985b114697 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1766,6 +1766,13 @@ and is between 256 and 4096 characters. It is defined in the file
Format: { 0 | 1 }
See arch/parisc/kernel/pdc_chassis.c
+ percpu= Configure the number of percpu bytes that can be
+ dynamically allocated. This is used for per cpu
+ variables of modules and other dynamic per cpu data
+ structures. Creation of per cpu structures after boot
+ may fail if this is set too low.
+ Default is 8000 bytes.
+
pf. [PARIDE]
See Documentation/paride.txt.
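
As an illustration of the parameter documented above (the value is an example, not part of the patch), raising the dynamic percpu reserve is a matter of appending it to the kernel command line:

	percpu=16000

The value is parsed by the early_param("percpu", ...) handler added to init/main.c further down in this diff.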
diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h
index 77f30b664b4e..f34c9db18d04 100644
--- a/arch/ia64/include/asm/percpu.h
+++ b/arch/ia64/include/asm/percpu.h
@@ -6,7 +6,7 @@
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
-#define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE
+#define PERCPU_AREA_SIZE PERCPU_PAGE_SIZE
#ifdef __ASSEMBLY__
# define THIS_CPU(var) (per_cpu__##var) /* use this to mark accesses to per-CPU variables... */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 169d74cef157..9dca8cfca384 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -600,8 +600,8 @@ void __init setup_per_cpu_areas(void)
/* Copy section for each CPU (we discard the original) */
size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
#ifdef CONFIG_MODULES
- if (size < PERCPU_ENOUGH_ROOM)
- size = PERCPU_ENOUGH_ROOM;
+ if (size < PERCPU_AREA_SIZE)
+ size = PERCPU_AREA_SIZE;
#endif
for_each_possible_cpu(i) {
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index e5627118e613..814c9b7f7734 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1390,7 +1390,7 @@ void __init real_setup_per_cpu_areas(void)
char *ptr;
/* Copy section for each CPU (we discard the original) */
- goal = PERCPU_ENOUGH_ROOM;
+ goal = PERCPU_AREA_SIZE;
__per_cpu_shift = PAGE_SHIFT;
for (size = PAGE_SIZE; size < goal; size <<= 1UL)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ae0c0d3bb770..f6e6df4c406c 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -149,7 +149,7 @@ void __init setup_per_cpu_areas(void)
setup_cpu_pda_map();
/* Copy section for each CPU (we discard the original) */
- old_size = PERCPU_ENOUGH_ROOM;
+ old_size = PERCPU_AREA_SIZE;
align = max_t(unsigned long, PAGE_SIZE, align);
size = roundup(old_size, align);
printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
diff --git a/include/linux/module.h b/include/linux/module.h
index 3bfed013350b..dac05721458d 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -324,6 +324,7 @@ struct module
/* Per-cpu data. */
void *percpu;
+ int percpu_size;
/* The command line arguments (may be mangled). People like
keeping pointers to this stuff */
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index e1f87085d39a..b58ba9ff00dc 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -56,17 +56,14 @@
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
+extern unsigned int percpu_reserve;
/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
-#ifndef PERCPU_ENOUGH_ROOM
-#ifdef CONFIG_MODULES
-#define PERCPU_MODULE_RESERVE 8192
-#else
-#define PERCPU_MODULE_RESERVE 0
-#endif
+#ifndef PERCPU_AREA_SIZE
+#define PERCPU_RESERVE_SIZE 8192
-#define PERCPU_ENOUGH_ROOM \
- (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE)
-#endif /* PERCPU_ENOUGH_ROOM */
+#define PERCPU_AREA_SIZE \
+ (__per_cpu_end - __per_cpu_start + percpu_reserve)
+#endif /* PERCPU_AREA_SIZE */
/*
* Must be an lvalue. Since @var must be a simple identifier,
@@ -128,4 +125,56 @@ static inline void percpu_free(void *__pdata)
#define free_percpu(ptr) percpu_free((ptr))
#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu))
+
+/*
+ * cpu allocator definitions
+ *
+ * The cpu allocator allows allocating an instance of an object for each
+ * processor and the use of a single pointer to access all instances
+ * of the object. cpu_alloc provides optimized means for accessing the
+ * instance of the object belonging to the currently executing processor
+ * as well as special atomic operations on fields of objects of the
+ * currently executing processor.
+ *
+ * Cpu objects are typically small. The allocator packs them tightly
+ * to increase the chance on each access that a per cpu object is already
+ * cached. Alignments may be specified but the intent is to align the data
+ * properly due to cpu alignment constraints and not to avoid cacheline
+ * contention. Any holes left by aligning objects are filled up with smaller
+ * objects that are allocated later.
+ *
+ * Cpu data can be allocated using CPU_ALLOC. The resulting pointer is
+ * pointing to the instance of the variable in the per cpu area provided
+ * by the loader. It is generally an error to use the pointer directly
+ * unless we are booting the system.
+ *
+ * __GFP_ZERO may be passed as a flag to zero the allocated memory.
+ */
+
+/*
+ * Raw calls
+ */
+void *cpu_alloc(unsigned long size, gfp_t flags, unsigned long align);
+void cpu_free(void *cpu_pointer, unsigned long size);
+
+#ifndef CONFIG_SMP
+#define per_cpu_offset(x) 0
+#define SHIFT_PERCPU_PTR(__p, __offset) (__p)
+#endif
+
+/* Return a pointer to the instance of a object for a particular processor */
+#define CPU_PTR(__p, __cpu) SHIFT_PERCPU_PTR((__p), per_cpu_offset(__cpu))
+
+/*
+ * Return a pointer to the instance of the object belonging to the processor
+ * running the current code.
+ */
+#define THIS_CPU(__p) SHIFT_PERCPU_PTR((__p), my_cpu_offset)
+#define __THIS_CPU(__p) SHIFT_PERCPU_PTR((__p), __my_cpu_offset)
+
+#define CPU_ALLOC(type, flags) ((typeof(type) *)cpu_alloc(sizeof(type), (flags), \
+ __alignof__(type)))
+#define CPU_FREE(pointer) cpu_free((pointer), sizeof(*(pointer)))
+
+
#endif /* __LINUX_PERCPU_H */
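
The comment block above describes the intended use of the new interface. A minimal sketch of a caller, assuming a made-up struct my_stats and helper names (only CPU_ALLOC, CPU_FREE, CPU_PTR, THIS_CPU and for_each_possible_cpu come from the kernel and this patch):

	/*
	 * Illustrative sketch only: struct my_stats and these functions are
	 * hypothetical; the CPU_ALLOC/CPU_FREE/THIS_CPU/CPU_PTR macros are
	 * the ones introduced above.
	 */
	struct my_stats {
		unsigned long events;
	};

	static struct my_stats *stats;

	static int my_stats_init(void)
	{
		/* One zeroed instance per possible cpu. */
		stats = CPU_ALLOC(struct my_stats, GFP_KERNEL | __GFP_ZERO);
		if (!stats)
			return -ENOMEM;
		return 0;
	}

	static void my_stats_event(void)
	{
		/*
		 * Instance of the currently executing processor; callers
		 * would normally hold off preemption around this.
		 */
		THIS_CPU(stats)->events++;
	}

	static unsigned long my_stats_total(void)
	{
		unsigned long total = 0;
		int cpu;

		for_each_possible_cpu(cpu)
			total += CPU_PTR(stats, cpu)->events;

		return total;
	}

	static void my_stats_exit(void)
	{
		CPU_FREE(stats);
	}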
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 524cd1b28ecb..d9363da258ab 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -37,7 +37,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
FOR_ALL_ZONES(PGSCAN_KSWAPD),
FOR_ALL_ZONES(PGSCAN_DIRECT),
PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
- PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+ PAGEOUTRUN, ALLOCSTALL, PGROTATED, CPU_BYTES,
#ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
#endif
diff --git a/init/main.c b/init/main.c
index c73b0e878684..306634488147 100644
--- a/init/main.c
+++ b/init/main.c
@@ -121,6 +121,7 @@ extern void time_init(void);
/* Default late time init is NULL. archs can override this later. */
void (*late_time_init)(void);
extern void softirq_init(void);
+extern void cpu_alloc_init(void);
/* Untouched command line saved by arch-specific code. */
char __initdata boot_command_line[COMMAND_LINE_SIZE];
@@ -259,6 +260,18 @@ static int __init loglevel(char *str)
early_param("loglevel", loglevel);
+#ifdef PERCPU_RESERVE_SIZE
+unsigned int percpu_reserve = PERCPU_RESERVE_SIZE;
+
+static int __init init_percpu_reserve(char *str)
+{
+ get_option(&str, &percpu_reserve);
+ return 0;
+}
+
+early_param("percpu", init_percpu_reserve);
+#endif
+
/*
* Unknown boot options get handed to init, unless they look like
* failed parameters
@@ -402,8 +415,10 @@ static void __init setup_per_cpu_areas(void)
unsigned long nr_possible_cpus = num_possible_cpus();
/* Copy section for each CPU (we discard the original) */
- size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
+ size = ALIGN(PERCPU_AREA_SIZE, PAGE_SIZE);
ptr = alloc_bootmem_pages(size * nr_possible_cpus);
+ printk(KERN_INFO "percpu area: %d bytes total, %d available.\n",
+ size, size - (__per_cpu_end - __per_cpu_start));
for_each_possible_cpu(i) {
__per_cpu_offset[i] = ptr - __per_cpu_start;
@@ -584,6 +599,7 @@ asmlinkage void __init start_kernel(void)
unwind_setup();
setup_per_cpu_areas();
setup_nr_cpu_ids();
+ cpu_alloc_init();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
/*
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 63a9f9064fd8..0f1ebb56c654 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -643,7 +643,7 @@ static int static_obj(void *obj)
*/
for_each_possible_cpu(i) {
start = (unsigned long) __per_cpu_start + per_cpu_offset(i);
- end = (unsigned long) __per_cpu_start + PERCPU_ENOUGH_ROOM
+ end = (unsigned long) __per_cpu_start + PERCPU_AREA_SIZE
+ per_cpu_offset(i);
if ((addr >= start) && (addr < end))
diff --git a/kernel/module.c b/kernel/module.c
index 48c323c97ae6..c228fb7e317a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -366,121 +366,6 @@ static struct module *find_module(const char *name)
return NULL;
}
-#ifdef CONFIG_SMP
-/* Number of blocks used and allocated. */
-static unsigned int pcpu_num_used, pcpu_num_allocated;
-/* Size of each block. -ve means used. */
-static int *pcpu_size;
-
-static int split_block(unsigned int i, unsigned short size)
-{
- /* Reallocation required? */
- if (pcpu_num_used + 1 > pcpu_num_allocated) {
- int *new;
-
- new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2,
- GFP_KERNEL);
- if (!new)
- return 0;
-
- pcpu_num_allocated *= 2;
- pcpu_size = new;
- }
-
- /* Insert a new subblock */
- memmove(&pcpu_size[i+1], &pcpu_size[i],
- sizeof(pcpu_size[0]) * (pcpu_num_used - i));
- pcpu_num_used++;
-
- pcpu_size[i+1] -= size;
- pcpu_size[i] = size;
- return 1;
-}
-
-static inline unsigned int block_size(int val)
-{
- if (val < 0)
- return -val;
- return val;
-}
-
-static void *percpu_modalloc(unsigned long size, unsigned long align,
- const char *name)
-{
- unsigned long extra;
- unsigned int i;
- void *ptr;
-
- if (align > PAGE_SIZE) {
- printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
- name, align, PAGE_SIZE);
- align = PAGE_SIZE;
- }
-
- ptr = __per_cpu_load;
- for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
- /* Extra for alignment requirement. */
- extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
- BUG_ON(i == 0 && extra != 0);
-
- if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size)
- continue;
-
- /* Transfer extra to previous block. */
- if (pcpu_size[i-1] < 0)
- pcpu_size[i-1] -= extra;
- else
- pcpu_size[i-1] += extra;
- pcpu_size[i] -= extra;
- ptr += extra;
-
- /* Split block if warranted */
- if (pcpu_size[i] - size > sizeof(unsigned long))
- if (!split_block(i, size))
- return NULL;
-
- /* Mark allocated */
- pcpu_size[i] = -pcpu_size[i];
- return ptr;
- }
-
- printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
- size);
- return NULL;
-}
-
-static void percpu_modfree(void *freeme)
-{
- unsigned int i;
- void *ptr = __per_cpu_load + block_size(pcpu_size[0]);
-
- /* First entry is core kernel percpu data. */
- for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
- if (ptr == freeme) {
- pcpu_size[i] = -pcpu_size[i];
- goto free;
- }
- }
- BUG();
-
- free:
- /* Merge with previous? */
- if (pcpu_size[i-1] >= 0) {
- pcpu_size[i-1] += pcpu_size[i];
- pcpu_num_used--;
- memmove(&pcpu_size[i], &pcpu_size[i+1],
- (pcpu_num_used - i) * sizeof(pcpu_size[0]));
- i--;
- }
- /* Merge with next? */
- if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) {
- pcpu_size[i] += pcpu_size[i+1];
- pcpu_num_used--;
- memmove(&pcpu_size[i+1], &pcpu_size[i+2],
- (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0]));
- }
-}
-
static unsigned int find_pcpusec(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
const char *secstrings)
@@ -496,48 +381,6 @@ static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
memcpy(pcpudest + per_cpu_offset(cpu), from, size);
}
-static int percpu_modinit(void)
-{
- pcpu_num_used = 2;
- pcpu_num_allocated = 2;
- pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
- GFP_KERNEL);
- /* Static in-kernel percpu data (used). */
- pcpu_size[0] = -__per_cpu_size;
- /* Free room. */
- pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
- if (pcpu_size[1] < 0) {
- printk(KERN_ERR "No per-cpu room for modules.\n");
- pcpu_num_used = 1;
- }
-
- return 0;
-}
-__initcall(percpu_modinit);
-#else /* ... !CONFIG_SMP */
-static inline void *percpu_modalloc(unsigned long size, unsigned long align,
- const char *name)
-{
- return NULL;
-}
-static inline void percpu_modfree(void *pcpuptr)
-{
- BUG();
-}
-static inline unsigned int find_pcpusec(Elf_Ehdr *hdr,
- Elf_Shdr *sechdrs,
- const char *secstrings)
-{
- return 0;
-}
-static inline void percpu_modcopy(void *pcpudst, const void *src,
- unsigned long size)
-{
- /* pcpusec should be 0, and size of that section should be 0. */
- BUG_ON(size != 0);
-}
-#endif /* CONFIG_SMP */
-
#define MODINFO_ATTR(field) \
static void setup_modinfo_##field(struct module *mod, const char *s) \
{ \
@@ -1455,7 +1298,7 @@ static void free_module(struct module *mod)
module_free(mod, mod->module_init);
kfree(mod->args);
if (mod->percpu)
- percpu_modfree(mod->percpu);
+ cpu_free(mod->percpu, mod->percpu_size);
/* Free lock-classes: */
lockdep_free_key_range(mod->module_core, mod->core_size);
@@ -1853,6 +1696,7 @@ static noinline struct module *load_module(void __user *umod,
struct kernel_param *kp;
struct module *mod;
long err = 0;
+ unsigned long percpu_size = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
unsigned long *mseg;
mm_segment_t old_fs;
@@ -1994,15 +1838,20 @@ static noinline struct module *load_module(void __user *umod,
if (pcpuindex) {
/* We have a special allocation for this section. */
- percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
- sechdrs[pcpuindex].sh_addralign,
- mod->name);
+ unsigned long align = sechdrs[pcpuindex].sh_addralign;
+
+ percpu_size = sechdrs[pcpuindex].sh_size;
+ percpu = cpu_alloc(percpu_size, GFP_KERNEL|__GFP_ZERO, align);
+ if (!percpu)
+ printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
+ percpu_size);
if (!percpu) {
err = -ENOMEM;
goto free_mod;
}
sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
mod->percpu = percpu;
+ mod->percpu_size = percpu_size;
}
/* Determine total sizes, and put offsets in sh_entsize. For now
@@ -2275,7 +2124,7 @@ static noinline struct module *load_module(void __user *umod,
module_free(mod, mod->module_core);
free_percpu:
if (percpu)
- percpu_modfree(percpu);
+ cpu_free(percpu, percpu_size);
free_mod:
kfree(args);
free_hdr:
diff --git a/mm/Makefile b/mm/Makefile
index f35fcc3b6f30..d621b994513c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
maccess.o page_alloc.o page-writeback.o pdflush.o \
readahead.o swap.o truncate.o vmscan.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
- page_isolation.o mm_init.o $(mmu-y)
+ page_isolation.o mm_init.o cpu_alloc.o $(mmu-y)
obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
obj-$(CONFIG_BOUNCE) += bounce.o
diff --git a/mm/cpu_alloc.c b/mm/cpu_alloc.c
new file mode 100644
index 000000000000..5f3e784e5903
--- /dev/null
+++ b/mm/cpu_alloc.c
@@ -0,0 +1,203 @@
+/*
+ * Cpu allocator - Manage objects allocated for each processor
+ *
+ * (C) 2008 SGI, Christoph Lameter <cl@linux-foundation.org>
+ * Basic implementation with allocation and free from a dedicated per
+ * cpu area.
+ *
+ * The per cpu allocator allows a dynamic allocation of a piece of memory on
+ * every processor. A bitmap is used to track used areas.
+ * The allocator implements tight packing to reduce the cache footprint
+ * and increase speed since cacheline contention is typically not a concern
+ * for memory mainly used by a single cpu. Small objects will fill up gaps
+ * left by larger allocations that required alignments.
+ */
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/bitmap.h>
+#include <asm/sections.h>
+#include <linux/bootmem.h>
+
+/*
+ * Basic allocation unit. A bit map is created to track the use of each
+ * UNIT_SIZE element in the cpu area.
+ */
+#define UNIT_TYPE int
+#define UNIT_SIZE sizeof(UNIT_TYPE)
+
+#ifdef CONFIG_SMP
+/*
+ * cpu_alloc area immediately follows the percpu area that is allocated for
+ * each processor.
+ */
+#define cpu_alloc_start ((int *)__per_cpu_end)
+#else
+/* cpu_alloc area is separately allocated for UP */
+static int *cpu_alloc_start;
+#endif
+
+/*
+ * How many units are needed for an object of a given size
+ */
+static int size_to_units(unsigned long size)
+{
+ return DIV_ROUND_UP(size, UNIT_SIZE);
+}
+
+/*
+ * Lock to protect the bitmap and the meta data for the cpu allocator.
+ */
+static DEFINE_SPINLOCK(cpu_alloc_map_lock);
+static unsigned long *cpu_alloc_map;
+static unsigned long nr_units; /* Number of available units */
+static unsigned long first_free; /* First known free unit */
+static unsigned long base_percpu_in_units; /* Size of base percpu area in units */
+
+/*
+ * Mark an object as used in the cpu_alloc_map
+ *
+ * Must hold cpu_alloc_map_lock
+ */
+static void set_map(int start, int length)
+{
+ while (length-- > 0)
+ __set_bit(start++, cpu_alloc_map);
+}
+
+/*
+ * Mark an area as freed.
+ *
+ * Must hold cpu_alloc_map_lock
+ */
+static void clear_map(int start, int length)
+{
+ while (length-- > 0)
+ __clear_bit(start++, cpu_alloc_map);
+}
+
+/*
+ * Allocate an object of a certain size
+ *
+ * Returns a special pointer that can be used with CPU_PTR to find the
+ * address of the object for a certain cpu.
+ */
+void *cpu_alloc(unsigned long size, gfp_t gfpflags, unsigned long align)
+{
+ unsigned long start;
+ int units = size_to_units(size);
+ void *ptr;
+ int first;
+ unsigned long flags;
+
+ if (!size)
+ return ZERO_SIZE_PTR;
+
+ WARN_ON(align > PAGE_SIZE);
+
+ if (align < UNIT_SIZE)
+ align = UNIT_SIZE;
+
+ spin_lock_irqsave(&cpu_alloc_map_lock, flags);
+
+ first = 1;
+ start = first_free;
+
+ for ( ; ; ) {
+
+ start = find_next_zero_bit(cpu_alloc_map, nr_units, start);
+ if (start >= nr_units)
+ goto out_of_memory;
+
+ if (first)
+ first_free = start;
+
+ /*
+ * Check alignment and that there is enough space after
+ * the starting unit.
+ */
+ if ((base_percpu_in_units + start) %
+ (align / UNIT_SIZE) == 0 &&
+ find_next_bit(cpu_alloc_map, nr_units, start + 1)
+ >= start + units)
+ break;
+ start++;
+ first = 0;
+ }
+
+ if (first)
+ first_free = start + units;
+
+ if (start + units > nr_units)
+ goto out_of_memory;
+
+ set_map(start, units);
+ __count_vm_events(CPU_BYTES, units * UNIT_SIZE);
+
+ spin_unlock_irqrestore(&cpu_alloc_map_lock, flags);
+
+ ptr = cpu_alloc_start + start;
+
+ if (gfpflags & __GFP_ZERO) {
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ memset(CPU_PTR(ptr, cpu), 0, size);
+ }
+
+ return ptr;
+
+out_of_memory:
+ spin_unlock_irqrestore(&cpu_alloc_map_lock, flags);
+ return NULL;
+}
+EXPORT_SYMBOL(cpu_alloc);
+
+/*
+ * Free an object. The pointer must be a cpu pointer allocated
+ * via cpu_alloc.
+ */
+void cpu_free(void *start, unsigned long size)
+{
+ unsigned long units = size_to_units(size);
+ unsigned long index = (int *)start - cpu_alloc_start;
+ unsigned long flags;
+
+ if (!start || start == ZERO_SIZE_PTR)
+ return;
+
+ if (WARN_ON(index >= nr_units))
+ return;
+
+ if (WARN_ON(!test_bit(index, cpu_alloc_map) ||
+ !test_bit(index + units - 1, cpu_alloc_map)))
+ return;
+
+ spin_lock_irqsave(&cpu_alloc_map_lock, flags);
+
+ clear_map(index, units);
+ __count_vm_events(CPU_BYTES, -units * UNIT_SIZE);
+
+ if (index < first_free)
+ first_free = index;
+
+ spin_unlock_irqrestore(&cpu_alloc_map_lock, flags);
+}
+EXPORT_SYMBOL(cpu_free);
+
+
+void __init cpu_alloc_init(void)
+{
+#ifdef CONFIG_SMP
+ base_percpu_in_units = (__per_cpu_end - __per_cpu_start
+ + UNIT_SIZE - 1) / UNIT_SIZE;
+#endif
+ nr_units = PERCPU_AREA_SIZE / UNIT_SIZE - base_percpu_in_units;
+
+ cpu_alloc_map = alloc_bootmem(BITS_TO_LONGS(nr_units));
+#ifndef CONFIG_SMP
+ cpu_alloc_start = alloc_bootmem(nr_units * UNIT_SIZE);
+#endif
+}
+
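
A user-space sketch of the first-fit search that cpu_alloc() performs over the unit bitmap, with plain loops standing in for find_next_zero_bit()/find_next_bit() (illustrative only; the area size and static-section size below are made up):

	/* Sketch of cpu_alloc()'s unit/alignment arithmetic, illustrative only. */
	#include <stdbool.h>
	#include <stdio.h>

	#define UNIT_SIZE	sizeof(int)
	#define NR_UNITS	64		/* made-up area size */

	static bool map[NR_UNITS];		/* true = unit in use */
	static unsigned long base_units = 3;	/* made-up static percpu size, in units */

	static long alloc_units(unsigned long size, unsigned long align)
	{
		unsigned long units = (size + UNIT_SIZE - 1) / UNIT_SIZE;
		unsigned long align_units = align >= UNIT_SIZE ? align / UNIT_SIZE : 1;
		unsigned long start, i;

		for (start = 0; start + units <= NR_UNITS; start++) {
			/*
			 * As in the patch, alignment is measured from the start
			 * of the percpu area, so the static section size is
			 * added in before the modulo check.
			 */
			if ((base_units + start) % align_units)
				continue;

			for (i = 0; i < units && !map[start + i]; i++)
				;
			if (i == units) {
				for (i = 0; i < units; i++)
					map[start + i] = true;
				return start;
			}
		}
		return -1;			/* out of space */
	}

	int main(void)
	{
		printf("12 bytes           -> start unit %ld\n", alloc_units(12, 0));
		printf("32 bytes, align 16 -> start unit %ld\n", alloc_units(32, 16));
		return 0;
	}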
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c4b7280e6e70..28368657aff4 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -701,6 +701,7 @@ static const char * const vmstat_text[] = {
"allocstall",
"pgrotated",
+ "cpu_bytes",
#ifdef CONFIG_HUGETLB_PAGE
"htlb_buddy_alloc_success",
"htlb_buddy_alloc_fail",