From 6b588c18f8dacfa6d7957c33c5ff832096e752d3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:07 +0900 Subject: module: reorder module pcpu related functions Impact: cleanup Move percpu_modinit() upwards. This is to ease further changes. Signed-off-by: Tejun Heo --- kernel/module.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index ba22484a987e..52b3497b8748 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -480,21 +480,6 @@ static void percpu_modfree(void *freeme) } } -static unsigned int find_pcpusec(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) -{ - return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); -} - -static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) -{ - int cpu; - - for_each_possible_cpu(cpu) - memcpy(pcpudest + per_cpu_offset(cpu), from, size); -} - static int percpu_modinit(void) { pcpu_num_used = 2; @@ -513,7 +498,24 @@ static int percpu_modinit(void) return 0; } __initcall(percpu_modinit); + +static unsigned int find_pcpusec(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + const char *secstrings) +{ + return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); +} + +static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) +{ + int cpu; + + for_each_possible_cpu(cpu) + memcpy(pcpudest + per_cpu_offset(cpu), from, size); +} + #else /* ... !CONFIG_SMP */ + static inline void *percpu_modalloc(unsigned long size, unsigned long align, const char *name) { @@ -535,6 +537,7 @@ static inline void percpu_modcopy(void *pcpudst, const void *src, /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); } + #endif /* CONFIG_SMP */ #define MODINFO_ATTR(field) \ -- cgit v1.2.3 From fbf59bc9d74d1fb30b8e0630743aff2806eafcea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: percpu: implement new dynamic percpu allocator Impact: new scalable dynamic percpu allocator which allows dynamic percpu areas to be accessed the same way as static ones Implement scalable dynamic percpu allocator which can be used for both static and dynamic percpu areas. This will allow static and dynamic areas to share faster direct access methods. This feature is optional and enabled only when CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is defined by arch. Please read comment on top of mm/percpu.c for details. Signed-off-by: Tejun Heo Cc: Andrew Morton --- include/linux/percpu.h | 22 +- kernel/module.c | 31 ++ mm/Makefile | 4 + mm/percpu.c | 890 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 943 insertions(+), 4 deletions(-) create mode 100644 mm/percpu.c (limited to 'kernel/module.c') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d99e24ae1811..18080995ff3e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -76,23 +76,37 @@ #ifdef CONFIG_SMP -struct percpu_data { - void *ptrs[1]; -}; +#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA -#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) +extern void *pcpu_base_addr; +typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); + +extern size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, + struct page **pages, size_t cpu_size); /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's * version should probably be combined with get_cpu()/put_cpu(). */ +#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) + +#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + +struct percpu_data { + void *ptrs[1]; +}; + +#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) + #define per_cpu_ptr(ptr, cpu) \ ({ \ struct percpu_data *__p = __percpu_disguise(ptr); \ (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) +#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + extern void *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void *__pdata); diff --git a/kernel/module.c b/kernel/module.c index 52b3497b8748..1f0657ae555b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -51,6 +51,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -366,6 +367,34 @@ static struct module *find_module(const char *name) } #ifdef CONFIG_SMP + +#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA + +static void *percpu_modalloc(unsigned long size, unsigned long align, + const char *name) +{ + void *ptr; + + if (align > PAGE_SIZE) { + printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", + name, align, PAGE_SIZE); + align = PAGE_SIZE; + } + + ptr = __alloc_percpu(size, align); + if (!ptr) + printk(KERN_WARNING + "Could not allocate %lu bytes percpu data\n", size); + return ptr; +} + +static void percpu_modfree(void *freeme) +{ + free_percpu(freeme); +} + +#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + /* Number of blocks used and allocated. */ static unsigned int pcpu_num_used, pcpu_num_allocated; /* Size of each block. -ve means used. */ @@ -499,6 +528,8 @@ static int percpu_modinit(void) } __initcall(percpu_modinit); +#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + static unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, const char *secstrings) diff --git a/mm/Makefile b/mm/Makefile index 72255be57f89..818569b68f46 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -30,6 +30,10 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o +ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +obj-$(CONFIG_SMP) += percpu.o +else obj-$(CONFIG_SMP) += allocpercpu.o +endif obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o diff --git a/mm/percpu.c b/mm/percpu.c new file mode 100644 index 000000000000..4617d97e877c --- /dev/null +++ b/mm/percpu.c @@ -0,0 +1,890 @@ +/* + * linux/mm/percpu.c - percpu memory allocator + * + * Copyright (C) 2009 SUSE Linux Products GmbH + * Copyright (C) 2009 Tejun Heo + * + * This file is released under the GPLv2. + * + * This is percpu allocator which can handle both static and dynamic + * areas. Percpu areas are allocated in chunks in vmalloc area. Each + * chunk is consisted of num_possible_cpus() units and the first chunk + * is used for static percpu variables in the kernel image (special + * boot time alloc/init handling necessary as these areas need to be + * brought up before allocation services are running). Unit grows as + * necessary and all units grow or shrink in unison. When a chunk is + * filled up, another chunk is allocated. ie. in vmalloc area + * + * c0 c1 c2 + * ------------------- ------------------- ------------ + * | u0 | u1 | u2 | u3 | | u0 | u1 | u2 | u3 | | u0 | u1 | u + * ------------------- ...... ------------------- .... ------------ + * + * Allocation is done in offset-size areas of single unit space. Ie, + * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, + * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring + * percpu base registers UNIT_SIZE apart. + * + * There are usually many small percpu allocations many of them as + * small as 4 bytes. The allocator organizes chunks into lists + * according to free size and tries to allocate from the fullest one. + * Each chunk keeps the maximum contiguous area size hint which is + * guaranteed to be eqaul to or larger than the maximum contiguous + * area in the chunk. This helps the allocator not to iterate the + * chunk maps unnecessarily. + * + * Allocation state in each chunk is kept using an array of integers + * on chunk->map. A positive value in the map represents a free + * region and negative allocated. Allocation inside a chunk is done + * by scanning this map sequentially and serving the first matching + * entry. This is mostly copied from the percpu_modalloc() allocator. + * Chunks are also linked into a rb tree to ease address to chunk + * mapping during free. + * + * To use this allocator, arch code should do the followings. + * + * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA + * + * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate + * regular address to percpu pointer and back + * + * - use pcpu_setup_static() during percpu area initialization to + * setup kernel static percpu area + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define PCPU_MIN_UNIT_PAGES_SHIFT 4 /* also max alloc size */ +#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ +#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ + +struct pcpu_chunk { + struct list_head list; /* linked to pcpu_slot lists */ + struct rb_node rb_node; /* key is chunk->vm->addr */ + int free_size; /* free bytes in the chunk */ + int contig_hint; /* max contiguous size hint */ + struct vm_struct *vm; /* mapped vmalloc region */ + int map_used; /* # of map entries used */ + int map_alloc; /* # of map entries allocated */ + int *map; /* allocation map */ + struct page *page[]; /* #cpus * UNIT_PAGES */ +}; + +static int pcpu_unit_pages_shift; +static int pcpu_unit_pages; +static int pcpu_unit_shift; +static int pcpu_unit_size; +static int pcpu_chunk_size; +static int pcpu_nr_slots; +static size_t pcpu_chunk_struct_size; + +/* the address of the first chunk which starts with the kernel static area */ +void *pcpu_base_addr; +EXPORT_SYMBOL_GPL(pcpu_base_addr); + +/* the size of kernel static area */ +static int pcpu_static_size; + +/* + * One mutex to rule them all. + * + * The following mutex is grabbed in the outermost public alloc/free + * interface functions and released only when the operation is + * complete. As such, every function in this file other than the + * outermost functions are called under pcpu_mutex. + * + * It can easily be switched to use spinlock such that only the area + * allocation and page population commit are protected with it doing + * actual [de]allocation without holding any lock. However, given + * what this allocator does, I think it's better to let them run + * sequentially. + */ +static DEFINE_MUTEX(pcpu_mutex); + +static struct list_head *pcpu_slot; /* chunk list slots */ +static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ + +static int pcpu_size_to_slot(int size) +{ + int highbit = fls(size); + return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1); +} + +static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) +{ + if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int)) + return 0; + + return pcpu_size_to_slot(chunk->free_size); +} + +static int pcpu_page_idx(unsigned int cpu, int page_idx) +{ + return (cpu << pcpu_unit_pages_shift) + page_idx; +} + +static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, + unsigned int cpu, int page_idx) +{ + return &chunk->page[pcpu_page_idx(cpu, page_idx)]; +} + +static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, + unsigned int cpu, int page_idx) +{ + return (unsigned long)chunk->vm->addr + + (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); +} + +static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, + int page_idx) +{ + return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; +} + +/** + * pcpu_realloc - versatile realloc + * @p: the current pointer (can be NULL for new allocations) + * @size: the current size (can be 0 for new allocations) + * @new_size: the wanted new size (can be 0 for free) + * + * More robust realloc which can be used to allocate, resize or free a + * memory area of arbitrary size. If the needed size goes over + * PAGE_SIZE, kernel VM is used. + * + * RETURNS: + * The new pointer on success, NULL on failure. + */ +static void *pcpu_realloc(void *p, size_t size, size_t new_size) +{ + void *new; + + if (new_size <= PAGE_SIZE) + new = kmalloc(new_size, GFP_KERNEL); + else + new = vmalloc(new_size); + if (new_size && !new) + return NULL; + + memcpy(new, p, min(size, new_size)); + if (new_size > size) + memset(new + size, 0, new_size - size); + + if (size <= PAGE_SIZE) + kfree(p); + else + vfree(p); + + return new; +} + +/** + * pcpu_chunk_relocate - put chunk in the appropriate chunk slot + * @chunk: chunk of interest + * @oslot: the previous slot it was on + * + * This function is called after an allocation or free changed @chunk. + * New slot according to the changed state is determined and @chunk is + * moved to the slot. + */ +static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) +{ + int nslot = pcpu_chunk_slot(chunk); + + if (oslot != nslot) { + if (oslot < nslot) + list_move(&chunk->list, &pcpu_slot[nslot]); + else + list_move_tail(&chunk->list, &pcpu_slot[nslot]); + } +} + +static struct rb_node **pcpu_chunk_rb_search(void *addr, + struct rb_node **parentp) +{ + struct rb_node **p = &pcpu_addr_root.rb_node; + struct rb_node *parent = NULL; + struct pcpu_chunk *chunk; + + while (*p) { + parent = *p; + chunk = rb_entry(parent, struct pcpu_chunk, rb_node); + + if (addr < chunk->vm->addr) + p = &(*p)->rb_left; + else if (addr > chunk->vm->addr) + p = &(*p)->rb_right; + else + break; + } + + if (parentp) + *parentp = parent; + return p; +} + +/** + * pcpu_chunk_addr_search - search for chunk containing specified address + * @addr: address to search for + * + * Look for chunk which might contain @addr. More specifically, it + * searchs for the chunk with the highest start address which isn't + * beyond @addr. + * + * RETURNS: + * The address of the found chunk. + */ +static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) +{ + struct rb_node *n, *parent; + struct pcpu_chunk *chunk; + + n = *pcpu_chunk_rb_search(addr, &parent); + if (!n) { + /* no exactly matching chunk, the parent is the closest */ + n = parent; + BUG_ON(!n); + } + chunk = rb_entry(n, struct pcpu_chunk, rb_node); + + if (addr < chunk->vm->addr) { + /* the parent was the next one, look for the previous one */ + n = rb_prev(n); + BUG_ON(!n); + chunk = rb_entry(n, struct pcpu_chunk, rb_node); + } + + return chunk; +} + +/** + * pcpu_chunk_addr_insert - insert chunk into address rb tree + * @new: chunk to insert + * + * Insert @new into address rb tree. + */ +static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) +{ + struct rb_node **p, *parent; + + p = pcpu_chunk_rb_search(new->vm->addr, &parent); + BUG_ON(*p); + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, &pcpu_addr_root); +} + +/** + * pcpu_split_block - split a map block + * @chunk: chunk of interest + * @i: index of map block to split + * @head: head size (can be 0) + * @tail: tail size (can be 0) + * + * Split the @i'th map block into two or three blocks. If @head is + * non-zero, @head bytes block is inserted before block @i moving it + * to @i+1 and reducing its size by @head bytes. + * + * If @tail is non-zero, the target block, which can be @i or @i+1 + * depending on @head, is reduced by @tail bytes and @tail byte block + * is inserted after the target block. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) +{ + int nr_extra = !!head + !!tail; + int target = chunk->map_used + nr_extra; + + /* reallocation required? */ + if (chunk->map_alloc < target) { + int new_alloc = chunk->map_alloc; + int *new; + + while (new_alloc < target) + new_alloc *= 2; + + new = pcpu_realloc(chunk->map, + chunk->map_alloc * sizeof(new[0]), + new_alloc * sizeof(new[0])); + if (!new) + return -ENOMEM; + + chunk->map_alloc = new_alloc; + chunk->map = new; + } + + /* insert a new subblock */ + memmove(&chunk->map[i + nr_extra], &chunk->map[i], + sizeof(chunk->map[0]) * (chunk->map_used - i)); + chunk->map_used += nr_extra; + + if (head) { + chunk->map[i + 1] = chunk->map[i] - head; + chunk->map[i++] = head; + } + if (tail) { + chunk->map[i++] -= tail; + chunk->map[i] = tail; + } + return 0; +} + +/** + * pcpu_alloc_area - allocate area from a pcpu_chunk + * @chunk: chunk of interest + * @size: wanted size + * @align: wanted align + * + * Try to allocate @size bytes area aligned at @align from @chunk. + * Note that this function only allocates the offset. It doesn't + * populate or map the area. + * + * RETURNS: + * Allocated offset in @chunk on success, -errno on failure. + */ +static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) +{ + int oslot = pcpu_chunk_slot(chunk); + int max_contig = 0; + int i, off; + + /* + * The static chunk initially doesn't have map attached + * because kmalloc wasn't available during init. Give it one. + */ + if (unlikely(!chunk->map)) { + chunk->map = pcpu_realloc(NULL, 0, + PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); + if (!chunk->map) + return -ENOMEM; + + chunk->map_alloc = PCPU_DFL_MAP_ALLOC; + chunk->map[chunk->map_used++] = -pcpu_static_size; + if (chunk->free_size) + chunk->map[chunk->map_used++] = chunk->free_size; + } + + for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) { + bool is_last = i + 1 == chunk->map_used; + int head, tail; + + /* extra for alignment requirement */ + head = ALIGN(off, align) - off; + BUG_ON(i == 0 && head != 0); + + if (chunk->map[i] < 0) + continue; + if (chunk->map[i] < head + size) { + max_contig = max(chunk->map[i], max_contig); + continue; + } + + /* + * If head is small or the previous block is free, + * merge'em. Note that 'small' is defined as smaller + * than sizeof(int), which is very small but isn't too + * uncommon for percpu allocations. + */ + if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) { + if (chunk->map[i - 1] > 0) + chunk->map[i - 1] += head; + else { + chunk->map[i - 1] -= head; + chunk->free_size -= head; + } + chunk->map[i] -= head; + off += head; + head = 0; + } + + /* if tail is small, just keep it around */ + tail = chunk->map[i] - head - size; + if (tail < sizeof(int)) + tail = 0; + + /* split if warranted */ + if (head || tail) { + if (pcpu_split_block(chunk, i, head, tail)) + return -ENOMEM; + if (head) { + i++; + off += head; + max_contig = max(chunk->map[i - 1], max_contig); + } + if (tail) + max_contig = max(chunk->map[i + 1], max_contig); + } + + /* update hint and mark allocated */ + if (is_last) + chunk->contig_hint = max_contig; /* fully scanned */ + else + chunk->contig_hint = max(chunk->contig_hint, + max_contig); + + chunk->free_size -= chunk->map[i]; + chunk->map[i] = -chunk->map[i]; + + pcpu_chunk_relocate(chunk, oslot); + return off; + } + + chunk->contig_hint = max_contig; /* fully scanned */ + pcpu_chunk_relocate(chunk, oslot); + + /* + * Tell the upper layer that this chunk has no area left. + * Note that this is not an error condition but a notification + * to upper layer that it needs to look at other chunks. + * -ENOSPC is chosen as it isn't used in memory subsystem and + * matches the meaning in a way. + */ + return -ENOSPC; +} + +/** + * pcpu_free_area - free area to a pcpu_chunk + * @chunk: chunk of interest + * @freeme: offset of area to free + * + * Free area starting from @freeme to @chunk. Note that this function + * only modifies the allocation map. It doesn't depopulate or unmap + * the area. + */ +static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) +{ + int oslot = pcpu_chunk_slot(chunk); + int i, off; + + for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) + if (off == freeme) + break; + BUG_ON(off != freeme); + BUG_ON(chunk->map[i] > 0); + + chunk->map[i] = -chunk->map[i]; + chunk->free_size += chunk->map[i]; + + /* merge with previous? */ + if (i > 0 && chunk->map[i - 1] >= 0) { + chunk->map[i - 1] += chunk->map[i]; + chunk->map_used--; + memmove(&chunk->map[i], &chunk->map[i + 1], + (chunk->map_used - i) * sizeof(chunk->map[0])); + i--; + } + /* merge with next? */ + if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) { + chunk->map[i] += chunk->map[i + 1]; + chunk->map_used--; + memmove(&chunk->map[i + 1], &chunk->map[i + 2], + (chunk->map_used - (i + 1)) * sizeof(chunk->map[0])); + } + + chunk->contig_hint = max(chunk->map[i], chunk->contig_hint); + pcpu_chunk_relocate(chunk, oslot); +} + +/** + * pcpu_unmap - unmap pages out of a pcpu_chunk + * @chunk: chunk of interest + * @page_start: page index of the first page to unmap + * @page_end: page index of the last page to unmap + 1 + * @flush: whether to flush cache and tlb or not + * + * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. + * If @flush is true, vcache is flushed before unmapping and tlb + * after. + */ +static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, + bool flush) +{ + unsigned int last = num_possible_cpus() - 1; + unsigned int cpu; + + /* + * Each flushing trial can be very expensive, issue flush on + * the whole region at once rather than doing it for each cpu. + * This could be an overkill but is more scalable. + */ + if (flush) + flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); + + for_each_possible_cpu(cpu) + unmap_kernel_range_noflush( + pcpu_chunk_addr(chunk, cpu, page_start), + (page_end - page_start) << PAGE_SHIFT); + + /* ditto as flush_cache_vunmap() */ + if (flush) + flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); +} + +/** + * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk + * @chunk: chunk to depopulate + * @off: offset to the area to depopulate + * @size: size of the area to depopulate + * @flush: whether to flush cache and tlb or not + * + * For each cpu, depopulate and unmap pages [@page_start,@page_end) + * from @chunk. If @flush is true, vcache is flushed before unmapping + * and tlb after. + */ +static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, size_t off, + size_t size, bool flush) +{ + int page_start = PFN_DOWN(off); + int page_end = PFN_UP(off + size); + int unmap_start = -1; + int uninitialized_var(unmap_end); + unsigned int cpu; + int i; + + for (i = page_start; i < page_end; i++) { + for_each_possible_cpu(cpu) { + struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); + + if (!*pagep) + continue; + + __free_page(*pagep); + + /* + * If it's partial depopulation, it might get + * populated or depopulated again. Mark the + * page gone. + */ + *pagep = NULL; + + unmap_start = unmap_start < 0 ? i : unmap_start; + unmap_end = i + 1; + } + } + + if (unmap_start >= 0) + pcpu_unmap(chunk, unmap_start, unmap_end, flush); +} + +/** + * pcpu_map - map pages into a pcpu_chunk + * @chunk: chunk of interest + * @page_start: page index of the first page to map + * @page_end: page index of the last page to map + 1 + * + * For each cpu, map pages [@page_start,@page_end) into @chunk. + * vcache is flushed afterwards. + */ +static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + unsigned int cpu; + int err; + + for_each_possible_cpu(cpu) { + err = map_kernel_range_noflush( + pcpu_chunk_addr(chunk, cpu, page_start), + (page_end - page_start) << PAGE_SHIFT, + PAGE_KERNEL, + pcpu_chunk_pagep(chunk, cpu, page_start)); + if (err < 0) + return err; + } + + /* flush at once, please read comments in pcpu_unmap() */ + flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); + return 0; +} + +/** + * pcpu_populate_chunk - populate and map an area of a pcpu_chunk + * @chunk: chunk of interest + * @off: offset to the area to populate + * @size: size of the area to populate + * + * For each cpu, populate and map pages [@page_start,@page_end) into + * @chunk. The area is cleared on return. + */ +static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) +{ + const gfp_t alloc_mask = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; + int page_start = PFN_DOWN(off); + int page_end = PFN_UP(off + size); + int map_start = -1; + int map_end; + unsigned int cpu; + int i; + + for (i = page_start; i < page_end; i++) { + if (pcpu_chunk_page_occupied(chunk, i)) { + if (map_start >= 0) { + if (pcpu_map(chunk, map_start, map_end)) + goto err; + map_start = -1; + } + continue; + } + + map_start = map_start < 0 ? i : map_start; + map_end = i + 1; + + for_each_possible_cpu(cpu) { + struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); + + *pagep = alloc_pages_node(cpu_to_node(cpu), + alloc_mask, 0); + if (!*pagep) + goto err; + } + } + + if (map_start >= 0 && pcpu_map(chunk, map_start, map_end)) + goto err; + + for_each_possible_cpu(cpu) + memset(chunk->vm->addr + (cpu << pcpu_unit_shift) + off, 0, + size); + + return 0; +err: + /* likely under heavy memory pressure, give memory back */ + pcpu_depopulate_chunk(chunk, off, size, true); + return -ENOMEM; +} + +static void free_pcpu_chunk(struct pcpu_chunk *chunk) +{ + if (!chunk) + return; + if (chunk->vm) + free_vm_area(chunk->vm); + pcpu_realloc(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]), 0); + kfree(chunk); +} + +static struct pcpu_chunk *alloc_pcpu_chunk(void) +{ + struct pcpu_chunk *chunk; + + chunk = kzalloc(pcpu_chunk_struct_size, GFP_KERNEL); + if (!chunk) + return NULL; + + chunk->map = pcpu_realloc(NULL, 0, + PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); + chunk->map_alloc = PCPU_DFL_MAP_ALLOC; + chunk->map[chunk->map_used++] = pcpu_unit_size; + + chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); + if (!chunk->vm) { + free_pcpu_chunk(chunk); + return NULL; + } + + INIT_LIST_HEAD(&chunk->list); + chunk->free_size = pcpu_unit_size; + chunk->contig_hint = pcpu_unit_size; + + return chunk; +} + +/** + * __alloc_percpu - allocate percpu area + * @size: size of area to allocate + * @align: alignment of area (max PAGE_SIZE) + * + * Allocate percpu area of @size bytes aligned at @align. Might + * sleep. Might trigger writeouts. + * + * RETURNS: + * Percpu pointer to the allocated area on success, NULL on failure. + */ +void *__alloc_percpu(size_t size, size_t align) +{ + void *ptr = NULL; + struct pcpu_chunk *chunk; + int slot, off; + + if (unlikely(!size || size > PAGE_SIZE << PCPU_MIN_UNIT_PAGES_SHIFT || + align > PAGE_SIZE)) { + WARN(true, "illegal size (%zu) or align (%zu) for " + "percpu allocation\n", size, align); + return NULL; + } + + mutex_lock(&pcpu_mutex); + + /* allocate area */ + for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) { + list_for_each_entry(chunk, &pcpu_slot[slot], list) { + if (size > chunk->contig_hint) + continue; + off = pcpu_alloc_area(chunk, size, align); + if (off >= 0) + goto area_found; + if (off != -ENOSPC) + goto out_unlock; + } + } + + /* hmmm... no space left, create a new chunk */ + chunk = alloc_pcpu_chunk(); + if (!chunk) + goto out_unlock; + pcpu_chunk_relocate(chunk, -1); + pcpu_chunk_addr_insert(chunk); + + off = pcpu_alloc_area(chunk, size, align); + if (off < 0) + goto out_unlock; + +area_found: + /* populate, map and clear the area */ + if (pcpu_populate_chunk(chunk, off, size)) { + pcpu_free_area(chunk, off); + goto out_unlock; + } + + ptr = __addr_to_pcpu_ptr(chunk->vm->addr + off); +out_unlock: + mutex_unlock(&pcpu_mutex); + return ptr; +} +EXPORT_SYMBOL_GPL(__alloc_percpu); + +static void pcpu_kill_chunk(struct pcpu_chunk *chunk) +{ + pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); + list_del(&chunk->list); + rb_erase(&chunk->rb_node, &pcpu_addr_root); + free_pcpu_chunk(chunk); +} + +/** + * free_percpu - free percpu area + * @ptr: pointer to area to free + * + * Free percpu area @ptr. Might sleep. + */ +void free_percpu(void *ptr) +{ + void *addr = __pcpu_ptr_to_addr(ptr); + struct pcpu_chunk *chunk; + int off; + + if (!ptr) + return; + + mutex_lock(&pcpu_mutex); + + chunk = pcpu_chunk_addr_search(addr); + off = addr - chunk->vm->addr; + + pcpu_free_area(chunk, off); + + /* the chunk became fully free, kill one if there are other free ones */ + if (chunk->free_size == pcpu_unit_size) { + struct pcpu_chunk *pos; + + list_for_each_entry(pos, + &pcpu_slot[pcpu_chunk_slot(chunk)], list) + if (pos != chunk) { + pcpu_kill_chunk(pos); + break; + } + } + + mutex_unlock(&pcpu_mutex); +} +EXPORT_SYMBOL_GPL(free_percpu); + +/** + * pcpu_setup_static - initialize kernel static percpu area + * @populate_pte_fn: callback to allocate pagetable + * @pages: num_possible_cpus() * PFN_UP(cpu_size) pages + * + * Initialize kernel static percpu area. The caller should allocate + * all the necessary pages and pass them in @pages. + * @populate_pte_fn() is called on each page to be used for percpu + * mapping and is responsible for making sure all the necessary page + * tables for the page is allocated. + * + * RETURNS: + * The determined pcpu_unit_size which can be used to initialize + * percpu access. + */ +size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, + struct page **pages, size_t cpu_size) +{ + static struct vm_struct static_vm; + struct pcpu_chunk *static_chunk; + int nr_cpu_pages = DIV_ROUND_UP(cpu_size, PAGE_SIZE); + unsigned int cpu; + int err, i; + + pcpu_unit_pages_shift = max_t(int, PCPU_MIN_UNIT_PAGES_SHIFT, + order_base_2(cpu_size) - PAGE_SHIFT); + + pcpu_static_size = cpu_size; + pcpu_unit_pages = 1 << pcpu_unit_pages_shift; + pcpu_unit_shift = PAGE_SHIFT + pcpu_unit_pages_shift; + pcpu_unit_size = 1 << pcpu_unit_shift; + pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; + pcpu_nr_slots = pcpu_size_to_slot(pcpu_unit_size) + 1; + pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + + (1 << pcpu_unit_pages_shift) * sizeof(struct page *); + + /* allocate chunk slots */ + pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0])); + for (i = 0; i < pcpu_nr_slots; i++) + INIT_LIST_HEAD(&pcpu_slot[i]); + + /* init and register vm area */ + static_vm.flags = VM_ALLOC; + static_vm.size = pcpu_chunk_size; + vm_area_register_early(&static_vm); + + /* init static_chunk */ + static_chunk = alloc_bootmem(pcpu_chunk_struct_size); + INIT_LIST_HEAD(&static_chunk->list); + static_chunk->vm = &static_vm; + static_chunk->free_size = pcpu_unit_size - pcpu_static_size; + static_chunk->contig_hint = static_chunk->free_size; + + /* assign pages and map them */ + for_each_possible_cpu(cpu) { + for (i = 0; i < nr_cpu_pages; i++) { + *pcpu_chunk_pagep(static_chunk, cpu, i) = *pages++; + populate_pte_fn(pcpu_chunk_addr(static_chunk, cpu, i)); + } + } + + err = pcpu_map(static_chunk, 0, nr_cpu_pages); + if (err) + panic("failed to setup static percpu area, err=%d\n", err); + + /* link static_chunk in */ + pcpu_chunk_relocate(static_chunk, -1); + pcpu_chunk_addr_insert(static_chunk); + + /* we're done */ + pcpu_base_addr = (void *)pcpu_chunk_addr(static_chunk, 0, 0); + return pcpu_unit_size; +} -- cgit v1.2.3 From edcb463997ed7b2ffa3bac76e3e75957318f2e01 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: percpu, module: implement reserved allocation and use it for module percpu variables Impact: add reserved allocation functionality and use it for module percpu variables This patch implements reserved allocation from the first chunk. When setting up the first chunk, arch can ask to set aside certain number of bytes right after the core static area which is available only through a separate reserved allocator. This will be used primarily for module static percpu variables on architectures with limited relocation range to ensure that the module perpcu symbols are inside the relocatable range. If reserved area is requested, the first chunk becomes reserved and isn't available for regular allocation. If the first chunk also includes piggy-back dynamic allocation area, a separate chunk mapping the same region is created to serve dynamic allocation. The first one is called static first chunk and the second dynamic first chunk. Although they share the page map, their different area map initializations guarantee they serve disjoint areas according to their purposes. If arch doesn't setup reserved area, reserved allocation is handled like any other allocation. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 8 +-- include/linux/percpu.h | 10 +-- kernel/module.c | 2 +- mm/percpu.c | 153 +++++++++++++++++++++++++++++++++++------ 4 files changed, 144 insertions(+), 29 deletions(-) (limited to 'kernel/module.c') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 38e2b2a470a5..dd4eabc747c8 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -217,7 +217,7 @@ proceed: pr_info("PERCPU: Remapped at %p with large pages, static data " "%zu bytes\n", vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, PMD_SIZE, + ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, 0, PMD_SIZE, pcpur_size - static_size, vm.addr, NULL); goto out_free_ar; @@ -297,7 +297,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); - return pcpu_setup_first_chunk(pcpue_get_page, static_size, + return pcpu_setup_first_chunk(pcpue_get_page, static_size, 0, pcpue_unit_size, dyn_size, pcpue_ptr, NULL); } @@ -356,8 +356,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", pcpu4k_nr_static_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, -1, -1, NULL, - pcpu4k_populate_pte); + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, -1, -1, + NULL, pcpu4k_populate_pte); goto out_free_ar; enomem: diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a96fc53bbd62..8ff15153ae20 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -117,10 +117,10 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, - ssize_t unit_size, ssize_t dyn_size, - void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn); + size_t static_size, size_t reserved_size, + ssize_t unit_size, ssize_t dyn_size, + void *base_addr, + pcpu_populate_pte_fn_t populate_pte_fn); /* * Use this to get to a cpu's version of the per-cpu object @@ -129,6 +129,8 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, */ #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +extern void *__alloc_reserved_percpu(size_t size, size_t align); + #else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ struct percpu_data { diff --git a/kernel/module.c b/kernel/module.c index 1f0657ae555b..f0e04d6b67d8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -381,7 +381,7 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, align = PAGE_SIZE; } - ptr = __alloc_percpu(size, align); + ptr = __alloc_reserved_percpu(size, align); if (!ptr) printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", size); diff --git a/mm/percpu.c b/mm/percpu.c index 5b47d9fe65f5..ef8e169b7731 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -94,6 +94,11 @@ static size_t pcpu_chunk_struct_size __read_mostly; void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); +/* optional reserved chunk, only accessible for reserved allocations */ +static struct pcpu_chunk *pcpu_reserved_chunk; +/* offset limit of the reserved chunk */ +static int pcpu_reserved_chunk_limit; + /* * One mutex to rule them all. * @@ -201,13 +206,14 @@ static void *pcpu_realloc(void *p, size_t size, size_t new_size) * * This function is called after an allocation or free changed @chunk. * New slot according to the changed state is determined and @chunk is - * moved to the slot. + * moved to the slot. Note that the reserved chunk is never put on + * chunk slots. */ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) { int nslot = pcpu_chunk_slot(chunk); - if (oslot != nslot) { + if (chunk != pcpu_reserved_chunk && oslot != nslot) { if (oslot < nslot) list_move(&chunk->list, &pcpu_slot[nslot]); else @@ -255,6 +261,15 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) struct rb_node *n, *parent; struct pcpu_chunk *chunk; + /* is it in the reserved chunk? */ + if (pcpu_reserved_chunk) { + void *start = pcpu_reserved_chunk->vm->addr; + + if (addr >= start && addr < start + pcpu_reserved_chunk_limit) + return pcpu_reserved_chunk; + } + + /* nah... search the regular ones */ n = *pcpu_chunk_rb_search(addr, &parent); if (!n) { /* no exactly matching chunk, the parent is the closest */ @@ -713,9 +728,10 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) } /** - * __alloc_percpu - allocate percpu area + * pcpu_alloc - the percpu allocator * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) + * @reserved: allocate from the reserved chunk if available * * Allocate percpu area of @size bytes aligned at @align. Might * sleep. Might trigger writeouts. @@ -723,7 +739,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -void *__alloc_percpu(size_t size, size_t align) +static void *pcpu_alloc(size_t size, size_t align, bool reserved) { void *ptr = NULL; struct pcpu_chunk *chunk; @@ -737,7 +753,18 @@ void *__alloc_percpu(size_t size, size_t align) mutex_lock(&pcpu_mutex); - /* allocate area */ + /* serve reserved allocations from the reserved chunk if available */ + if (reserved && pcpu_reserved_chunk) { + chunk = pcpu_reserved_chunk; + if (size > chunk->contig_hint) + goto out_unlock; + off = pcpu_alloc_area(chunk, size, align); + if (off >= 0) + goto area_found; + goto out_unlock; + } + + /* search through normal chunks */ for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) { list_for_each_entry(chunk, &pcpu_slot[slot], list) { if (size > chunk->contig_hint) @@ -773,8 +800,41 @@ out_unlock: mutex_unlock(&pcpu_mutex); return ptr; } + +/** + * __alloc_percpu - allocate dynamic percpu area + * @size: size of area to allocate in bytes + * @align: alignment of area (max PAGE_SIZE) + * + * Allocate percpu area of @size bytes aligned at @align. Might + * sleep. Might trigger writeouts. + * + * RETURNS: + * Percpu pointer to the allocated area on success, NULL on failure. + */ +void *__alloc_percpu(size_t size, size_t align) +{ + return pcpu_alloc(size, align, false); +} EXPORT_SYMBOL_GPL(__alloc_percpu); +/** + * __alloc_reserved_percpu - allocate reserved percpu area + * @size: size of area to allocate in bytes + * @align: alignment of area (max PAGE_SIZE) + * + * Allocate percpu area of @size bytes aligned at @align from reserved + * percpu area if arch has set it up; otherwise, allocation is served + * from the same dynamic area. Might sleep. Might trigger writeouts. + * + * RETURNS: + * Percpu pointer to the allocated area on success, NULL on failure. + */ +void *__alloc_reserved_percpu(size_t size, size_t align) +{ + return pcpu_alloc(size, align, true); +} + static void pcpu_kill_chunk(struct pcpu_chunk *chunk) { WARN_ON(chunk->immutable); @@ -826,6 +886,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto * @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @base_addr: mapped address, NULL for auto @@ -844,14 +905,22 @@ EXPORT_SYMBOL_GPL(free_percpu); * indicates end of pages for the cpu. Note that @get_page_fn() must * return the same number of pages for all cpus. * + * @reserved_size, if non-zero, specifies the amount of bytes to + * reserve after the static area in the first chunk. This reserves + * the first chunk such that it's available only through reserved + * percpu allocation. This is primarily used to serve module percpu + * static areas on architectures where the addressing model has + * limited offset range for symbol relocations to guarantee module + * percpu symbols fall inside the relocatable range. + * * @unit_size, if non-negative, specifies unit size and must be * aligned to PAGE_SIZE and equal to or larger than @static_size + - * @dyn_size. + * @reserved_size + @dyn_size. * * @dyn_size, if non-negative, limits the number of bytes available * for dynamic allocation in the first chunk. Specifying non-negative * value make percpu leave alone the area beyond @static_size + - * @dyn_size. + * @reserved_size + @dyn_size. * * Non-null @base_addr means that the caller already allocated virtual * region for the first chunk and mapped it. percpu must not mess @@ -861,28 +930,36 @@ EXPORT_SYMBOL_GPL(free_percpu); * @populate_pte_fn is used to populate the pagetable. NULL means the * caller already populated the pagetable. * + * If the first chunk ends up with both reserved and dynamic areas, it + * is served by two chunks - one to serve the core static and reserved + * areas and the other for the dynamic area. They share the same vm + * and page map but uses different area allocation map to stay away + * from each other. The latter chunk is circulated in the chunk slots + * and available for dynamic allocation like any other chunks. + * * RETURNS: * The determined pcpu_unit_size which can be used to initialize * percpu access. */ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, + size_t static_size, size_t reserved_size, ssize_t unit_size, ssize_t dyn_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn) { static struct vm_struct first_vm; - static int smap[2]; - struct pcpu_chunk *schunk; + static int smap[2], dmap[2]; + struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu; int nr_pages; int err, i; /* santiy checks */ - BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC); + BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || + ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); if (unit_size >= 0) { - BUG_ON(unit_size < static_size + + BUG_ON(unit_size < static_size + reserved_size + (dyn_size >= 0 ? dyn_size : 0)); BUG_ON(unit_size & ~PAGE_MASK); } else { @@ -895,7 +972,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, pcpu_unit_pages = unit_size >> PAGE_SHIFT; else pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, - PFN_UP(static_size)); + PFN_UP(static_size + reserved_size)); pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; @@ -903,7 +980,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); if (dyn_size < 0) - dyn_size = pcpu_unit_size - static_size; + dyn_size = pcpu_unit_size - static_size - reserved_size; /* * Allocate chunk slots. The additional last slot is for @@ -914,20 +991,49 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_slot[i]); - /* init static chunk */ + /* + * Initialize static chunk. If reserved_size is zero, the + * static chunk covers static area + dynamic allocation area + * in the first chunk. If reserved_size is not zero, it + * covers static area + reserved area (mostly used for module + * static percpu allocation). + */ schunk = alloc_bootmem(pcpu_chunk_struct_size); INIT_LIST_HEAD(&schunk->list); schunk->vm = &first_vm; schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); schunk->page = schunk->page_ar; - schunk->free_size = dyn_size; + + if (reserved_size) { + schunk->free_size = reserved_size; + pcpu_reserved_chunk = schunk; /* not for dynamic alloc */ + } else { + schunk->free_size = dyn_size; + dyn_size = 0; /* dynamic area covered */ + } schunk->contig_hint = schunk->free_size; schunk->map[schunk->map_used++] = -static_size; if (schunk->free_size) schunk->map[schunk->map_used++] = schunk->free_size; + pcpu_reserved_chunk_limit = static_size + schunk->free_size; + + /* init dynamic chunk if necessary */ + if (dyn_size) { + dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); + INIT_LIST_HEAD(&dchunk->list); + dchunk->vm = &first_vm; + dchunk->map = dmap; + dchunk->map_alloc = ARRAY_SIZE(dmap); + dchunk->page = schunk->page_ar; /* share page map with schunk */ + + dchunk->contig_hint = dchunk->free_size = dyn_size; + dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; + dchunk->map[dchunk->map_used++] = dchunk->free_size; + } + /* allocate vm address */ first_vm.flags = VM_ALLOC; first_vm.size = pcpu_chunk_size; @@ -937,12 +1043,14 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, else { /* * Pages already mapped. No need to remap into - * vmalloc area. In this case the static chunk can't - * be mapped or unmapped by percpu and is marked + * vmalloc area. In this case the first chunks can't + * be mapped or unmapped by percpu and are marked * immutable. */ first_vm.addr = base_addr; schunk->immutable = true; + if (dchunk) + dchunk->immutable = true; } /* assign pages */ @@ -978,8 +1086,13 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, } /* link the first chunk in */ - pcpu_chunk_relocate(schunk, -1); - pcpu_chunk_addr_insert(schunk); + if (!dchunk) { + pcpu_chunk_relocate(schunk, -1); + pcpu_chunk_addr_insert(schunk); + } else { + pcpu_chunk_relocate(dchunk, -1); + pcpu_chunk_addr_insert(dchunk); + } /* we're done */ pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); -- cgit v1.2.3 From 6e2b75740bed35df98b8113300579e13ed2ce848 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 16 Mar 2009 18:13:36 -0400 Subject: module: fix refptr allocation and release order Impact: fix ref-after-free crash on failed module load Fix refptr bug: Change refptr allocation and release order not to access a module data structure pointed by 'mod' after freeing mod->module_core. This bug will cause kernel panic(e.g. failed to find undefined symbols). This bug was reported on systemtap bugzilla. http://sources.redhat.com/bugzilla/show_bug.cgi?id=9927 Signed-off-by: Masami Hiramatsu Cc: Eric Dumazet Signed-off-by: Rusty Russell --- kernel/module.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index ba22484a987e..1196f5d11700 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2015,14 +2015,6 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto free_mod; -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), - mod->name); - if (!mod->refptr) { - err = -ENOMEM; - goto free_mod; - } -#endif if (pcpuindex) { /* We have a special allocation for this section. */ percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, @@ -2030,7 +2022,7 @@ static noinline struct module *load_module(void __user *umod, mod->name); if (!percpu) { err = -ENOMEM; - goto free_percpu; + goto free_mod; } sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; mod->percpu = percpu; @@ -2082,6 +2074,14 @@ static noinline struct module *load_module(void __user *umod, /* Module has been moved. */ mod = (void *)sechdrs[modindex].sh_addr; +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), + mod->name); + if (!mod->refptr) { + err = -ENOMEM; + goto free_init; + } +#endif /* Now we've moved module, initialize linked lists, etc. */ module_unload_init(mod); @@ -2288,15 +2288,17 @@ static noinline struct module *load_module(void __user *umod, ftrace_release(mod->module_core, mod->core_size); free_unload: module_unload_free(mod); + free_init: +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + percpu_modfree(mod->refptr); +#endif module_free(mod, mod->module_init); free_core: module_free(mod, mod->module_core); + /* mod will be freed with core. Don't access it beyond this line! */ free_percpu: if (percpu) percpu_modfree(percpu); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - percpu_modfree(mod->refptr); -#endif free_mod: kfree(args); free_hdr: -- cgit v1.2.3 From e9d376f0fa66bd630fe27403669c6ae6c22a868f Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 5 Feb 2009 11:51:38 -0500 Subject: dynamic debug: combine dprintk and dynamic printk This patch combines Greg Bank's dprintk() work with the existing dynamic printk patchset, we are now calling it 'dynamic debug'. The new feature of this patchset is a richer /debugfs control file interface, (an example output from my system is at the bottom), which allows fined grained control over the the debug output. The output can be controlled by function, file, module, format string, and line number. for example, enabled all debug messages in module 'nf_conntrack': echo -n 'module nf_conntrack +p' > /mnt/debugfs/dynamic_debug/control to disable them: echo -n 'module nf_conntrack -p' > /mnt/debugfs/dynamic_debug/control A further explanation can be found in the documentation patch. Signed-off-by: Greg Banks Signed-off-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 5 - include/asm-generic/vmlinux.lds.h | 15 +- include/linux/device.h | 2 +- include/linux/dynamic_debug.h | 88 +++++ include/linux/dynamic_printk.h | 93 ----- include/linux/kernel.h | 4 +- kernel/module.c | 25 +- lib/Kconfig.debug | 2 +- lib/Makefile | 2 +- lib/dynamic_debug.c | 756 ++++++++++++++++++++++++++++++++++++ lib/dynamic_printk.c | 414 -------------------- net/netfilter/nf_conntrack_pptp.c | 2 +- scripts/Makefile.lib | 2 +- 13 files changed, 867 insertions(+), 543 deletions(-) create mode 100644 include/linux/dynamic_debug.h delete mode 100644 include/linux/dynamic_printk.h create mode 100644 lib/dynamic_debug.c delete mode 100644 lib/dynamic_printk.c (limited to 'kernel/module.c') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 54f21a5c262b..3a1aa8a4affc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1816,11 +1816,6 @@ and is between 256 and 4096 characters. It is defined in the file autoconfiguration. Ranges are in pairs (memory base and size). - dynamic_printk Enables pr_debug()/dev_dbg() calls if - CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. - These can also be switched on/off via - /dynamic_printk/modules - print-fatal-signals= [KNL] debug: print fatal signals print-fatal-signals=1: print segfault info to diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c61fab1dd2f8..aca40b93bd28 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -80,6 +80,11 @@ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ VMLINUX_SYMBOL(__stop___tracepoints) = .; \ + /* implement dynamic printk debug */ \ + . = ALIGN(8); \ + VMLINUX_SYMBOL(__start___verbose) = .; \ + *(__verbose) \ + VMLINUX_SYMBOL(__stop___verbose) = .; \ LIKELY_PROFILE() \ BRANCH_PROFILE() @@ -309,15 +314,7 @@ CPU_DISCARD(init.data) \ CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.data) \ - MEM_DISCARD(init.rodata) \ - /* implement dynamic printk debug */ \ - VMLINUX_SYMBOL(__start___verbose_strings) = .; \ - *(__verbose_strings) \ - VMLINUX_SYMBOL(__stop___verbose_strings) = .; \ - . = ALIGN(8); \ - VMLINUX_SYMBOL(__start___verbose) = .; \ - *(__verbose) \ - VMLINUX_SYMBOL(__stop___verbose) = .; + MEM_DISCARD(init.rodata) #define INIT_TEXT \ *(.init.text) \ diff --git a/include/linux/device.h b/include/linux/device.h index f98d0cfb4f81..2918c0e8fdfd 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -582,7 +582,7 @@ extern const char *dev_driver_string(const struct device *dev); #if defined(DEBUG) #define dev_dbg(dev, format, arg...) \ dev_printk(KERN_DEBUG , dev , format , ## arg) -#elif defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#elif defined(CONFIG_DYNAMIC_DEBUG) #define dev_dbg(dev, format, ...) do { \ dynamic_dev_dbg(dev, format, ##__VA_ARGS__); \ } while (0) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h new file mode 100644 index 000000000000..07781aaa1164 --- /dev/null +++ b/include/linux/dynamic_debug.h @@ -0,0 +1,88 @@ +#ifndef _DYNAMIC_DEBUG_H +#define _DYNAMIC_DEBUG_H + +/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which + * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They + * use independent hash functions, to reduce the chance of false positives. + */ +extern long long dynamic_debug_enabled; +extern long long dynamic_debug_enabled2; + +/* + * An instance of this structure is created in a special + * ELF section at every dynamic debug callsite. At runtime, + * the special section is treated as an array of these. + */ +struct _ddebug { + /* + * These fields are used to drive the user interface + * for selecting and displaying debug callsites. + */ + const char *modname; + const char *function; + const char *filename; + const char *format; + char primary_hash; + char secondary_hash; + unsigned int lineno:24; + /* + * The flags field controls the behaviour at the callsite. + * The bits here are changed dynamically when the user + * writes commands to /dynamic_debug/ddebug + */ +#define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */ +#define _DPRINTK_FLAGS_DEFAULT 0 + unsigned int flags:8; +} __attribute__((aligned(8))); + + +int ddebug_add_module(struct _ddebug *tab, unsigned int n, + const char *modname); + +#if defined(CONFIG_DYNAMIC_DEBUG) +extern int ddebug_remove_module(char *mod_name); + +#define __dynamic_dbg_enabled(dd) ({ \ + int __ret = 0; \ + if (unlikely((dynamic_debug_enabled & (1LL << DEBUG_HASH)) && \ + (dynamic_debug_enabled2 & (1LL << DEBUG_HASH2)))) \ + if (unlikely(dd.flags)) \ + __ret = 1; \ + __ret; }) + +#define dynamic_pr_debug(fmt, ...) do { \ + static struct _ddebug descriptor \ + __used \ + __attribute__((section("__verbose"), aligned(8))) = \ + { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ + DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ + if (__dynamic_dbg_enabled(descriptor)) \ + printk(KERN_DEBUG KBUILD_MODNAME ":" fmt, \ + ##__VA_ARGS__); \ + } while (0) + + +#define dynamic_dev_dbg(dev, fmt, ...) do { \ + static struct _ddebug descriptor \ + __used \ + __attribute__((section("__verbose"), aligned(8))) = \ + { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ + DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ + if (__dynamic_dbg_enabled(descriptor)) \ + dev_printk(KERN_DEBUG, dev, \ + KBUILD_MODNAME ": " fmt, \ + ##__VA_ARGS__); \ + } while (0) + +#else + +static inline int ddebug_remove_module(char *mod) +{ + return 0; +} + +#define dynamic_pr_debug(fmt, ...) do { } while (0) +#define dynamic_dev_dbg(dev, format, ...) do { } while (0) +#endif + +#endif diff --git a/include/linux/dynamic_printk.h b/include/linux/dynamic_printk.h deleted file mode 100644 index 2d528d009074..000000000000 --- a/include/linux/dynamic_printk.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef _DYNAMIC_PRINTK_H -#define _DYNAMIC_PRINTK_H - -#define DYNAMIC_DEBUG_HASH_BITS 6 -#define DEBUG_HASH_TABLE_SIZE (1 << DYNAMIC_DEBUG_HASH_BITS) - -#define TYPE_BOOLEAN 1 - -#define DYNAMIC_ENABLED_ALL 0 -#define DYNAMIC_ENABLED_NONE 1 -#define DYNAMIC_ENABLED_SOME 2 - -extern int dynamic_enabled; - -/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which - * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They - * use independent hash functions, to reduce the chance of false positives. - */ -extern long long dynamic_printk_enabled; -extern long long dynamic_printk_enabled2; - -struct mod_debug { - char *modname; - char *logical_modname; - char *flag_names; - int type; - int hash; - int hash2; -} __attribute__((aligned(8))); - -int register_dynamic_debug_module(char *mod_name, int type, char *share_name, - char *flags, int hash, int hash2); - -#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) -extern int unregister_dynamic_debug_module(char *mod_name); -extern int __dynamic_dbg_enabled_helper(char *modname, int type, - int value, int hash); - -#define __dynamic_dbg_enabled(module, type, value, level, hash) ({ \ - int __ret = 0; \ - if (unlikely((dynamic_printk_enabled & (1LL << DEBUG_HASH)) && \ - (dynamic_printk_enabled2 & (1LL << DEBUG_HASH2)))) \ - __ret = __dynamic_dbg_enabled_helper(module, type, \ - value, hash);\ - __ret; }) - -#define dynamic_pr_debug(fmt, ...) do { \ - static char mod_name[] \ - __attribute__((section("__verbose_strings"))) \ - = KBUILD_MODNAME; \ - static struct mod_debug descriptor \ - __used \ - __attribute__((section("__verbose"), aligned(8))) = \ - { mod_name, mod_name, NULL, TYPE_BOOLEAN, DEBUG_HASH, DEBUG_HASH2 };\ - if (__dynamic_dbg_enabled(KBUILD_MODNAME, TYPE_BOOLEAN, \ - 0, 0, DEBUG_HASH)) \ - printk(KERN_DEBUG KBUILD_MODNAME ":" fmt, \ - ##__VA_ARGS__); \ - } while (0) - -#define dynamic_dev_dbg(dev, format, ...) do { \ - static char mod_name[] \ - __attribute__((section("__verbose_strings"))) \ - = KBUILD_MODNAME; \ - static struct mod_debug descriptor \ - __used \ - __attribute__((section("__verbose"), aligned(8))) = \ - { mod_name, mod_name, NULL, TYPE_BOOLEAN, DEBUG_HASH, DEBUG_HASH2 };\ - if (__dynamic_dbg_enabled(KBUILD_MODNAME, TYPE_BOOLEAN, \ - 0, 0, DEBUG_HASH)) \ - dev_printk(KERN_DEBUG, dev, \ - KBUILD_MODNAME ": " format, \ - ##__VA_ARGS__); \ - } while (0) - -#else - -static inline int unregister_dynamic_debug_module(const char *mod_name) -{ - return 0; -} -static inline int __dynamic_dbg_enabled_helper(char *modname, int type, - int value, int hash) -{ - return 0; -} - -#define __dynamic_dbg_enabled(module, type, value, level, hash) ({ 0; }) -#define dynamic_pr_debug(fmt, ...) do { } while (0) -#define dynamic_dev_dbg(dev, format, ...) do { } while (0) -#endif - -#endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7fa371898e3e..b5496ecbec71 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include @@ -358,7 +358,7 @@ static inline char *pack_hex_byte(char *buf, u8 byte) #if defined(DEBUG) #define pr_debug(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) -#elif defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#elif defined(CONFIG_DYNAMIC_DEBUG) #define pr_debug(fmt, ...) do { \ dynamic_pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) diff --git a/kernel/module.c b/kernel/module.c index 1196f5d11700..77672233387f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -822,7 +822,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, mutex_lock(&module_mutex); /* Store the name of the last unloaded module for diagnostic purposes */ strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); - unregister_dynamic_debug_module(mod->name); + ddebug_remove_module(mod->name); free_module(mod); out: @@ -1827,19 +1827,13 @@ static inline void add_kallsyms(struct module *mod, } #endif /* CONFIG_KALLSYMS */ -static void dynamic_printk_setup(struct mod_debug *debug, unsigned int num) +static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num) { -#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG - unsigned int i; - - for (i = 0; i < num; i++) { - register_dynamic_debug_module(debug[i].modname, - debug[i].type, - debug[i].logical_modname, - debug[i].flag_names, - debug[i].hash, debug[i].hash2); - } -#endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */ +#ifdef CONFIG_DYNAMIC_DEBUG + if (ddebug_add_module(debug, num, debug->modname)) + printk(KERN_ERR "dynamic debug error adding module: %s\n", + debug->modname); +#endif } static void *module_alloc_update_bounds(unsigned long size) @@ -2213,12 +2207,13 @@ static noinline struct module *load_module(void __user *umod, add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); if (!mod->taints) { - struct mod_debug *debug; + struct _ddebug *debug; unsigned int num_debug; debug = section_objs(hdr, sechdrs, secstrings, "__verbose", sizeof(*debug), &num_debug); - dynamic_printk_setup(debug, num_debug); + if (debug) + dynamic_debug_setup(debug, num_debug); } /* sechdrs[0].sh_size is always zero */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1bcf9cd4baa0..0dd1c04c7323 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -847,7 +847,7 @@ config BUILD_DOCSRC Say N if you are unsure. -config DYNAMIC_PRINTK_DEBUG +config DYNAMIC_DEBUG bool "Enable dynamic printk() call support" default n depends on PRINTK diff --git a/lib/Makefile b/lib/Makefile index 32b0e64ded27..8633d6be9d21 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -82,7 +82,7 @@ obj-$(CONFIG_HAVE_LMB) += lmb.o obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o -obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o +obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c new file mode 100644 index 000000000000..9e123ae326bc --- /dev/null +++ b/lib/dynamic_debug.c @@ -0,0 +1,756 @@ +/* + * lib/dynamic_debug.c + * + * make pr_debug()/dev_dbg() calls runtime configurable based upon their + * source module. + * + * Copyright (C) 2008 Jason Baron + * By Greg Banks + * Copyright (c) 2008 Silicon Graphics Inc. All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct _ddebug __start___verbose[]; +extern struct _ddebug __stop___verbose[]; + +/* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which + * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They + * use independent hash functions, to reduce the chance of false positives. + */ +long long dynamic_debug_enabled; +EXPORT_SYMBOL_GPL(dynamic_debug_enabled); +long long dynamic_debug_enabled2; +EXPORT_SYMBOL_GPL(dynamic_debug_enabled2); + +struct ddebug_table { + struct list_head link; + char *mod_name; + unsigned int num_ddebugs; + unsigned int num_enabled; + struct _ddebug *ddebugs; +}; + +struct ddebug_query { + const char *filename; + const char *module; + const char *function; + const char *format; + unsigned int first_lineno, last_lineno; +}; + +struct ddebug_iter { + struct ddebug_table *table; + unsigned int idx; +}; + +static DEFINE_MUTEX(ddebug_lock); +static LIST_HEAD(ddebug_tables); +static int verbose = 0; + +/* Return the last part of a pathname */ +static inline const char *basename(const char *path) +{ + const char *tail = strrchr(path, '/'); + return tail ? tail+1 : path; +} + +/* format a string into buf[] which describes the _ddebug's flags */ +static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, + size_t maxlen) +{ + char *p = buf; + + BUG_ON(maxlen < 4); + if (dp->flags & _DPRINTK_FLAGS_PRINT) + *p++ = 'p'; + if (p == buf) + *p++ = '-'; + *p = '\0'; + + return buf; +} + +/* + * must be called with ddebug_lock held + */ + +static int disabled_hash(char hash, bool first_table) +{ + struct ddebug_table *dt; + char table_hash_value; + + list_for_each_entry(dt, &ddebug_tables, link) { + if (first_table) + table_hash_value = dt->ddebugs->primary_hash; + else + table_hash_value = dt->ddebugs->secondary_hash; + if (dt->num_enabled && (hash == table_hash_value)) + return 0; + } + return 1; +} + +/* + * Search the tables for _ddebug's which match the given + * `query' and apply the `flags' and `mask' to them. Tells + * the user which ddebug's were changed, or whether none + * were matched. + */ +static void ddebug_change(const struct ddebug_query *query, + unsigned int flags, unsigned int mask) +{ + int i; + struct ddebug_table *dt; + unsigned int newflags; + unsigned int nfound = 0; + char flagbuf[8]; + + /* search for matching ddebugs */ + mutex_lock(&ddebug_lock); + list_for_each_entry(dt, &ddebug_tables, link) { + + /* match against the module name */ + if (query->module != NULL && + strcmp(query->module, dt->mod_name)) + continue; + + for (i = 0 ; i < dt->num_ddebugs ; i++) { + struct _ddebug *dp = &dt->ddebugs[i]; + + /* match against the source filename */ + if (query->filename != NULL && + strcmp(query->filename, dp->filename) && + strcmp(query->filename, basename(dp->filename))) + continue; + + /* match against the function */ + if (query->function != NULL && + strcmp(query->function, dp->function)) + continue; + + /* match against the format */ + if (query->format != NULL && + strstr(dp->format, query->format) == NULL) + continue; + + /* match against the line number range */ + if (query->first_lineno && + dp->lineno < query->first_lineno) + continue; + if (query->last_lineno && + dp->lineno > query->last_lineno) + continue; + + nfound++; + + newflags = (dp->flags & mask) | flags; + if (newflags == dp->flags) + continue; + + if (!newflags) + dt->num_enabled--; + else if (!dp-flags) + dt->num_enabled++; + dp->flags = newflags; + if (newflags) { + dynamic_debug_enabled |= + (1LL << dp->primary_hash); + dynamic_debug_enabled2 |= + (1LL << dp->secondary_hash); + } else { + if (disabled_hash(dp->primary_hash, true)) + dynamic_debug_enabled &= + ~(1LL << dp->primary_hash); + if (disabled_hash(dp->secondary_hash, false)) + dynamic_debug_enabled2 &= + ~(1LL << dp->secondary_hash); + } + if (verbose) + printk(KERN_INFO + "ddebug: changed %s:%d [%s]%s %s\n", + dp->filename, dp->lineno, + dt->mod_name, dp->function, + ddebug_describe_flags(dp, flagbuf, + sizeof(flagbuf))); + } + } + mutex_unlock(&ddebug_lock); + + if (!nfound && verbose) + printk(KERN_INFO "ddebug: no matches for query\n"); +} + +/* + * Wrapper around strsep() to collapse the multiple empty tokens + * that it returns when fed sequences of separator characters. + * Now, if we had strtok_r()... + */ +static inline char *nearly_strtok_r(char **p, const char *sep) +{ + char *r; + + while ((r = strsep(p, sep)) != NULL && *r == '\0') + ; + return r; +} + +/* + * Split the buffer `buf' into space-separated words. + * Return the number of such words or <0 on error. + */ +static int ddebug_tokenize(char *buf, char *words[], int maxwords) +{ + int nwords = 0; + + while (nwords < maxwords && + (words[nwords] = nearly_strtok_r(&buf, " \t\r\n")) != NULL) + nwords++; + if (buf) + return -EINVAL; /* ran out of words[] before bytes */ + + if (verbose) { + int i; + printk(KERN_INFO "%s: split into words:", __func__); + for (i = 0 ; i < nwords ; i++) + printk(" \"%s\"", words[i]); + printk("\n"); + } + + return nwords; +} + +/* + * Parse a single line number. Note that the empty string "" + * is treated as a special case and converted to zero, which + * is later treated as a "don't care" value. + */ +static inline int parse_lineno(const char *str, unsigned int *val) +{ + char *end = NULL; + BUG_ON(str == NULL); + if (*str == '\0') { + *val = 0; + return 0; + } + *val = simple_strtoul(str, &end, 10); + return end == NULL || end == str || *end != '\0' ? -EINVAL : 0; +} + +/* + * Undo octal escaping in a string, inplace. This is useful to + * allow the user to express a query which matches a format + * containing embedded spaces. + */ +#define isodigit(c) ((c) >= '0' && (c) <= '7') +static char *unescape(char *str) +{ + char *in = str; + char *out = str; + + while (*in) { + if (*in == '\\') { + if (in[1] == '\\') { + *out++ = '\\'; + in += 2; + continue; + } else if (in[1] == 't') { + *out++ = '\t'; + in += 2; + continue; + } else if (in[1] == 'n') { + *out++ = '\n'; + in += 2; + continue; + } else if (isodigit(in[1]) && + isodigit(in[2]) && + isodigit(in[3])) { + *out++ = ((in[1] - '0')<<6) | + ((in[2] - '0')<<3) | + (in[3] - '0'); + in += 4; + continue; + } + } + *out++ = *in++; + } + *out = '\0'; + + return str; +} + +/* + * Parse words[] as a ddebug query specification, which is a series + * of (keyword, value) pairs chosen from these possibilities: + * + * func + * file + * file + * module + * format + * line + * line - // where either may be empty + */ +static int ddebug_parse_query(char *words[], int nwords, + struct ddebug_query *query) +{ + unsigned int i; + + /* check we have an even number of words */ + if (nwords % 2 != 0) + return -EINVAL; + memset(query, 0, sizeof(*query)); + + for (i = 0 ; i < nwords ; i += 2) { + if (!strcmp(words[i], "func")) + query->function = words[i+1]; + else if (!strcmp(words[i], "file")) + query->filename = words[i+1]; + else if (!strcmp(words[i], "module")) + query->module = words[i+1]; + else if (!strcmp(words[i], "format")) + query->format = unescape(words[i+1]); + else if (!strcmp(words[i], "line")) { + char *first = words[i+1]; + char *last = strchr(first, '-'); + if (last) + *last++ = '\0'; + if (parse_lineno(first, &query->first_lineno) < 0) + return -EINVAL; + if (last != NULL) { + /* range - */ + if (parse_lineno(last, &query->last_lineno) < 0) + return -EINVAL; + } else { + query->last_lineno = query->first_lineno; + } + } else { + if (verbose) + printk(KERN_ERR "%s: unknown keyword \"%s\"\n", + __func__, words[i]); + return -EINVAL; + } + } + + if (verbose) + printk(KERN_INFO "%s: q->function=\"%s\" q->filename=\"%s\" " + "q->module=\"%s\" q->format=\"%s\" q->lineno=%u-%u\n", + __func__, query->function, query->filename, + query->module, query->format, query->first_lineno, + query->last_lineno); + + return 0; +} + +/* + * Parse `str' as a flags specification, format [-+=][p]+. + * Sets up *maskp and *flagsp to be used when changing the + * flags fields of matched _ddebug's. Returns 0 on success + * or <0 on error. + */ +static int ddebug_parse_flags(const char *str, unsigned int *flagsp, + unsigned int *maskp) +{ + unsigned flags = 0; + int op = '='; + + switch (*str) { + case '+': + case '-': + case '=': + op = *str++; + break; + default: + return -EINVAL; + } + if (verbose) + printk(KERN_INFO "%s: op='%c'\n", __func__, op); + + for ( ; *str ; ++str) { + switch (*str) { + case 'p': + flags |= _DPRINTK_FLAGS_PRINT; + break; + default: + return -EINVAL; + } + } + if (flags == 0) + return -EINVAL; + if (verbose) + printk(KERN_INFO "%s: flags=0x%x\n", __func__, flags); + + /* calculate final *flagsp, *maskp according to mask and op */ + switch (op) { + case '=': + *maskp = 0; + *flagsp = flags; + break; + case '+': + *maskp = ~0U; + *flagsp = flags; + break; + case '-': + *maskp = ~flags; + *flagsp = 0; + break; + } + if (verbose) + printk(KERN_INFO "%s: *flagsp=0x%x *maskp=0x%x\n", + __func__, *flagsp, *maskp); + return 0; +} + +/* + * File_ops->write method for /dynamic_debug/conrol. Gathers the + * command text from userspace, parses and executes it. + */ +static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp) +{ + unsigned int flags = 0, mask = 0; + struct ddebug_query query; +#define MAXWORDS 9 + int nwords; + char *words[MAXWORDS]; + char tmpbuf[256]; + + if (len == 0) + return 0; + /* we don't check *offp -- multiple writes() are allowed */ + if (len > sizeof(tmpbuf)-1) + return -E2BIG; + if (copy_from_user(tmpbuf, ubuf, len)) + return -EFAULT; + tmpbuf[len] = '\0'; + if (verbose) + printk(KERN_INFO "%s: read %d bytes from userspace\n", + __func__, (int)len); + + nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS); + if (nwords < 0) + return -EINVAL; + if (ddebug_parse_query(words, nwords-1, &query)) + return -EINVAL; + if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) + return -EINVAL; + + /* actually go and implement the change */ + ddebug_change(&query, flags, mask); + + *offp += len; + return len; +} + +/* + * Set the iterator to point to the first _ddebug object + * and return a pointer to that first object. Returns + * NULL if there are no _ddebugs at all. + */ +static struct _ddebug *ddebug_iter_first(struct ddebug_iter *iter) +{ + if (list_empty(&ddebug_tables)) { + iter->table = NULL; + iter->idx = 0; + return NULL; + } + iter->table = list_entry(ddebug_tables.next, + struct ddebug_table, link); + iter->idx = 0; + return &iter->table->ddebugs[iter->idx]; +} + +/* + * Advance the iterator to point to the next _ddebug + * object from the one the iterator currently points at, + * and returns a pointer to the new _ddebug. Returns + * NULL if the iterator has seen all the _ddebugs. + */ +static struct _ddebug *ddebug_iter_next(struct ddebug_iter *iter) +{ + if (iter->table == NULL) + return NULL; + if (++iter->idx == iter->table->num_ddebugs) { + /* iterate to next table */ + iter->idx = 0; + if (list_is_last(&iter->table->link, &ddebug_tables)) { + iter->table = NULL; + return NULL; + } + iter->table = list_entry(iter->table->link.next, + struct ddebug_table, link); + } + return &iter->table->ddebugs[iter->idx]; +} + +/* + * Seq_ops start method. Called at the start of every + * read() call from userspace. Takes the ddebug_lock and + * seeks the seq_file's iterator to the given position. + */ +static void *ddebug_proc_start(struct seq_file *m, loff_t *pos) +{ + struct ddebug_iter *iter = m->private; + struct _ddebug *dp; + int n = *pos; + + if (verbose) + printk(KERN_INFO "%s: called m=%p *pos=%lld\n", + __func__, m, (unsigned long long)*pos); + + mutex_lock(&ddebug_lock); + + if (!n) + return SEQ_START_TOKEN; + if (n < 0) + return NULL; + dp = ddebug_iter_first(iter); + while (dp != NULL && --n > 0) + dp = ddebug_iter_next(iter); + return dp; +} + +/* + * Seq_ops next method. Called several times within a read() + * call from userspace, with ddebug_lock held. Walks to the + * next _ddebug object with a special case for the header line. + */ +static void *ddebug_proc_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct ddebug_iter *iter = m->private; + struct _ddebug *dp; + + if (verbose) + printk(KERN_INFO "%s: called m=%p p=%p *pos=%lld\n", + __func__, m, p, (unsigned long long)*pos); + + if (p == SEQ_START_TOKEN) + dp = ddebug_iter_first(iter); + else + dp = ddebug_iter_next(iter); + ++*pos; + return dp; +} + +/* + * Seq_ops show method. Called several times within a read() + * call from userspace, with ddebug_lock held. Formats the + * current _ddebug as a single human-readable line, with a + * special case for the header line. + */ +static int ddebug_proc_show(struct seq_file *m, void *p) +{ + struct ddebug_iter *iter = m->private; + struct _ddebug *dp = p; + char flagsbuf[8]; + + if (verbose) + printk(KERN_INFO "%s: called m=%p p=%p\n", + __func__, m, p); + + if (p == SEQ_START_TOKEN) { + seq_puts(m, + "# filename:lineno [module]function flags format\n"); + return 0; + } + + seq_printf(m, "%s:%u [%s]%s %s \"", + dp->filename, dp->lineno, + iter->table->mod_name, dp->function, + ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf))); + seq_escape(m, dp->format, "\t\r\n\""); + seq_puts(m, "\"\n"); + + return 0; +} + +/* + * Seq_ops stop method. Called at the end of each read() + * call from userspace. Drops ddebug_lock. + */ +static void ddebug_proc_stop(struct seq_file *m, void *p) +{ + if (verbose) + printk(KERN_INFO "%s: called m=%p p=%p\n", + __func__, m, p); + mutex_unlock(&ddebug_lock); +} + +static const struct seq_operations ddebug_proc_seqops = { + .start = ddebug_proc_start, + .next = ddebug_proc_next, + .show = ddebug_proc_show, + .stop = ddebug_proc_stop +}; + +/* + * File_ops->open method for /dynamic_debug/control. Does the seq_file + * setup dance, and also creates an iterator to walk the _ddebugs. + * Note that we create a seq_file always, even for O_WRONLY files + * where it's not needed, as doing so simplifies the ->release method. + */ +static int ddebug_proc_open(struct inode *inode, struct file *file) +{ + struct ddebug_iter *iter; + int err; + + if (verbose) + printk(KERN_INFO "%s: called\n", __func__); + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (iter == NULL) + return -ENOMEM; + + err = seq_open(file, &ddebug_proc_seqops); + if (err) { + kfree(iter); + return err; + } + ((struct seq_file *) file->private_data)->private = iter; + return 0; +} + +static const struct file_operations ddebug_proc_fops = { + .owner = THIS_MODULE, + .open = ddebug_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, + .write = ddebug_proc_write +}; + +/* + * Allocate a new ddebug_table for the given module + * and add it to the global list. + */ +int ddebug_add_module(struct _ddebug *tab, unsigned int n, + const char *name) +{ + struct ddebug_table *dt; + char *new_name; + + dt = kzalloc(sizeof(*dt), GFP_KERNEL); + if (dt == NULL) + return -ENOMEM; + new_name = kstrdup(name, GFP_KERNEL); + if (new_name == NULL) { + kfree(dt); + return -ENOMEM; + } + dt->mod_name = new_name; + dt->num_ddebugs = n; + dt->num_enabled = 0; + dt->ddebugs = tab; + + mutex_lock(&ddebug_lock); + list_add_tail(&dt->link, &ddebug_tables); + mutex_unlock(&ddebug_lock); + + if (verbose) + printk(KERN_INFO "%u debug prints in module %s\n", + n, dt->mod_name); + return 0; +} +EXPORT_SYMBOL_GPL(ddebug_add_module); + +static void ddebug_table_free(struct ddebug_table *dt) +{ + list_del_init(&dt->link); + kfree(dt->mod_name); + kfree(dt); +} + +/* + * Called in response to a module being unloaded. Removes + * any ddebug_table's which point at the module. + */ +int ddebug_remove_module(char *mod_name) +{ + struct ddebug_table *dt, *nextdt; + int ret = -ENOENT; + + if (verbose) + printk(KERN_INFO "%s: removing module \"%s\"\n", + __func__, mod_name); + + mutex_lock(&ddebug_lock); + list_for_each_entry_safe(dt, nextdt, &ddebug_tables, link) { + if (!strcmp(dt->mod_name, mod_name)) { + ddebug_table_free(dt); + ret = 0; + } + } + mutex_unlock(&ddebug_lock); + return ret; +} +EXPORT_SYMBOL_GPL(ddebug_remove_module); + +static void ddebug_remove_all_tables(void) +{ + mutex_lock(&ddebug_lock); + while (!list_empty(&ddebug_tables)) { + struct ddebug_table *dt = list_entry(ddebug_tables.next, + struct ddebug_table, + link); + ddebug_table_free(dt); + } + mutex_unlock(&ddebug_lock); +} + +static int __init dynamic_debug_init(void) +{ + struct dentry *dir, *file; + struct _ddebug *iter, *iter_start; + const char *modname = NULL; + int ret = 0; + int n = 0; + + dir = debugfs_create_dir("dynamic_debug", NULL); + if (!dir) + return -ENOMEM; + file = debugfs_create_file("control", 0644, dir, NULL, + &ddebug_proc_fops); + if (!file) { + debugfs_remove(dir); + return -ENOMEM; + } + if (__start___verbose != __stop___verbose) { + iter = __start___verbose; + modname = iter->modname; + iter_start = iter; + for (; iter < __stop___verbose; iter++) { + if (strcmp(modname, iter->modname)) { + ret = ddebug_add_module(iter_start, n, modname); + if (ret) + goto out_free; + n = 0; + modname = iter->modname; + iter_start = iter; + } + n++; + } + ret = ddebug_add_module(iter_start, n, modname); + } +out_free: + if (ret) { + ddebug_remove_all_tables(); + debugfs_remove(dir); + debugfs_remove(file); + } + return 0; +} +module_init(dynamic_debug_init); diff --git a/lib/dynamic_printk.c b/lib/dynamic_printk.c deleted file mode 100644 index 165a19763dc9..000000000000 --- a/lib/dynamic_printk.c +++ /dev/null @@ -1,414 +0,0 @@ -/* - * lib/dynamic_printk.c - * - * make pr_debug()/dev_dbg() calls runtime configurable based upon their - * their source module. - * - * Copyright (C) 2008 Red Hat, Inc., Jason Baron - */ - -#include -#include -#include -#include -#include -#include - -extern struct mod_debug __start___verbose[]; -extern struct mod_debug __stop___verbose[]; - -struct debug_name { - struct hlist_node hlist; - struct hlist_node hlist2; - int hash1; - int hash2; - char *name; - int enable; - int type; -}; - -static int nr_entries; -static int num_enabled; -int dynamic_enabled = DYNAMIC_ENABLED_NONE; -static struct hlist_head module_table[DEBUG_HASH_TABLE_SIZE] = - { [0 ... DEBUG_HASH_TABLE_SIZE-1] = HLIST_HEAD_INIT }; -static struct hlist_head module_table2[DEBUG_HASH_TABLE_SIZE] = - { [0 ... DEBUG_HASH_TABLE_SIZE-1] = HLIST_HEAD_INIT }; -static DECLARE_MUTEX(debug_list_mutex); - -/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which - * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They - * use independent hash functions, to reduce the chance of false positives. - */ -long long dynamic_printk_enabled; -EXPORT_SYMBOL_GPL(dynamic_printk_enabled); -long long dynamic_printk_enabled2; -EXPORT_SYMBOL_GPL(dynamic_printk_enabled2); - -/* returns the debug module pointer. */ -static struct debug_name *find_debug_module(char *module_name) -{ - int i; - struct hlist_head *head; - struct hlist_node *node; - struct debug_name *element; - - element = NULL; - for (i = 0; i < DEBUG_HASH_TABLE_SIZE; i++) { - head = &module_table[i]; - hlist_for_each_entry_rcu(element, node, head, hlist) - if (!strcmp(element->name, module_name)) - return element; - } - return NULL; -} - -/* returns the debug module pointer. */ -static struct debug_name *find_debug_module_hash(char *module_name, int hash) -{ - struct hlist_head *head; - struct hlist_node *node; - struct debug_name *element; - - element = NULL; - head = &module_table[hash]; - hlist_for_each_entry_rcu(element, node, head, hlist) - if (!strcmp(element->name, module_name)) - return element; - return NULL; -} - -/* caller must hold mutex*/ -static int __add_debug_module(char *mod_name, int hash, int hash2) -{ - struct debug_name *new; - char *module_name; - int ret = 0; - - if (find_debug_module(mod_name)) { - ret = -EINVAL; - goto out; - } - module_name = kmalloc(strlen(mod_name) + 1, GFP_KERNEL); - if (!module_name) { - ret = -ENOMEM; - goto out; - } - module_name = strcpy(module_name, mod_name); - module_name[strlen(mod_name)] = '\0'; - new = kzalloc(sizeof(struct debug_name), GFP_KERNEL); - if (!new) { - kfree(module_name); - ret = -ENOMEM; - goto out; - } - INIT_HLIST_NODE(&new->hlist); - INIT_HLIST_NODE(&new->hlist2); - new->name = module_name; - new->hash1 = hash; - new->hash2 = hash2; - hlist_add_head_rcu(&new->hlist, &module_table[hash]); - hlist_add_head_rcu(&new->hlist2, &module_table2[hash2]); - nr_entries++; -out: - return ret; -} - -int unregister_dynamic_debug_module(char *mod_name) -{ - struct debug_name *element; - int ret = 0; - - down(&debug_list_mutex); - element = find_debug_module(mod_name); - if (!element) { - ret = -EINVAL; - goto out; - } - hlist_del_rcu(&element->hlist); - hlist_del_rcu(&element->hlist2); - synchronize_rcu(); - kfree(element->name); - if (element->enable) - num_enabled--; - kfree(element); - nr_entries--; -out: - up(&debug_list_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(unregister_dynamic_debug_module); - -int register_dynamic_debug_module(char *mod_name, int type, char *share_name, - char *flags, int hash, int hash2) -{ - struct debug_name *elem; - int ret = 0; - - down(&debug_list_mutex); - elem = find_debug_module(mod_name); - if (!elem) { - if (__add_debug_module(mod_name, hash, hash2)) - goto out; - elem = find_debug_module(mod_name); - if (dynamic_enabled == DYNAMIC_ENABLED_ALL && - !strcmp(mod_name, share_name)) { - elem->enable = true; - num_enabled++; - } - } - elem->type |= type; -out: - up(&debug_list_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(register_dynamic_debug_module); - -int __dynamic_dbg_enabled_helper(char *mod_name, int type, int value, int hash) -{ - struct debug_name *elem; - int ret = 0; - - if (dynamic_enabled == DYNAMIC_ENABLED_ALL) - return 1; - rcu_read_lock(); - elem = find_debug_module_hash(mod_name, hash); - if (elem && elem->enable) - ret = 1; - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(__dynamic_dbg_enabled_helper); - -static void set_all(bool enable) -{ - struct debug_name *e; - struct hlist_node *node; - int i; - long long enable_mask; - - for (i = 0; i < DEBUG_HASH_TABLE_SIZE; i++) { - if (module_table[i].first != NULL) { - hlist_for_each_entry(e, node, &module_table[i], hlist) { - e->enable = enable; - } - } - } - if (enable) - enable_mask = ULLONG_MAX; - else - enable_mask = 0; - dynamic_printk_enabled = enable_mask; - dynamic_printk_enabled2 = enable_mask; -} - -static int disabled_hash(int i, bool first_table) -{ - struct debug_name *e; - struct hlist_node *node; - - if (first_table) { - hlist_for_each_entry(e, node, &module_table[i], hlist) { - if (e->enable) - return 0; - } - } else { - hlist_for_each_entry(e, node, &module_table2[i], hlist2) { - if (e->enable) - return 0; - } - } - return 1; -} - -static ssize_t pr_debug_write(struct file *file, const char __user *buf, - size_t length, loff_t *ppos) -{ - char *buffer, *s, *value_str, *setting_str; - int err, value; - struct debug_name *elem = NULL; - int all = 0; - - if (length > PAGE_SIZE || length < 0) - return -EINVAL; - - buffer = (char *)__get_free_page(GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - err = -EFAULT; - if (copy_from_user(buffer, buf, length)) - goto out; - - err = -EINVAL; - if (length < PAGE_SIZE) - buffer[length] = '\0'; - else if (buffer[PAGE_SIZE-1]) - goto out; - - err = -EINVAL; - down(&debug_list_mutex); - - if (strncmp("set", buffer, 3)) - goto out_up; - s = buffer + 3; - setting_str = strsep(&s, "="); - if (s == NULL) - goto out_up; - setting_str = strstrip(setting_str); - value_str = strsep(&s, " "); - if (s == NULL) - goto out_up; - s = strstrip(s); - if (!strncmp(s, "all", 3)) - all = 1; - else - elem = find_debug_module(s); - if (!strncmp(setting_str, "enable", 6)) { - value = !!simple_strtol(value_str, NULL, 10); - if (all) { - if (value) { - set_all(true); - num_enabled = nr_entries; - dynamic_enabled = DYNAMIC_ENABLED_ALL; - } else { - set_all(false); - num_enabled = 0; - dynamic_enabled = DYNAMIC_ENABLED_NONE; - } - err = 0; - } else if (elem) { - if (value && (elem->enable == 0)) { - dynamic_printk_enabled |= (1LL << elem->hash1); - dynamic_printk_enabled2 |= (1LL << elem->hash2); - elem->enable = 1; - num_enabled++; - dynamic_enabled = DYNAMIC_ENABLED_SOME; - err = 0; - printk(KERN_DEBUG - "debugging enabled for module %s\n", - elem->name); - } else if (!value && (elem->enable == 1)) { - elem->enable = 0; - num_enabled--; - if (disabled_hash(elem->hash1, true)) - dynamic_printk_enabled &= - ~(1LL << elem->hash1); - if (disabled_hash(elem->hash2, false)) - dynamic_printk_enabled2 &= - ~(1LL << elem->hash2); - if (num_enabled) - dynamic_enabled = DYNAMIC_ENABLED_SOME; - else - dynamic_enabled = DYNAMIC_ENABLED_NONE; - err = 0; - printk(KERN_DEBUG - "debugging disabled for module %s\n", - elem->name); - } - } - } - if (!err) - err = length; -out_up: - up(&debug_list_mutex); -out: - free_page((unsigned long)buffer); - return err; -} - -static void *pr_debug_seq_start(struct seq_file *f, loff_t *pos) -{ - return (*pos < DEBUG_HASH_TABLE_SIZE) ? pos : NULL; -} - -static void *pr_debug_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - (*pos)++; - if (*pos >= DEBUG_HASH_TABLE_SIZE) - return NULL; - return pos; -} - -static void pr_debug_seq_stop(struct seq_file *s, void *v) -{ - /* Nothing to do */ -} - -static int pr_debug_seq_show(struct seq_file *s, void *v) -{ - struct hlist_head *head; - struct hlist_node *node; - struct debug_name *elem; - unsigned int i = *(loff_t *) v; - - rcu_read_lock(); - head = &module_table[i]; - hlist_for_each_entry_rcu(elem, node, head, hlist) { - seq_printf(s, "%s enabled=%d", elem->name, elem->enable); - seq_printf(s, "\n"); - } - rcu_read_unlock(); - return 0; -} - -static struct seq_operations pr_debug_seq_ops = { - .start = pr_debug_seq_start, - .next = pr_debug_seq_next, - .stop = pr_debug_seq_stop, - .show = pr_debug_seq_show -}; - -static int pr_debug_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &pr_debug_seq_ops); -} - -static const struct file_operations pr_debug_operations = { - .open = pr_debug_open, - .read = seq_read, - .write = pr_debug_write, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int __init dynamic_printk_init(void) -{ - struct dentry *dir, *file; - struct mod_debug *iter; - unsigned long value; - - dir = debugfs_create_dir("dynamic_printk", NULL); - if (!dir) - return -ENOMEM; - file = debugfs_create_file("modules", 0644, dir, NULL, - &pr_debug_operations); - if (!file) { - debugfs_remove(dir); - return -ENOMEM; - } - for (value = (unsigned long)__start___verbose; - value < (unsigned long)__stop___verbose; - value += sizeof(struct mod_debug)) { - iter = (struct mod_debug *)value; - register_dynamic_debug_module(iter->modname, - iter->type, - iter->logical_modname, - iter->flag_names, iter->hash, iter->hash2); - } - if (dynamic_enabled == DYNAMIC_ENABLED_ALL) - set_all(true); - return 0; -} -module_init(dynamic_printk_init); -/* may want to move this earlier so we can get traces as early as possible */ - -static int __init dynamic_printk_setup(char *str) -{ - if (str) - return -ENOENT; - dynamic_enabled = DYNAMIC_ENABLED_ALL; - return 0; -} -/* Use early_param(), so we can get debug output as early as possible */ -early_param("dynamic_printk", dynamic_printk_setup); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 9e169ef2e854..12bd09dbd36c 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -66,7 +66,7 @@ void struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); -#if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) /* PptpControlMessageType names */ const char *const pptp_msg_name[] = { "UNKNOWN_MESSAGE", diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index e06365775bdf..c18fa150b6fe 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -97,7 +97,7 @@ modname_flags = $(if $(filter 1,$(words $(modname))),\ -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") #hash values -ifdef CONFIG_DYNAMIC_PRINTK_DEBUG +ifdef CONFIG_DYNAMIC_DEBUG debug_flags = -D"DEBUG_HASH=$(shell ./scripts/basic/hash djb2 $(@D)$(modname))"\ -D"DEBUG_HASH2=$(shell ./scripts/basic/hash r5 $(@D)$(modname))" else -- cgit v1.2.3