From bebfa1013eee1d91b3242e5801cc8fbdfaf148ec Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:56:52 +0200 Subject: [PATCH] x86_64: Add compat_printk and sysctl to turn off compat layer warnings Sometimes e.g. with crashme the compat layer warnings can be noisy. Add a way to turn them off by gating all output through compat_printk that checks a global sysctl. The default is not changed. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2c0e65819448..f1a4eb1a655e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -73,6 +73,7 @@ extern int printk_ratelimit_burst; extern int pid_max_min, pid_max_max; extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; +extern int compat_log; #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) int unknown_nmi_panic; @@ -676,6 +677,16 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif +#ifdef CONFIG_COMPAT + { + .ctl_name = KERN_COMPAT_LOG, + .procname = "compat-log", + .data = &compat_log, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif { .ctl_name = 0 } }; -- cgit v1.2.3 From 4552d5dc08b79868829b4be8951b29b07284753f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Jun 2006 13:57:28 +0200 Subject: [PATCH] x86_64: reliable stack trace support These are the generic bits needed to enable reliable stack traces based on Dwarf2-like (.eh_frame) unwind information. Subsequent patches will enable x86-64 and i386 to make use of this. Thanks to Andi Kleen and Ingo Molnar, who pointed out several possibilities for improvement. 
Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 7 + include/linux/module.h | 3 + include/linux/unwind.h | 119 +++++++ init/main.c | 2 + kernel/Makefile | 1 + kernel/module.c | 16 +- kernel/unwind.c | 915 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 12 +- 8 files changed, 1072 insertions(+), 3 deletions(-) create mode 100644 include/linux/unwind.h create mode 100644 kernel/unwind.c (limited to 'kernel') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3c5e4c2e517d..5c1ec1f84eab 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -32,6 +32,7 @@ extern const char linux_banner[]; #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) +#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define KERN_EMERG "<0>" /* system is unusable */ #define KERN_ALERT "<1>" /* action must be taken immediately */ @@ -336,6 +337,12 @@ struct sysinfo { /* Force a compilation error if condition is true */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +/* Force a compilation error if condition is true, but also produce a + result (of value 0 and type size_t), so the expression can be used + e.g. in a structure initializer (or where-ever else comma expressions + aren't permitted). */ +#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1) + /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) diff --git a/include/linux/module.h b/include/linux/module.h index 2d366098eab5..9ebbb74b7b72 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -285,6 +285,9 @@ struct module /* The size of the executable code in each section. */ unsigned long init_text_size, core_text_size; + /* The handle returned from unwind_add_table. 
*/ + void *unwind_info; + /* Arch-specific module values */ struct mod_arch_specific arch; diff --git a/include/linux/unwind.h b/include/linux/unwind.h new file mode 100644 index 000000000000..0295aa789ab4 --- /dev/null +++ b/include/linux/unwind.h @@ -0,0 +1,119 @@ +#ifndef _LINUX_UNWIND_H +#define _LINUX_UNWIND_H + +/* + * Copyright (C) 2002-2006 Novell, Inc. + * Jan Beulich + * This code is released under version 2 of the GNU GPL. + * + * A simple API for unwinding kernel stacks. This is used for + * debugging and error reporting purposes. The kernel doesn't need + * full-blown stack unwinding with all the bells and whistles, so there + * is not much point in implementing the full Dwarf2 unwind API. + */ + +#include + +struct module; + +#ifdef CONFIG_STACK_UNWIND + +#include + +#ifndef ARCH_UNWIND_SECTION_NAME +#define ARCH_UNWIND_SECTION_NAME ".eh_frame" +#endif + +/* + * Initialize unwind support. + */ +extern void unwind_init(void); + +extern void *unwind_add_table(struct module *, + const void *table_start, + unsigned long table_size); + +extern void unwind_remove_table(void *handle, int init_only); + +extern int unwind_init_frame_info(struct unwind_frame_info *, + struct task_struct *, + /*const*/ struct pt_regs *); + +/* + * Prepare to unwind a blocked task. + */ +extern int unwind_init_blocked(struct unwind_frame_info *, + struct task_struct *); + +/* + * Prepare to unwind the currently running thread. + */ +extern int unwind_init_running(struct unwind_frame_info *, + asmlinkage void (*callback)(struct unwind_frame_info *, + void *arg), + void *arg); + +/* + * Unwind to previous to frame. Returns 0 if successful, negative + * number in case of an error. + */ +extern int unwind(struct unwind_frame_info *); + +/* + * Unwind until the return pointer is in user-land (or until an error + * occurs). Returns 0 if successful, negative number in case of + * error. 
+ */ +extern int unwind_to_user(struct unwind_frame_info *); + +#else + +struct unwind_frame_info {}; + +static inline void unwind_init(void) {} + +static inline void *unwind_add_table(struct module *mod, + const void *table_start, + unsigned long table_size) +{ + return NULL; +} + +static inline void unwind_remove_table(void *handle, int init_only) +{ +} + +static inline int unwind_init_frame_info(struct unwind_frame_info *info, + struct task_struct *tsk, + const struct pt_regs *regs) +{ + return -ENOSYS; +} + +static inline int unwind_init_blocked(struct unwind_frame_info *info, + struct task_struct *tsk) +{ + return -ENOSYS; +} + +static inline int unwind_init_running(struct unwind_frame_info *info, + asmlinkage void (*cb)(struct unwind_frame_info *, + void *arg), + void *arg) +{ + return -ENOSYS; +} + +static inline int unwind(struct unwind_frame_info *info) +{ + return -ENOSYS; +} + +static inline int unwind_to_user(struct unwind_frame_info *info) +{ + return -ENOSYS; +} + +#endif + +#endif /* _LINUX_UNWIND_H */ diff --git a/init/main.c b/init/main.c index f715b9b89753..f556fd0a0b66 100644 --- a/init/main.c +++ b/init/main.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -482,6 +483,7 @@ asmlinkage void __init start_kernel(void) __stop___param - __start___param, &unknown_bootoption); sort_main_extable(); + unwind_init(); trap_init(); rcu_init(); init_IRQ(); diff --git a/kernel/Makefile b/kernel/Makefile index f6ef00f4f90f..a31276e190f5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o +obj-$(CONFIG_STACK_UNWIND) += unwind.o obj-$(CONFIG_PM) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o diff --git a/kernel/module.c b/kernel/module.c index d75275de1c28..08811e26ac9d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -40,6 +40,7 @@ 
#include #include #include +#include #include #include #include @@ -1051,6 +1052,8 @@ static void free_module(struct module *mod) remove_sect_attrs(mod); mod_kobject_remove(mod); + unwind_remove_table(mod->unwind_info, 0); + /* Arch-specific cleanup. */ module_arch_cleanup(mod); @@ -1412,7 +1415,7 @@ static struct module *load_module(void __user *umod, unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, exportindex, modindex, obsparmindex, infoindex, gplindex, crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex, - gplfuturecrcindex; + gplfuturecrcindex, unwindex = 0; struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ @@ -1502,6 +1505,9 @@ static struct module *load_module(void __user *umod, versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); +#ifdef ARCH_UNWIND_SECTION_NAME + unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); +#endif /* Don't keep modinfo section */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; @@ -1510,6 +1516,8 @@ static struct module *load_module(void __user *umod, sechdrs[symindex].sh_flags |= SHF_ALLOC; sechdrs[strindex].sh_flags |= SHF_ALLOC; #endif + if (unwindex) + sechdrs[unwindex].sh_flags |= SHF_ALLOC; /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(sechdrs, versindex, mod)) { @@ -1738,6 +1746,11 @@ static struct module *load_module(void __user *umod, goto arch_cleanup; add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); + /* Size of section 0 is 0, so this works well if no unwind info. */ + mod->unwind_info = unwind_add_table(mod, + (void *)sechdrs[unwindex].sh_addr, + sechdrs[unwindex].sh_size); + /* Get rid of temporary copy */ vfree(hdr); @@ -1836,6 +1849,7 @@ sys_init_module(void __user *umod, mod->state = MODULE_STATE_LIVE; /* Drop initial reference. 
*/ module_put(mod); + unwind_remove_table(mod->unwind_info, 1); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; diff --git a/kernel/unwind.c b/kernel/unwind.c new file mode 100644 index 000000000000..d36bcd3ad3b5 --- /dev/null +++ b/kernel/unwind.c @@ -0,0 +1,915 @@ +/* + * Copyright (C) 2002-2006 Novell, Inc. + * Jan Beulich + * This code is released under version 2 of the GNU GPL. + * + * A simple API for unwinding kernel stacks. This is used for + * debugging and error reporting purposes. The kernel doesn't need + * full-blown stack unwinding with all the bells and whistles, so there + * is not much point in implementing the full Dwarf2 unwind API. + */ + +#include +#include +#include +#include +#include +#include +#include + +extern char __start_unwind[], __end_unwind[]; + +#define MAX_STACK_DEPTH 8 + +#define EXTRA_INFO(f) { \ + BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \ + % FIELD_SIZEOF(struct unwind_frame_info, f)) \ + + offsetof(struct unwind_frame_info, f) \ + / FIELD_SIZEOF(struct unwind_frame_info, f), \ + FIELD_SIZEOF(struct unwind_frame_info, f) \ + } +#define PTREGS_INFO(f) EXTRA_INFO(regs.f) + +static const struct { + unsigned offs:BITS_PER_LONG / 2; + unsigned width:BITS_PER_LONG / 2; +} reg_info[] = { + UNW_REGISTER_INFO +}; + +#undef PTREGS_INFO +#undef EXTRA_INFO + +#ifndef REG_INVALID +#define REG_INVALID(r) (reg_info[r].width == 0) +#endif + +#define DW_CFA_nop 0x00 +#define DW_CFA_set_loc 0x01 +#define DW_CFA_advance_loc1 0x02 +#define DW_CFA_advance_loc2 0x03 +#define DW_CFA_advance_loc4 0x04 +#define DW_CFA_offset_extended 0x05 +#define DW_CFA_restore_extended 0x06 +#define DW_CFA_undefined 0x07 +#define DW_CFA_same_value 0x08 +#define DW_CFA_register 0x09 +#define DW_CFA_remember_state 0x0a +#define DW_CFA_restore_state 0x0b +#define DW_CFA_def_cfa 0x0c +#define DW_CFA_def_cfa_register 0x0d +#define DW_CFA_def_cfa_offset 0x0e +#define DW_CFA_def_cfa_expression 0x0f +#define DW_CFA_expression 
0x10 +#define DW_CFA_offset_extended_sf 0x11 +#define DW_CFA_def_cfa_sf 0x12 +#define DW_CFA_def_cfa_offset_sf 0x13 +#define DW_CFA_val_offset 0x14 +#define DW_CFA_val_offset_sf 0x15 +#define DW_CFA_val_expression 0x16 +#define DW_CFA_lo_user 0x1c +#define DW_CFA_GNU_window_save 0x2d +#define DW_CFA_GNU_args_size 0x2e +#define DW_CFA_GNU_negative_offset_extended 0x2f +#define DW_CFA_hi_user 0x3f + +#define DW_EH_PE_FORM 0x07 +#define DW_EH_PE_native 0x00 +#define DW_EH_PE_leb128 0x01 +#define DW_EH_PE_data2 0x02 +#define DW_EH_PE_data4 0x03 +#define DW_EH_PE_data8 0x04 +#define DW_EH_PE_signed 0x08 +#define DW_EH_PE_ADJUST 0x70 +#define DW_EH_PE_abs 0x00 +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_indirect 0x80 +#define DW_EH_PE_omit 0xff + +typedef unsigned long uleb128_t; +typedef signed long sleb128_t; + +static struct unwind_table { + struct { + unsigned long pc; + unsigned long range; + } core, init; + const void *address; + unsigned long size; + struct unwind_table *link; + const char *name; +} root_table, *last_table; + +struct unwind_item { + enum item_location { + Nowhere, + Memory, + Register, + Value + } where; + uleb128_t value; +}; + +struct unwind_state { + uleb128_t loc, org; + const u8 *cieStart, *cieEnd; + uleb128_t codeAlign; + sleb128_t dataAlign; + struct cfa { + uleb128_t reg, offs; + } cfa; + struct unwind_item regs[ARRAY_SIZE(reg_info)]; + unsigned stackDepth:8; + unsigned version:8; + const u8 *label; + const u8 *stack[MAX_STACK_DEPTH]; +}; + +static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; + +static struct unwind_table *find_table(unsigned long pc) +{ + struct unwind_table *table; + + for (table = &root_table; table; table = table->link) + if ((pc >= table->core.pc + && pc < table->core.pc + table->core.range) + || (pc >= table->init.pc + && pc < table->init.pc + table->init.range)) + break; + + return 
table; +} + +static void init_unwind_table(struct unwind_table *table, + const char *name, + const void *core_start, + unsigned long core_size, + const void *init_start, + unsigned long init_size, + const void *table_start, + unsigned long table_size) +{ + table->core.pc = (unsigned long)core_start; + table->core.range = core_size; + table->init.pc = (unsigned long)init_start; + table->init.range = init_size; + table->address = table_start; + table->size = table_size; + table->link = NULL; + table->name = name; +} + +void __init unwind_init(void) +{ + init_unwind_table(&root_table, "kernel", + _text, _end - _text, + NULL, 0, + __start_unwind, __end_unwind - __start_unwind); +} + +/* Must be called with module_mutex held. */ +void *unwind_add_table(struct module *module, + const void *table_start, + unsigned long table_size) +{ + struct unwind_table *table; + + if (table_size <= 0) + return NULL; + + table = kmalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return NULL; + + init_unwind_table(table, module->name, + module->module_core, module->core_size, + module->module_init, module->init_size, + table_start, table_size); + + if (last_table) + last_table->link = table; + else + root_table.link = table; + last_table = table; + + return table; +} + +struct unlink_table_info +{ + struct unwind_table *table; + int init_only; +}; + +static int unlink_table(void *arg) +{ + struct unlink_table_info *info = arg; + struct unwind_table *table = info->table, *prev; + + for (prev = &root_table; prev->link && prev->link != table; prev = prev->link) + ; + + if (prev->link) { + if (info->init_only) { + table->init.pc = 0; + table->init.range = 0; + info->table = NULL; + } else { + prev->link = table->link; + if (!prev->link) + last_table = prev; + } + } else + info->table = NULL; + + return 0; +} + +/* Must be called with module_mutex held. 
*/ +void unwind_remove_table(void *handle, int init_only) +{ + struct unwind_table *table = handle; + struct unlink_table_info info; + + if (!table || table == &root_table) + return; + + if (init_only && table == last_table) { + table->init.pc = 0; + table->init.range = 0; + return; + } + + info.table = table; + info.init_only = init_only; + stop_machine_run(unlink_table, &info, NR_CPUS); + + if (info.table) + kfree(table); +} + +static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) +{ + const u8 *cur = *pcur; + uleb128_t value; + unsigned shift; + + for (shift = 0, value = 0; cur < end; shift += 7) { + if (shift + 7 > 8 * sizeof(value) + && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { + cur = end + 1; + break; + } + value |= (uleb128_t)(*cur & 0x7f) << shift; + if (!(*cur++ & 0x80)) + break; + } + *pcur = cur; + + return value; +} + +static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) +{ + const u8 *cur = *pcur; + sleb128_t value; + unsigned shift; + + for (shift = 0, value = 0; cur < end; shift += 7) { + if (shift + 7 > 8 * sizeof(value) + && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { + cur = end + 1; + break; + } + value |= (sleb128_t)(*cur & 0x7f) << shift; + if (!(*cur & 0x80)) { + value |= -(*cur++ & 0x40) << shift; + break; + } + } + *pcur = cur; + + return value; +} + +static unsigned long read_pointer(const u8 **pLoc, + const void *end, + signed ptrType) +{ + unsigned long value = 0; + union { + const u8 *p8; + const u16 *p16u; + const s16 *p16s; + const u32 *p32u; + const s32 *p32s; + const unsigned long *pul; + } ptr; + + if (ptrType < 0 || ptrType == DW_EH_PE_omit) + return 0; + ptr.p8 = *pLoc; + switch(ptrType & DW_EH_PE_FORM) { + case DW_EH_PE_data2: + if (end < (const void *)(ptr.p16u + 1)) + return 0; + if(ptrType & DW_EH_PE_signed) + value = get_unaligned(ptr.p16s++); + else + value = get_unaligned(ptr.p16u++); + break; + case DW_EH_PE_data4: +#ifdef CONFIG_64BIT + if (end < (const void *)(ptr.p32u + 1)) + 
return 0; + if(ptrType & DW_EH_PE_signed) + value = get_unaligned(ptr.p32s++); + else + value = get_unaligned(ptr.p32u++); + break; + case DW_EH_PE_data8: + BUILD_BUG_ON(sizeof(u64) != sizeof(value)); +#else + BUILD_BUG_ON(sizeof(u32) != sizeof(value)); +#endif + case DW_EH_PE_native: + if (end < (const void *)(ptr.pul + 1)) + return 0; + value = get_unaligned(ptr.pul++); + break; + case DW_EH_PE_leb128: + BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value)); + value = ptrType & DW_EH_PE_signed + ? get_sleb128(&ptr.p8, end) + : get_uleb128(&ptr.p8, end); + if ((const void *)ptr.p8 > end) + return 0; + break; + default: + return 0; + } + switch(ptrType & DW_EH_PE_ADJUST) { + case DW_EH_PE_abs: + break; + case DW_EH_PE_pcrel: + value += (unsigned long)*pLoc; + break; + default: + return 0; + } + if ((ptrType & DW_EH_PE_indirect) + && __get_user(value, (unsigned long *)value)) + return 0; + *pLoc = ptr.p8; + + return value; +} + +static signed fde_pointer_type(const u32 *cie) +{ + const u8 *ptr = (const u8 *)(cie + 2); + unsigned version = *ptr; + + if (version != 1) + return -1; /* unsupported */ + if (*++ptr) { + const char *aug; + const u8 *end = (const u8 *)(cie + 1) + *cie; + uleb128_t len; + + /* check if augmentation size is first (and thus present) */ + if (*ptr != 'z') + return -1; + /* check if augmentation string is nul-terminated */ + if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL) + return -1; + ++ptr; /* skip terminator */ + get_uleb128(&ptr, end); /* skip code alignment */ + get_sleb128(&ptr, end); /* skip data alignment */ + /* skip return address column */ + version <= 1 ? 
(void)++ptr : (void)get_uleb128(&ptr, end); + len = get_uleb128(&ptr, end); /* augmentation length */ + if (ptr + len < ptr || ptr + len > end) + return -1; + end = ptr + len; + while (*++aug) { + if (ptr >= end) + return -1; + switch(*aug) { + case 'L': + ++ptr; + break; + case 'P': { + signed ptrType = *ptr++; + + if (!read_pointer(&ptr, end, ptrType) || ptr > end) + return -1; + } + break; + case 'R': + return *ptr; + default: + return -1; + } + } + } + return DW_EH_PE_native|DW_EH_PE_abs; +} + +static int advance_loc(unsigned long delta, struct unwind_state *state) +{ + state->loc += delta * state->codeAlign; + + return delta > 0; +} + +static void set_rule(uleb128_t reg, + enum item_location where, + uleb128_t value, + struct unwind_state *state) +{ + if (reg < ARRAY_SIZE(state->regs)) { + state->regs[reg].where = where; + state->regs[reg].value = value; + } +} + +static int processCFI(const u8 *start, + const u8 *end, + unsigned long targetLoc, + signed ptrType, + struct unwind_state *state) +{ + union { + const u8 *p8; + const u16 *p16; + const u32 *p32; + } ptr; + int result = 1; + + if (start != state->cieStart) { + state->loc = state->org; + result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state); + if (targetLoc == 0 && state->label == NULL) + return result; + } + for (ptr.p8 = start; result && ptr.p8 < end; ) { + switch(*ptr.p8 >> 6) { + uleb128_t value; + + case 0: + switch(*ptr.p8++) { + case DW_CFA_nop: + break; + case DW_CFA_set_loc: + if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0) + result = 0; + break; + case DW_CFA_advance_loc1: + result = ptr.p8 < end && advance_loc(*ptr.p8++, state); + break; + case DW_CFA_advance_loc2: + result = ptr.p8 <= end + 2 + && advance_loc(*ptr.p16++, state); + break; + case DW_CFA_advance_loc4: + result = ptr.p8 <= end + 4 + && advance_loc(*ptr.p32++, state); + break; + case DW_CFA_offset_extended: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Memory, get_uleb128(&ptr.p8, end), 
state); + break; + case DW_CFA_val_offset: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Value, get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_offset_extended_sf: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Memory, get_sleb128(&ptr.p8, end), state); + break; + case DW_CFA_val_offset_sf: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Value, get_sleb128(&ptr.p8, end), state); + break; + case DW_CFA_restore_extended: + case DW_CFA_undefined: + case DW_CFA_same_value: + set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state); + break; + case DW_CFA_register: + value = get_uleb128(&ptr.p8, end); + set_rule(value, + Register, + get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_remember_state: + if (ptr.p8 == state->label) { + state->label = NULL; + return 1; + } + if (state->stackDepth >= MAX_STACK_DEPTH) + return 0; + state->stack[state->stackDepth++] = ptr.p8; + break; + case DW_CFA_restore_state: + if (state->stackDepth) { + const uleb128_t loc = state->loc; + const u8 *label = state->label; + + state->label = state->stack[state->stackDepth - 1]; + memcpy(&state->cfa, &badCFA, sizeof(state->cfa)); + memset(state->regs, 0, sizeof(state->regs)); + state->stackDepth = 0; + result = processCFI(start, end, 0, ptrType, state); + state->loc = loc; + state->label = label; + } else + return 0; + break; + case DW_CFA_def_cfa: + state->cfa.reg = get_uleb128(&ptr.p8, end); + /*nobreak*/ + case DW_CFA_def_cfa_offset: + state->cfa.offs = get_uleb128(&ptr.p8, end); + break; + case DW_CFA_def_cfa_sf: + state->cfa.reg = get_uleb128(&ptr.p8, end); + /*nobreak*/ + case DW_CFA_def_cfa_offset_sf: + state->cfa.offs = get_sleb128(&ptr.p8, end) + * state->dataAlign; + break; + case DW_CFA_def_cfa_register: + state->cfa.reg = get_uleb128(&ptr.p8, end); + break; + /*todo case DW_CFA_def_cfa_expression: */ + /*todo case DW_CFA_expression: */ + /*todo case DW_CFA_val_expression: */ + case DW_CFA_GNU_args_size: + get_uleb128(&ptr.p8, end); + break; + 
case DW_CFA_GNU_negative_offset_extended: + value = get_uleb128(&ptr.p8, end); + set_rule(value, + Memory, + (uleb128_t)0 - get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_GNU_window_save: + default: + result = 0; + break; + } + break; + case 1: + result = advance_loc(*ptr.p8++ & 0x3f, state); + break; + case 2: + value = *ptr.p8++ & 0x3f; + set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); + break; + case 3: + set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); + break; + } + if (ptr.p8 > end) + result = 0; + if (result && targetLoc != 0 && targetLoc < state->loc) + return 1; + } + + return result + && ptr.p8 == end + && (targetLoc == 0 + || (/*todo While in theory this should apply, gcc in practice omits + everything past the function prolog, and hence the location + never reaches the end of the function. + targetLoc < state->loc &&*/ state->label == NULL)); +} + +/* Unwind to previous to frame. Returns 0 if successful, negative + * number in case of an error. */ +int unwind(struct unwind_frame_info *frame) +{ +#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) + const u32 *fde = NULL, *cie = NULL; + const u8 *ptr = NULL, *end = NULL; + unsigned long startLoc = 0, endLoc = 0, cfa; + unsigned i; + signed ptrType = -1; + uleb128_t retAddrReg = 0; + struct unwind_table *table; + struct unwind_state state; + + if (UNW_PC(frame) == 0) + return -EINVAL; + if ((table = find_table(UNW_PC(frame))) != NULL + && !(table->size & (sizeof(*fde) - 1))) { + unsigned long tableSize = table->size; + + for (fde = table->address; + tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, + fde += 1 + *fde / sizeof(*fde)) { + if (!*fde || (*fde & (sizeof(*fde) - 1))) + break; + if (!fde[1]) + continue; /* this is a CIE */ + if ((fde[1] & (sizeof(*fde) - 1)) + || fde[1] > (unsigned long)(fde + 1) + - (unsigned long)table->address) + continue; /* this is not a valid FDE */ + cie = fde + 1 - fde[1] / sizeof(*fde); + if (*cie 
<= sizeof(*cie) + 4 + || *cie >= fde[1] - sizeof(*fde) + || (*cie & (sizeof(*cie) - 1)) + || cie[1] + || (ptrType = fde_pointer_type(cie)) < 0) { + cie = NULL; /* this is not a (valid) CIE */ + continue; + } + ptr = (const u8 *)(fde + 2); + startLoc = read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType); + endLoc = startLoc + + read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType & DW_EH_PE_indirect + ? ptrType + : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed)); + if (UNW_PC(frame) >= startLoc && UNW_PC(frame) < endLoc) + break; + cie = NULL; + } + } + if (cie != NULL) { + memset(&state, 0, sizeof(state)); + state.cieEnd = ptr; /* keep here temporarily */ + ptr = (const u8 *)(cie + 2); + end = (const u8 *)(cie + 1) + *cie; + if ((state.version = *ptr) != 1) + cie = NULL; /* unsupported version */ + else if (*++ptr) { + /* check if augmentation size is first (and thus present) */ + if (*ptr == 'z') { + /* check for ignorable (or already handled) + * nul-terminated augmentation string */ + while (++ptr < end && *ptr) + if (strchr("LPR", *ptr) == NULL) + break; + } + if (ptr >= end || *ptr) + cie = NULL; + } + ++ptr; + } + if (cie != NULL) { + /* get code aligment factor */ + state.codeAlign = get_uleb128(&ptr, end); + /* get data aligment factor */ + state.dataAlign = get_sleb128(&ptr, end); + if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) + cie = NULL; + else { + retAddrReg = state.version <= 1 ? 
*ptr++ : get_uleb128(&ptr, end); + /* skip augmentation */ + if (((const char *)(cie + 2))[1] == 'z') + ptr += get_uleb128(&ptr, end); + if (ptr > end + || retAddrReg >= ARRAY_SIZE(reg_info) + || REG_INVALID(retAddrReg) + || reg_info[retAddrReg].width != sizeof(unsigned long)) + cie = NULL; + } + } + if (cie != NULL) { + state.cieStart = ptr; + ptr = state.cieEnd; + state.cieEnd = end; + end = (const u8 *)(fde + 1) + *fde; + /* skip augmentation */ + if (((const char *)(cie + 2))[1] == 'z') { + uleb128_t augSize = get_uleb128(&ptr, end); + + if ((ptr += augSize) > end) + fde = NULL; + } + } + if (cie == NULL || fde == NULL) { +#ifdef CONFIG_FRAME_POINTER + unsigned long top, bottom; +#endif + +#ifdef CONFIG_FRAME_POINTER + top = STACK_TOP(frame->task); + bottom = STACK_BOTTOM(frame->task); +# if FRAME_RETADDR_OFFSET < 0 + if (UNW_SP(frame) < top + && UNW_FP(frame) <= UNW_SP(frame) + && bottom < UNW_FP(frame) +# else + if (UNW_SP(frame) > top + && UNW_FP(frame) >= UNW_SP(frame) + && bottom > UNW_FP(frame) +# endif + && !((UNW_SP(frame) | UNW_FP(frame)) + & (sizeof(unsigned long) - 1))) { + unsigned long link; + + if (!__get_user(link, + (unsigned long *)(UNW_FP(frame) + + FRAME_LINK_OFFSET)) +# if FRAME_RETADDR_OFFSET < 0 + && link > bottom && link < UNW_FP(frame) +# else + && link > UNW_FP(frame) && link < bottom +# endif + && !(link & (sizeof(link) - 1)) + && !__get_user(UNW_PC(frame), + (unsigned long *)(UNW_FP(frame) + + FRAME_RETADDR_OFFSET))) { + UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET +# if FRAME_RETADDR_OFFSET < 0 + - +# else + + +# endif + sizeof(UNW_PC(frame)); + UNW_FP(frame) = link; + return 0; + } + } +#endif + return -ENXIO; + } + state.org = startLoc; + memcpy(&state.cfa, &badCFA, sizeof(state.cfa)); + /* process instructions */ + if (!processCFI(ptr, end, UNW_PC(frame), ptrType, &state) + || state.loc > endLoc + || state.regs[retAddrReg].where == Nowhere + || state.cfa.reg >= ARRAY_SIZE(reg_info) + || reg_info[state.cfa.reg].width != 
sizeof(unsigned long) + || state.cfa.offs % sizeof(unsigned long)) + return -EIO; + /* update frame */ + cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs; + startLoc = min((unsigned long)UNW_SP(frame), cfa); + endLoc = max((unsigned long)UNW_SP(frame), cfa); + if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) { + startLoc = min(STACK_LIMIT(cfa), cfa); + endLoc = max(STACK_LIMIT(cfa), cfa); + } +#ifndef CONFIG_64BIT +# define CASES CASE(8); CASE(16); CASE(32) +#else +# define CASES CASE(8); CASE(16); CASE(32); CASE(64) +#endif + for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { + if (REG_INVALID(i)) { + if (state.regs[i].where == Nowhere) + continue; + return -EIO; + } + switch(state.regs[i].where) { + default: + break; + case Register: + if (state.regs[i].value >= ARRAY_SIZE(reg_info) + || REG_INVALID(state.regs[i].value) + || reg_info[i].width > reg_info[state.regs[i].value].width) + return -EIO; + switch(reg_info[state.regs[i].value].width) { +#define CASE(n) \ + case sizeof(u##n): \ + state.regs[i].value = FRAME_REG(state.regs[i].value, \ + const u##n); \ + break + CASES; +#undef CASE + default: + return -EIO; + } + break; + } + } + for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { + if (REG_INVALID(i)) + continue; + switch(state.regs[i].where) { + case Nowhere: + if (reg_info[i].width != sizeof(UNW_SP(frame)) + || &FRAME_REG(i, __typeof__(UNW_SP(frame))) + != &UNW_SP(frame)) + continue; + UNW_SP(frame) = cfa; + break; + case Register: + switch(reg_info[i].width) { +#define CASE(n) case sizeof(u##n): \ + FRAME_REG(i, u##n) = state.regs[i].value; \ + break + CASES; +#undef CASE + default: + return -EIO; + } + break; + case Value: + if (reg_info[i].width != sizeof(unsigned long)) + return -EIO; + FRAME_REG(i, unsigned long) = cfa + state.regs[i].value + * state.dataAlign; + break; + case Memory: { + unsigned long addr = cfa + state.regs[i].value + * state.dataAlign; + + if ((state.regs[i].value * state.dataAlign) + % sizeof(unsigned long) + || addr < 
startLoc + || addr + sizeof(unsigned long) < addr + || addr + sizeof(unsigned long) > endLoc) + return -EIO; + switch(reg_info[i].width) { +#define CASE(n) case sizeof(u##n): \ + __get_user(FRAME_REG(i, u##n), (u##n *)addr); \ + break + CASES; +#undef CASE + default: + return -EIO; + } + } + break; + } + } + + return 0; +#undef CASES +#undef FRAME_REG +} +EXPORT_SYMBOL(unwind); + +int unwind_init_frame_info(struct unwind_frame_info *info, + struct task_struct *tsk, + /*const*/ struct pt_regs *regs) +{ + info->task = tsk; + arch_unw_init_frame_info(info, regs); + + return 0; +} +EXPORT_SYMBOL(unwind_init_frame_info); + +/* + * Prepare to unwind a blocked task. + */ +int unwind_init_blocked(struct unwind_frame_info *info, + struct task_struct *tsk) +{ + info->task = tsk; + arch_unw_init_blocked(info); + + return 0; +} +EXPORT_SYMBOL(unwind_init_blocked); + +/* + * Prepare to unwind the currently running thread. + */ +int unwind_init_running(struct unwind_frame_info *info, + asmlinkage void (*callback)(struct unwind_frame_info *, + void *arg), + void *arg) +{ + info->task = current; + arch_unwind_init_running(info, callback, arg); + + return 0; +} +EXPORT_SYMBOL(unwind_init_running); + +/* + * Unwind until the return pointer is in user-land (or until an error + * occurs). Returns 0 if successful, negative number in case of + * error. 
+ */ +int unwind_to_user(struct unwind_frame_info *info) +{ + while (!arch_unw_user_mode(info)) { + int err = unwind(info); + + if (err < 0) + return err; + } + + return 0; +} +EXPORT_SYMBOL(unwind_to_user); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ccb0c1fdf1b5..256b3b805c5c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -188,14 +188,22 @@ config FRAME_POINTER config UNWIND_INFO bool "Compile the kernel with frame unwind information" - depends on !IA64 - depends on !MODULES || !(MIPS || PARISC || PPC || SUPERH || V850) + depends on !IA64 && !PARISC + depends on !MODULES || !(MIPS || PPC || SUPERH || V850) help If you say Y here the resulting kernel image will be slightly larger but not slower, and it will give very useful debugging information. If you don't debug the kernel, you can say N, but we may not be able to solve problems without frame unwind information or frame pointers. +config STACK_UNWIND + bool "Stack unwind support" + depends on UNWIND_INFO + depends on n + help + This enables more precise stack traces, omitting all unrelated + occurrences of pointers into kernel code from the dump. + config FORCED_INLINING bool "Force gcc to inline functions marked 'inline'" depends on DEBUG_KERNEL -- cgit v1.2.3 From c33bd9aac0597eeedaaa01ea5aafe456894b2f2b Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Jun 2006 13:57:47 +0200 Subject: [PATCH] i386/x86-64: fall back to old-style call trace if no unwinding If no unwinding is possible at all for a certain exception instance, fall back to the old style call trace instead of not showing any trace at all. Also, allow setting the stack trace mode at the command line. 
Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/traps.c | 46 ++++++++++++++++++++++++++++------------ arch/x86_64/kernel/traps.c | 51 +++++++++++++++++++++++++++++++-------------- include/asm-i386/unwind.h | 8 +++---- include/asm-x86_64/unwind.h | 8 +++---- include/linux/unwind.h | 8 +++---- kernel/unwind.c | 7 +++---- 6 files changed, 83 insertions(+), 45 deletions(-) (limited to 'kernel') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 286584667865..78464097470a 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -93,6 +93,7 @@ asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); static int kstack_depth_to_print = 24; +static int call_trace = 1; ATOMIC_NOTIFIER_HEAD(i386die_chain); int register_die_notifier(struct notifier_block *nb) @@ -171,40 +172,47 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, return ebp; } -static asmlinkage void show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) +static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) { + int n = 0; int printed = 0; /* nr of entries already printed on current line */ while (unwind(info) == 0 && UNW_PC(info)) { + ++n; printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed); if (arch_unw_user_mode(info)) break; } if (printed) printk("\n"); + return n; } static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, char *log_lvl) { unsigned long ebp; - struct unwind_frame_info info; if (!task) task = current; - if (regs) { - if (unwind_init_frame_info(&info, task, regs) == 0) { - show_trace_unwind(&info, log_lvl); - return; + if (call_trace >= 0) { + int unw_ret = 0; + struct unwind_frame_info info; + + if (regs) { + if (unwind_init_frame_info(&info, task, regs) == 0) + unw_ret = show_trace_unwind(&info, log_lvl); + } else if (task == current) + unw_ret = 
unwind_init_running(&info, show_trace_unwind, log_lvl); + else { + if (unwind_init_blocked(&info, task) == 0) + unw_ret = show_trace_unwind(&info, log_lvl); } - } else if (task == current) { - if (unwind_init_running(&info, show_trace_unwind, log_lvl) == 0) - return; - } else { - if (unwind_init_blocked(&info, task) == 0) { - show_trace_unwind(&info, log_lvl); - return; + if (unw_ret > 0) { + if (call_trace > 0) + return; + printk("%sLegacy call trace:\n", log_lvl); } } @@ -1245,3 +1253,15 @@ static int __init kstack_setup(char *s) return 1; } __setup("kstack=", kstack_setup); + +static int __init call_trace_setup(char *s) +{ + if (strcmp(s, "old") == 0) + call_trace = -1; + else if (strcmp(s, "both") == 0) + call_trace = 0; + else if (strcmp(s, "new") == 0) + call_trace = 1; + return 1; +} +__setup("call_trace=", call_trace_setup); diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index eb1534ff1f5f..bd0891f4c2c7 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -107,6 +107,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) } static int kstack_depth_to_print = 10; +static int call_trace = 1; #ifdef CONFIG_KALLSYMS #include <linux/kallsyms.h> @@ -190,11 +191,12 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, return NULL; } -static void show_trace_unwind(struct unwind_frame_info *info, void *context) +static int show_trace_unwind(struct unwind_frame_info *info, void *context) { - int i = 11; + int i = 11, n = 0; while (unwind(info) == 0 && UNW_PC(info)) { + ++n; if (i > 50) { printk("\n "); i = 7; @@ -205,6 +207,7 @@ static void show_trace_unwind(struct unwind_frame_info *info, void *context) break; } printk("\n"); + return n; } /* @@ -218,27 +221,32 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s { const unsigned cpu = safe_smp_processor_id(); unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; - int i; + int i = 11; unsigned used = 
0; - struct unwind_frame_info info; printk("\nCall Trace:"); if (!tsk) tsk = current; - if (regs) { - if (unwind_init_frame_info(&info, tsk, regs) == 0) { - show_trace_unwind(&info, NULL); - return; + if (call_trace >= 0) { + int unw_ret = 0; + struct unwind_frame_info info; + + if (regs) { + if (unwind_init_frame_info(&info, tsk, regs) == 0) + unw_ret = show_trace_unwind(&info, NULL); + } else if (tsk == current) + unw_ret = unwind_init_running(&info, show_trace_unwind, NULL); + else { + if (unwind_init_blocked(&info, tsk) == 0) + unw_ret = show_trace_unwind(&info, NULL); } - } else if (tsk == current) { - if (unwind_init_running(&info, show_trace_unwind, NULL) == 0) - return; - } else { - if (unwind_init_blocked(&info, tsk) == 0) { - show_trace_unwind(&info, NULL); - return; + if (unw_ret > 0) { + if (call_trace > 0) + return; + printk("Legacy call trace:"); + i = 18; } } @@ -264,7 +272,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s } \ } while (0) - for(i = 11; ; ) { + for(; ; ) { const char *id; unsigned long *estack_end; estack_end = in_exception_stack(cpu, (unsigned long)stack, @@ -1052,3 +1060,14 @@ static int __init kstack_setup(char *s) } __setup("kstack=", kstack_setup); +static int __init call_trace_setup(char *s) +{ + if (strcmp(s, "old") == 0) + call_trace = -1; + else if (strcmp(s, "both") == 0) + call_trace = 0; + else if (strcmp(s, "new") == 0) + call_trace = 1; + return 1; +} +__setup("call_trace=", call_trace_setup); diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h index 1c076897ac21..d480f2e38215 100644 --- a/include/asm-i386/unwind.h +++ b/include/asm-i386/unwind.h @@ -66,10 +66,10 @@ static inline void arch_unw_init_blocked(struct unwind_frame_info *info) info->regs.xes = __USER_DS; } -extern asmlinkage void arch_unwind_init_running(struct unwind_frame_info *, - asmlinkage void (*callback)(struct unwind_frame_info *, - void *arg), - void *arg); +extern asmlinkage int 
arch_unwind_init_running(struct unwind_frame_info *, + asmlinkage int (*callback)(struct unwind_frame_info *, + void *arg), + void *arg); static inline int arch_unw_user_mode(const struct unwind_frame_info *info) { diff --git a/include/asm-x86_64/unwind.h b/include/asm-x86_64/unwind.h index 4f61de246179..f3e7124effe3 100644 --- a/include/asm-x86_64/unwind.h +++ b/include/asm-x86_64/unwind.h @@ -75,10 +75,10 @@ static inline void arch_unw_init_blocked(struct unwind_frame_info *info) info->regs.ss = __KERNEL_DS; } -extern void arch_unwind_init_running(struct unwind_frame_info *, - void (*callback)(struct unwind_frame_info *, - void *arg), - void *arg); +extern int arch_unwind_init_running(struct unwind_frame_info *, + int (*callback)(struct unwind_frame_info *, + void *arg), + void *arg); static inline int arch_unw_user_mode(const struct unwind_frame_info *info) { diff --git a/include/linux/unwind.h b/include/linux/unwind.h index 0295aa789ab4..13c7b2cd87ce 100644 --- a/include/linux/unwind.h +++ b/include/linux/unwind.h @@ -49,8 +49,8 @@ extern int unwind_init_blocked(struct unwind_frame_info *, * Prepare to unwind the currently running thread. */ extern int unwind_init_running(struct unwind_frame_info *, - asmlinkage void (*callback)(struct unwind_frame_info *, - void *arg), + asmlinkage int (*callback)(struct unwind_frame_info *, + void *arg), void *arg); /* @@ -97,8 +97,8 @@ static inline int unwind_init_blocked(struct unwind_frame_info *info, } static inline int unwind_init_running(struct unwind_frame_info *info, - asmlinkage void (*cb)(struct unwind_frame_info *, - void *arg), + asmlinkage int (*cb)(struct unwind_frame_info *, + void *arg), void *arg) { return -ENOSYS; diff --git a/kernel/unwind.c b/kernel/unwind.c index d36bcd3ad3b5..0421035272d9 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -885,14 +885,13 @@ EXPORT_SYMBOL(unwind_init_blocked); * Prepare to unwind the currently running thread. 
*/ int unwind_init_running(struct unwind_frame_info *info, - asmlinkage void (*callback)(struct unwind_frame_info *, - void *arg), + asmlinkage int (*callback)(struct unwind_frame_info *, + void *arg), void *arg) { info->task = current; - arch_unwind_init_running(info, callback, arg); - return 0; + return arch_unwind_init_running(info, callback, arg); } EXPORT_SYMBOL(unwind_init_running); -- cgit v1.2.3 From 83f4fcce7fdd213bd570b899862c3838871f8cf7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Jun 2006 13:57:50 +0200 Subject: [PATCH] x86_64: allow unwinder to build without module support Add proper conditionals to be able to build with CONFIG_MODULES=n. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/unwind.h | 8 ++++++++ kernel/unwind.c | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'kernel') diff --git a/include/linux/unwind.h b/include/linux/unwind.h index 13c7b2cd87ce..ce48e2cd37a2 100644 --- a/include/linux/unwind.h +++ b/include/linux/unwind.h @@ -29,12 +29,16 @@ struct module; */ extern void unwind_init(void); +#ifdef CONFIG_MODULES + extern void *unwind_add_table(struct module *, const void *table_start, unsigned long table_size); extern void unwind_remove_table(void *handle, int init_only); +#endif + extern int unwind_init_frame_info(struct unwind_frame_info *, struct task_struct *, /*const*/ struct pt_regs *); @@ -72,6 +76,8 @@ struct unwind_frame_info {}; static inline void unwind_init(void) {} +#ifdef CONFIG_MODULES + static inline void *unwind_add_table(struct module *mod, const void *table_start, unsigned long table_size) @@ -79,6 +85,8 @@ static inline void *unwind_add_table(struct module *mod, return NULL; } +#endif + static inline void unwind_remove_table(void *handle, int init_only) { } diff --git a/kernel/unwind.c b/kernel/unwind.c index 0421035272d9..f69c804c8e62 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -172,6 +172,8 @@ void __init unwind_init(void) 
__start_unwind, __end_unwind - __start_unwind); } +#ifdef CONFIG_MODULES + /* Must be called with module_mutex held. */ void *unwind_add_table(struct module *module, const void *table_start, @@ -253,6 +255,8 @@ void unwind_remove_table(void *handle, int init_only) kfree(table); } +#endif /* CONFIG_MODULES */ + static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) { const u8 *cur = *pcur; -- cgit v1.2.3 From 495ab9c045e1b0e5c82951b762257fe1c9d81564 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:59:11 +0200 Subject: [PATCH] i386/x86-64/ia64: Move polling flag into thread_info_status During some profiling I noticed that default_idle causes a lot of memory traffic. I think that is caused by the atomic operations to clear/set the polling flag in thread_info. There is actually no reason to make this atomic - only the idle thread does it to itself, other CPUs only read it. So I moved it into ti->status. Converted i386/x86-64/ia64 for now because that was the easiest way to fix ACPI which also manipulates these flags in its idle function. 
Cc: Nick Piggin Cc: Tony Luck Cc: Len Brown Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/apm.c | 6 +++--- arch/i386/kernel/process.c | 6 +++--- arch/ia64/kernel/process.c | 4 ++-- arch/x86_64/kernel/process.c | 7 +++---- drivers/acpi/processor_idle.c | 12 ++++++------ include/asm-i386/thread_info.h | 7 ++++--- include/asm-ia64/thread_info.h | 5 +++++ include/asm-x86_64/thread_info.h | 6 ++++-- kernel/sched.c | 9 +++++++-- 9 files changed, 37 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 9e819eb68229..7c5729d1fd06 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -764,9 +764,9 @@ static int apm_do_idle(void) int idled = 0; int polling; - polling = test_thread_flag(TIF_POLLING_NRFLAG); + polling = !!(current_thread_info()->status & TS_POLLING); if (polling) { - clear_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); } if (!need_resched()) { @@ -774,7 +774,7 @@ static int apm_do_idle(void) ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); } if (polling) - set_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status |= TS_POLLING; if (!idled) return 0; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 525432e3fef7..6946b06e2784 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -102,7 +102,7 @@ void default_idle(void) local_irq_enable(); if (!hlt_counter && boot_cpu_data.hlt_works_ok) { - clear_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); while (!need_resched()) { local_irq_disable(); @@ -111,7 +111,7 @@ void default_idle(void) else local_irq_enable(); } - set_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status |= TS_POLLING; } else { while (!need_resched()) cpu_relax(); @@ -174,7 +174,7 @@ void cpu_idle(void) { int cpu = smp_processor_id(); - 
set_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status |= TS_POLLING; /* endless idle loop with no priority at all */ while (1) { diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 355d57970ba3..b045c279136c 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -272,9 +272,9 @@ cpu_idle (void) /* endless idle loop with no priority at all */ while (1) { if (can_do_pal_halt) - clear_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status &= ~TS_POLLING; else - set_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status |= TS_POLLING; if (!need_resched()) { void (*idle)(void); diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index d6fa41459c80..b596837a1527 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -110,7 +110,7 @@ static void default_idle(void) { local_irq_enable(); - clear_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); while (!need_resched()) { local_irq_disable(); @@ -119,7 +119,7 @@ static void default_idle(void) else local_irq_enable(); } - set_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status |= TS_POLLING; } /* @@ -202,8 +202,7 @@ static inline void play_dead(void) */ void cpu_idle (void) { - set_thread_flag(TIF_POLLING_NRFLAG); - + current_thread_info()->status |= TS_POLLING; /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 3b97a5eae9e8..74173ce6aaf4 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -206,11 +206,11 @@ acpi_processor_power_activate(struct acpi_processor *pr, static void acpi_safe_halt(void) { - clear_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); if (!need_resched()) safe_halt(); - set_thread_flag(TIF_POLLING_NRFLAG); + 
current_thread_info()->status |= TS_POLLING; } static atomic_t c3_cpu_count; @@ -330,10 +330,10 @@ static void acpi_processor_idle(void) * Invoke the current Cx state to put the processor to sleep. */ if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) { - clear_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); if (need_resched()) { - set_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status |= TS_POLLING; local_irq_enable(); return; } @@ -371,7 +371,7 @@ static void acpi_processor_idle(void) t2 = inl(acpi_fadt.xpm_tmr_blk.address); /* Re-enable interrupts */ local_irq_enable(); - set_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status |= TS_POLLING; /* Compute time (ticks) that we were actually asleep */ sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; @@ -411,7 +411,7 @@ static void acpi_processor_idle(void) /* Re-enable interrupts */ local_irq_enable(); - set_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status |= TS_POLLING; /* Compute time (ticks) that we were actually asleep */ sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD; diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index 8420ed12491e..fdbc7f422ea5 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h @@ -140,8 +140,7 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__; #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ -#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ -#define TIF_MEMDIE 17 +#define TIF_MEMDIE 16 #define _TIF_SYSCALL_TRACE (1<thread_info->status & TS_POLLING) #endif /* __KERNEL__ */ diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h index e5392c4d30c6..8bc9869e5765 100644 --- 
a/include/asm-ia64/thread_info.h +++ b/include/asm-ia64/thread_info.h @@ -27,6 +27,7 @@ struct thread_info { __u32 flags; /* thread_info flags (see TIF_*) */ __u32 cpu; /* current CPU */ __u32 last_cpu; /* Last CPU thread ran on */ + __u32 status; /* Thread synchronous flags */ mm_segment_t addr_limit; /* user-level address space limit */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ struct restart_block restart_block; @@ -103,4 +104,8 @@ struct thread_info { /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) +#define TS_POLLING 1 /* true if in idle loop and not sleeping */ + +#define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING) + #endif /* _ASM_IA64_THREAD_INFO_H */ diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h index 4ac0e0a36934..b5e88216fd80 100644 --- a/include/asm-x86_64/thread_info.h +++ b/include/asm-x86_64/thread_info.h @@ -101,7 +101,7 @@ static inline struct thread_info *stack_thread_info(void) #define TIF_IRET 5 /* force IRET */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ -#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ +/* 16 free */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ #define TIF_ABI_PENDING 19 @@ -115,7 +115,6 @@ static inline struct thread_info *stack_thread_info(void) #define _TIF_IRET (1<thread_info->status & TS_POLLING) #endif /* __KERNEL__ */ diff --git a/kernel/sched.c b/kernel/sched.c index f06d059edef5..7d1027a4dd21 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -818,6 +818,11 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) * the target CPU. 
*/ #ifdef CONFIG_SMP + +#ifndef tsk_is_polling +#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) +#endif + static void resched_task(task_t *p) { int cpu; @@ -833,9 +838,9 @@ static void resched_task(task_t *p) if (cpu == smp_processor_id()) return; - /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */ + /* NEED_RESCHED must be visible before we test polling */ smp_mb(); - if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG)) + if (!tsk_is_polling(p)) smp_send_reschedule(cpu); } #else -- cgit v1.2.3