Diffstat (limited to 'lib')
49 files changed, 2883 insertions, 906 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 170d8ca901d8..fa9bf2c06199 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -7,6 +7,9 @@ config BINARY_PRINTF menu "Library routines" +config RAID6_PQ + tristate + config BITREVERSE tristate @@ -181,9 +184,6 @@ config HAS_DMA config CHECK_SIGNATURE bool -config HAVE_LMB - boolean - config CPUMASK_OFFSTACK bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS help diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 935248bdbc47..9e06b7f5ecf1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -76,7 +76,6 @@ config UNUSED_SYMBOLS config DEBUG_FS bool "Debug Filesystem" - depends on SYSFS help debugfs is a virtual file system that kernel developers use to put debugging files into. Enable this option to be able to read and @@ -152,28 +151,33 @@ config DEBUG_SHIRQ Drivers ought to be able to handle interrupts coming in at those points; some don't and need to be caught. -config DETECT_SOFTLOCKUP - bool "Detect Soft Lockups" +config LOCKUP_DETECTOR + bool "Detect Hard and Soft Lockups" depends on DEBUG_KERNEL && !S390 - default y help - Say Y here to enable the kernel to detect "soft lockups", - which are bugs that cause the kernel to loop in kernel + Say Y here to enable the kernel to act as a watchdog to detect + hard and soft lockups. + + Softlockups are bugs that cause the kernel to loop in kernel mode for more than 60 seconds, without giving other tasks a - chance to run. + chance to run. The current stack trace is displayed upon + detection and the system will stay locked up. - When a soft-lockup is detected, the kernel will print the - current stack trace (which you should report), but the - system will stay locked up. This feature has negligible - overhead. + Hardlockups are bugs that cause the CPU to loop in kernel mode + for more than 60 seconds, without letting other interrupts have a + chance to run. The current stack trace is displayed upon detection + and the system will stay locked up. + + The overhead should be minimal. A periodic hrtimer runs to + generate interrupts and kick the watchdog task every 10-12 seconds. + An NMI is generated every 60 seconds or so to check for hardlockups. - (Note that "hard lockups" are separate type of bugs that - can be detected via the NMI-watchdog, on platforms that - support it.) +config HARDLOCKUP_DETECTOR + def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" - depends on DETECT_SOFTLOCKUP + depends on LOCKUP_DETECTOR help Say Y here to enable the kernel to panic on "soft lockups", which are bugs that cause the kernel to loop in kernel @@ -190,7 +194,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE int - depends on DETECT_SOFTLOCKUP + depends on LOCKUP_DETECTOR range 0 1 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC default 1 if BOOTPARAM_SOFTLOCKUP_PANIC @@ -307,6 +311,12 @@ config DEBUG_OBJECTS_WORK work queue routines to track the life time of work objects and validate the work operations. +config DEBUG_OBJECTS_RCU_HEAD + bool "Debug RCU callbacks objects" + depends on DEBUG_OBJECTS && PREEMPT + help + Enable this to turn on debugging of RCU list heads (call_rcu() usage). + config DEBUG_OBJECTS_ENABLE_DEFAULT int "debug_objects bootup default value (0-1)" range 0 1 @@ -400,6 +410,13 @@ config DEBUG_KMEMLEAK_TEST If unsure, say N. +config DEBUG_KMEMLEAK_DEFAULT_OFF + bool "Default kmemleak to off" + depends on DEBUG_KMEMLEAK + help + Say Y here to disable kmemleak by default. 
It can then be enabled + on the command line via kmemleak=on. + config DEBUG_PREEMPT bool "Debug preemptible kernel" depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT @@ -512,11 +529,23 @@ config PROVE_RCU Say N if you are unsure. +config PROVE_RCU_REPEATEDLY + bool "RCU debugging: don't disable PROVE_RCU on first splat" + depends on PROVE_RCU + default n + help + By itself, PROVE_RCU will disable checking upon issuing the + first warning (or "splat"). This feature prevents such + disabling, allowing multiple RCU-lockdep warnings to be printed + on a single reboot. + + Say N if you are unsure. + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 + select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE select KALLSYMS select KALLSYMS_ALL @@ -616,6 +645,19 @@ config DEBUG_INFO If unsure, say N. +config DEBUG_INFO_REDUCED + bool "Reduce debugging information" + depends on DEBUG_INFO + help + If you say Y here gcc is instructed to generate less debugging + information for structure types. This means that tools that + need full debugging information (like kgdb or systemtap) won't + be happy. But if you merely need debugging information to + resolve line numbers there is no loss. Advantage is that + build directory object sizes shrink dramatically over a full + DEBUG_INFO build and compile times are reduced too. + Only works with newer gcc versions. + config DEBUG_VM bool "Debug VM" depends on DEBUG_KERNEL @@ -793,7 +835,7 @@ config RCU_CPU_STALL_DETECTOR config RCU_CPU_STALL_VERBOSE bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU - default n + default y help This option causes RCU to printk detailed per-task information for any tasks that are stalling the current RCU grace period. @@ -886,6 +928,18 @@ config LKDTM Documentation on how to use the module can be found in Documentation/fault-injection/provoke-crashes.txt +config CPU_NOTIFIER_ERROR_INJECT + tristate "CPU notifier error injection module" + depends on HOTPLUG_CPU && DEBUG_KERNEL + help + This option provides a kernel module that can be used to test + the error handling of the cpu notifiers + + To compile this code as a module, choose M here: the module will + be called cpu-notifier-error-inject. + + If unsure, say N. + config FAULT_INJECTION bool "Fault-injection framework" depends on DEBUG_KERNEL @@ -913,7 +967,7 @@ config FAIL_MAKE_REQUEST Provide fault-injection capability for disk IO. config FAIL_IO_TIMEOUT - bool "Faul-injection capability for faking disk interrupts" + bool "Fault-injection capability for faking disk interrupts" depends on FAULT_INJECTION && BLOCK help Provide fault-injection capability on end IO handling. 
This @@ -934,13 +988,13 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !PPC && !S390 + select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE help Provide stacktrace filter for fault-injection capabilities config LATENCYTOP bool "Latency measuring infrastructure" - select FRAME_POINTER if !MIPS && !PPC && !S390 + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE select KALLSYMS select KALLSYMS_ALL select STACKTRACE @@ -1027,10 +1081,10 @@ config DYNAMIC_DEBUG Usage: - Dynamic debugging is controlled via the 'dynamic_debug/ddebug' file, + Dynamic debugging is controlled via the 'dynamic_debug/control' file, which is contained in the 'debugfs' filesystem. Thus, the debugfs filesystem must first be mounted before making use of this feature. - We refer the control file as: <debugfs>/dynamic_debug/ddebug. This + We refer the control file as: <debugfs>/dynamic_debug/control. This file contains a list of the debug statements that can be enabled. The format for each line of the file is: @@ -1045,7 +1099,7 @@ config DYNAMIC_DEBUG From a live system: - nullarbor:~ # cat <debugfs>/dynamic_debug/ddebug + nullarbor:~ # cat <debugfs>/dynamic_debug/control # filename:lineno [module]function flags format fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012" fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012" @@ -1055,23 +1109,23 @@ config DYNAMIC_DEBUG // enable the message at line 1603 of file svcsock.c nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > - <debugfs>/dynamic_debug/ddebug + <debugfs>/dynamic_debug/control // enable all the messages in file svcsock.c nullarbor:~ # echo -n 'file svcsock.c +p' > - <debugfs>/dynamic_debug/ddebug + <debugfs>/dynamic_debug/control // enable all the messages in the NFS server module nullarbor:~ # echo -n 'module nfsd +p' > - <debugfs>/dynamic_debug/ddebug + <debugfs>/dynamic_debug/control // enable all 12 messages in the function svc_process() nullarbor:~ # echo -n 'func svc_process +p' > - <debugfs>/dynamic_debug/ddebug + <debugfs>/dynamic_debug/control // disable all 12 messages in the function svc_process() nullarbor:~ # echo -n 'func svc_process -p' > - <debugfs>/dynamic_debug/ddebug + <debugfs>/dynamic_debug/control See Documentation/dynamic-debug-howto.txt for additional information. @@ -1086,6 +1140,13 @@ config DMA_API_DEBUG This option causes a performance degredation. Use only if you want to debug device drivers. If unsure, say N. +config ATOMIC64_SELFTEST + bool "Perform an atomic64_t self-test at boot" + help + Enable this option to test the atomic64_t functions at boot. + + If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 9b5d1d7f2ef7..43cb93fa2651 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -3,7 +3,7 @@ config HAVE_ARCH_KGDB bool menuconfig KGDB - bool "KGDB: kernel debugging with remote gdb" + bool "KGDB: kernel debugger" depends on HAVE_ARCH_KGDB depends on DEBUG_KERNEL && EXPERIMENTAL help @@ -57,4 +57,26 @@ config KGDB_TESTS_BOOT_STRING information about other strings you could use beyond the default of V1F100. +config KGDB_LOW_LEVEL_TRAP + bool "KGDB: Allow debugging with traps in notifiers" + depends on X86 || MIPS + default n + help + This will add an extra call back to kgdb for the breakpoint + exception handler on which will will allow kgdb to step + through a notify handler. 
+ +config KGDB_KDB + bool "KGDB_KDB: include kdb frontend for kgdb" + default n + help + KDB frontend for kernel + +config KDB_KEYBOARD + bool "KGDB_KDB: keyboard as input device" + depends on VT && KGDB_KDB + default n + help + KDB can use a PS/2 type keyboard for an input device + endif # KGDB diff --git a/lib/Makefile b/lib/Makefile index 0d4015205c64..e6a3763b8212 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o lcm.o list_sort.o + string_helpers.o gcd.o lcm.o list_sort.o uuid.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -39,7 +39,10 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o + +CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o + obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o obj-$(CONFIG_BTREE) += btree.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o @@ -66,6 +69,7 @@ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_RAID6_PQ) += raid6/ lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o @@ -82,11 +86,10 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o +obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o -obj-$(CONFIG_HAVE_LMB) += lmb.o - obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o @@ -101,6 +104,8 @@ obj-$(CONFIG_GENERIC_CSUM) += checksum.o obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o +obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/atomic64.c b/lib/atomic64.c index 8bee16ec7524..a21c12bc727c 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -162,12 +162,12 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u) { unsigned long flags; spinlock_t *lock = lock_addr(v); - int ret = 1; + int ret = 0; spin_lock_irqsave(lock, flags); if (v->counter != u) { v->counter += a; - ret = 0; + ret = 1; } spin_unlock_irqrestore(lock, flags); return ret; diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c new file mode 100644 index 000000000000..44524cc8c32a --- /dev/null +++ b/lib/atomic64_test.c @@ -0,0 +1,166 @@ +/* + * Testsuite for atomic64_t functions + * + * Copyright © 2010 Luca Barbieri + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <asm/atomic.h> + +#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0) +static __init int test_atomic64(void) +{ + long long v0 = 0xaaa31337c001d00dLL; + long long v1 = 0xdeadbeefdeafcafeLL; + long long v2 = 0xfaceabadf00df001LL; + long long onestwos = 0x1111111122222222LL; + long long one = 1LL; + + atomic64_t v = ATOMIC64_INIT(v0); + long long r = v0; + BUG_ON(v.counter != r); + + atomic64_set(&v, v1); + r = v1; + BUG_ON(v.counter != r); + BUG_ON(atomic64_read(&v) != r); + + INIT(v0); + atomic64_add(onestwos, &v); + r += onestwos; + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_add(-one, &v); + r += -one; + BUG_ON(v.counter != r); + + INIT(v0); + r += onestwos; + BUG_ON(atomic64_add_return(onestwos, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + r += -one; + BUG_ON(atomic64_add_return(-one, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_sub(onestwos, &v); + r -= onestwos; + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_sub(-one, &v); + r -= -one; + BUG_ON(v.counter != r); + + INIT(v0); + r -= onestwos; + BUG_ON(atomic64_sub_return(onestwos, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + r -= -one; + BUG_ON(atomic64_sub_return(-one, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_inc(&v); + r += one; + BUG_ON(v.counter != r); + + INIT(v0); + r += one; + BUG_ON(atomic64_inc_return(&v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_dec(&v); + r -= one; + BUG_ON(v.counter != r); + + INIT(v0); + r -= one; + BUG_ON(atomic64_dec_return(&v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_xchg(&v, v1) != v0); + r = v1; + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0); + r = v1; + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_add_unless(&v, one, v0)); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(!atomic64_add_unless(&v, one, v1)); + r += one; + BUG_ON(v.counter != r); + +#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \ + defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM) + INIT(onestwos); + BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); + r -= one; + BUG_ON(v.counter != r); + + INIT(0); + BUG_ON(atomic64_dec_if_positive(&v) != -one); + BUG_ON(v.counter != r); + + INIT(-one); + BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); + BUG_ON(v.counter != r); +#else +#warning Please implement atomic64_dec_if_positive for your architecture, and add it to the IF above +#endif + + INIT(onestwos); + BUG_ON(!atomic64_inc_not_zero(&v)); + r += one; + BUG_ON(v.counter != r); + + INIT(0); + BUG_ON(atomic64_inc_not_zero(&v)); + BUG_ON(v.counter != r); + + INIT(-one); + BUG_ON(!atomic64_inc_not_zero(&v)); + r += one; + BUG_ON(v.counter != r); + +#ifdef CONFIG_X86 + printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n", +#ifdef CONFIG_X86_64 + "x86-64", +#elif defined(CONFIG_X86_CMPXCHG64) + "i586+", +#else + "i386+", +#endif + boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without", + boot_cpu_has(X86_FEATURE_XMM) ? 
"with" : "without"); +#else + printk(KERN_INFO "atomic64 test passed\n"); +#endif + + return 0; +} + +core_initcall(test_atomic64); diff --git a/lib/btree.c b/lib/btree.c index 41859a820218..c9c6f0351526 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -95,7 +95,8 @@ static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp) unsigned long *node; node = mempool_alloc(head->mempool, gfp); - memset(node, 0, NODESIZE); + if (likely(node)) + memset(node, 0, NODESIZE); return node; } diff --git a/lib/bug.c b/lib/bug.c index 300e41afbf97..7cdfad88128f 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -136,8 +136,6 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) bug = find_bug(bugaddr); - printk(KERN_EMERG "------------[ cut here ]------------\n"); - file = NULL; line = 0; warning = 0; @@ -156,19 +154,25 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) if (warning) { /* this is a WARN_ON rather than BUG/BUG_ON */ + printk(KERN_WARNING "------------[ cut here ]------------\n"); + if (file) - printk(KERN_ERR "Badness at %s:%u\n", + printk(KERN_WARNING "WARNING: at %s:%u\n", file, line); else - printk(KERN_ERR "Badness at %p " + printk(KERN_WARNING "WARNING: at %p " "[verbose debug info unavailable]\n", (void *)bugaddr); + print_modules(); show_regs(regs); - add_taint(TAINT_WARN); + print_oops_end_marker(); + add_taint(BUG_GET_TAINT(bug)); return BUG_TRAP_TYPE_WARN; } + printk(KERN_EMERG "------------[ cut here ]------------\n"); + if (file) printk(KERN_CRIT "kernel BUG at %s:%u!\n", file, line); diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c new file mode 100644 index 000000000000..4dc20321b0d5 --- /dev/null +++ b/lib/cpu-notifier-error-inject.c @@ -0,0 +1,63 @@ +#include <linux/kernel.h> +#include <linux/cpu.h> +#include <linux/module.h> +#include <linux/notifier.h> + +static int priority; +static int cpu_up_prepare_error; +static int cpu_down_prepare_error; + +module_param(priority, int, 0); +MODULE_PARM_DESC(priority, "specify cpu notifier priority"); + +module_param(cpu_up_prepare_error, int, 0644); +MODULE_PARM_DESC(cpu_up_prepare_error, + "specify error code to inject CPU_UP_PREPARE action"); + +module_param(cpu_down_prepare_error, int, 0644); +MODULE_PARM_DESC(cpu_down_prepare_error, + "specify error code to inject CPU_DOWN_PREPARE action"); + +static int err_inject_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + int err = 0; + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + err = cpu_up_prepare_error; + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + err = cpu_down_prepare_error; + break; + } + if (err) + printk(KERN_INFO "Injecting error (%d) at cpu notifier\n", err); + + return notifier_from_errno(err); +} + +static struct notifier_block err_inject_cpu_notifier = { + .notifier_call = err_inject_cpu_callback, +}; + +static int err_inject_init(void) +{ + err_inject_cpu_notifier.priority = priority; + + return register_hotcpu_notifier(&err_inject_cpu_notifier); +} + +static void err_inject_exit(void) +{ + unregister_hotcpu_notifier(&err_inject_cpu_notifier); +} + +module_init(err_inject_init); +module_exit(err_inject_exit); + +MODULE_DESCRIPTION("CPU notifier error injection module"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>"); diff --git a/lib/crc32.c b/lib/crc32.c index bc5b936e9142..4855995fcde9 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -48,12 +48,20 @@ MODULE_LICENSE("GPL"); 
#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8 static inline u32 -crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab) +crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) { # ifdef __LITTLE_ENDIAN -# define DO_CRC(x) crc = tab[(crc ^ (x)) & 255 ] ^ (crc >> 8) +# define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8) +# define DO_CRC4 crc = tab[3][(crc) & 255] ^ \ + tab[2][(crc >> 8) & 255] ^ \ + tab[1][(crc >> 16) & 255] ^ \ + tab[0][(crc >> 24) & 255] # else -# define DO_CRC(x) crc = tab[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) +# define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8) +# define DO_CRC4 crc = tab[0][(crc) & 255] ^ \ + tab[1][(crc >> 8) & 255] ^ \ + tab[2][(crc >> 16) & 255] ^ \ + tab[3][(crc >> 24) & 255] # endif const u32 *b; size_t rem_len; @@ -70,10 +78,7 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab) b = (const u32 *)buf; for (--b; len; --len) { crc ^= *++b; /* use pre increment for speed */ - DO_CRC(0); - DO_CRC(0); - DO_CRC(0); - DO_CRC(0); + DO_CRC4; } len = rem_len; /* And the last few bytes */ @@ -85,6 +90,7 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab) } return crc; #undef DO_CRC +#undef DO_CRC4 } #endif /** @@ -117,7 +123,7 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) { # if CRC_LE_BITS == 8 - const u32 *tab = crc32table_le; + const u32 (*tab)[] = crc32table_le; crc = __cpu_to_le32(crc); crc = crc32_body(crc, p, len, tab); @@ -174,7 +180,7 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) { # if CRC_BE_BITS == 8 - const u32 *tab = crc32table_be; + const u32 (*tab)[] = crc32table_be; crc = __cpu_to_be32(crc); crc = crc32_body(crc, p, len, tab); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index b862b30369ff..deebcc57d4e6 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -141,6 +141,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) obj->object = addr; obj->descr = descr; obj->state = ODEBUG_STATE_NONE; + obj->astate = 0; hlist_del(&obj->node); hlist_add_head(&obj->node, &b->list); @@ -252,8 +253,10 @@ static void debug_print_object(struct debug_obj *obj, char *msg) if (limit < 5 && obj->descr != descr_test) { limit++; - WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg, - obj_states[obj->state], obj->descr->name); + WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " + "object type: %s\n", + msg, obj_states[obj->state], obj->astate, + obj->descr->name); } debug_objects_warnings++; } @@ -447,7 +450,10 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: case ODEBUG_STATE_ACTIVE: - obj->state = ODEBUG_STATE_INACTIVE; + if (!obj->astate) + obj->state = ODEBUG_STATE_INACTIVE; + else + debug_print_object(obj, "deactivate"); break; case ODEBUG_STATE_DESTROYED: @@ -553,6 +559,53 @@ out_unlock: raw_spin_unlock_irqrestore(&db->lock, flags); } +/** + * debug_object_active_state - debug checks object usage state machine + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + * @expect: expected state + * @next: state to move to if expected state is found + */ +void +debug_object_active_state(void *addr, struct debug_obj_descr *descr, + unsigned int expect, unsigned int next) +{ + struct debug_bucket *db; 
+ struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + raw_spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (obj) { + switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + if (obj->astate == expect) + obj->astate = next; + else + debug_print_object(obj, "active_state"); + break; + + default: + debug_print_object(obj, "active_state"); + break; + } + } else { + struct debug_obj o = { .object = addr, + .state = ODEBUG_STATE_NOTAVAILABLE, + .descr = descr }; + + debug_print_object(&o, "active_state"); + } + + raw_spin_unlock_irqrestore(&db->lock, flags); +} + #ifdef CONFIG_DEBUG_OBJECTS_FREE static void __debug_check_no_obj_freed(const void *address, unsigned long size) { @@ -774,7 +827,7 @@ static int __init fixup_free(void *addr, enum debug_obj_state state) } } -static int +static int __init check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) { struct debug_bucket *db; @@ -917,7 +970,7 @@ void __init debug_objects_early_init(void) /* * Convert the statically allocated objects to dynamic ones: */ -static int debug_objects_replace_static_objects(void) +static int __init debug_objects_replace_static_objects(void) { struct debug_bucket *db = obj_hash; struct hlist_node *node, *tmp; diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index a4e971dee102..81c8bb1cc6aa 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -107,6 +107,8 @@ struct bunzip_data { unsigned char selectors[32768]; /* nSelectors = 15 bits */ struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ int io_error; /* non-zero if we have IO error */ + int byteCount[256]; + unsigned char symToByte[256], mtfSymbol[256]; }; @@ -158,14 +160,16 @@ static int INIT get_next_block(struct bunzip_data *bd) int *base = NULL; int *limit = NULL; int dbufCount, nextSym, dbufSize, groupCount, selector, - i, j, k, t, runPos, symCount, symTotal, nSelectors, - byteCount[256]; - unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; + i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount; + unsigned char uc, *symToByte, *mtfSymbol, *selectors; unsigned int *dbuf, origPtr; dbuf = bd->dbuf; dbufSize = bd->dbufSize; selectors = bd->selectors; + byteCount = bd->byteCount; + symToByte = bd->symToByte; + mtfSymbol = bd->mtfSymbol; /* Read in header signature and CRC, then validate signature. (last block signature means CRC is for whole file, return now) */ diff --git a/lib/devres.c b/lib/devres.c index 49368608f988..6efddf53b90c 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -328,7 +328,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all); * @pdev: PCI device to map IO resources for * @mask: Mask of BARs to unmap and release * - * Unamp and release regions specified by @mask. + * Unmap and release regions specified by @mask. */ void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask) { diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index d6b8b9b1abfe..02afc2533728 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -456,7 +456,7 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, __func__, (int)len); nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS); - if (nwords < 0) + if (nwords <= 0) return -EINVAL; if (ddebug_parse_query(words, nwords-1, &query)) return -EINVAL; @@ -692,7 +692,7 @@ static void ddebug_table_free(struct ddebug_table *dt) * Called in response to a module being unloaded. 
Removes * any ddebug_table's which point at the module. */ -int ddebug_remove_module(char *mod_name) +int ddebug_remove_module(const char *mod_name) { struct ddebug_table *dt, *nextdt; int ret = -ENOENT; diff --git a/lib/flex_array.c b/lib/flex_array.c index 41b1804fa728..77a6fea7481e 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -171,6 +171,8 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) * Note that this *copies* the contents of @src into * the array. If you are trying to store an array of * pointers, make sure to pass in &ptr instead of ptr. + * You may instead wish to use the flex_array_put_ptr() + * helper function. * * Locking must be provided by the caller. */ @@ -265,7 +267,8 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start, * * Returns a pointer to the data at index @element_nr. Note * that this is a copy of the data that was passed in. If you - * are using this to store pointers, you'll get back &ptr. + * are using this to store pointers, you'll get back &ptr. You + * may instead wish to use the flex_array_get_ptr helper. * * Locking must be provided by the caller. */ @@ -286,6 +289,26 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr) return &part->elements[index_inside_part(fa, element_nr)]; } +/** + * flex_array_get_ptr - pull a ptr back out of the array + * @fa: the flex array from which to extract data + * @element_nr: index of the element to fetch from the array + * + * Returns the pointer placed in the flex array at element_nr using + * flex_array_put_ptr(). This function should not be called if the + * element in question was not set using the _put_ptr() helper. + */ +void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr) +{ + void **tmp; + + tmp = flex_array_get(fa, element_nr); + if (!tmp) + return NULL; + + return *tmp; +} + static int part_is_free(struct flex_array_part *part) { int i; diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index bea5d97df991..85d0e412a04f 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -7,8 +7,8 @@ #define LE_TABLE_SIZE (1 << CRC_LE_BITS) #define BE_TABLE_SIZE (1 << CRC_BE_BITS) -static uint32_t crc32table_le[LE_TABLE_SIZE]; -static uint32_t crc32table_be[BE_TABLE_SIZE]; +static uint32_t crc32table_le[4][LE_TABLE_SIZE]; +static uint32_t crc32table_be[4][BE_TABLE_SIZE]; /** * crc32init_le() - allocate and initialize LE table data @@ -22,12 +22,19 @@ static void crc32init_le(void) unsigned i, j; uint32_t crc = 1; - crc32table_le[0] = 0; + crc32table_le[0][0] = 0; for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) { crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); for (j = 0; j < LE_TABLE_SIZE; j += 2 * i) - crc32table_le[i + j] = crc ^ crc32table_le[j]; + crc32table_le[0][i + j] = crc ^ crc32table_le[0][j]; + } + for (i = 0; i < LE_TABLE_SIZE; i++) { + crc = crc32table_le[0][i]; + for (j = 1; j < 4; j++) { + crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8); + crc32table_le[j][i] = crc; + } } } @@ -39,25 +46,35 @@ static void crc32init_be(void) unsigned i, j; uint32_t crc = 0x80000000; - crc32table_be[0] = 0; + crc32table_be[0][0] = 0; for (i = 1; i < BE_TABLE_SIZE; i <<= 1) { crc = (crc << 1) ^ ((crc & 0x80000000) ? 
CRCPOLY_BE : 0); for (j = 0; j < i; j++) - crc32table_be[i + j] = crc ^ crc32table_be[j]; + crc32table_be[0][i + j] = crc ^ crc32table_be[0][j]; + } + for (i = 0; i < BE_TABLE_SIZE; i++) { + crc = crc32table_be[0][i]; + for (j = 1; j < 4; j++) { + crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8); + crc32table_be[j][i] = crc; + } } } -static void output_table(uint32_t table[], int len, char *trans) +static void output_table(uint32_t table[4][256], int len, char *trans) { - int i; + int i, j; - for (i = 0; i < len - 1; i++) { - if (i % ENTRIES_PER_LINE == 0) - printf("\n"); - printf("%s(0x%8.8xL), ", trans, table[i]); + for (j = 0 ; j < 4; j++) { + printf("{"); + for (i = 0; i < len - 1; i++) { + if (i % ENTRIES_PER_LINE == 0) + printf("\n"); + printf("%s(0x%8.8xL), ", trans, table[j][i]); + } + printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]); } - printf("%s(0x%8.8xL)\n", trans, table[len - 1]); } int main(int argc, char** argv) @@ -66,14 +83,14 @@ int main(int argc, char** argv) if (CRC_LE_BITS > 1) { crc32init_le(); - printf("static const u32 crc32table_le[] = {"); + printf("static const u32 crc32table_le[4][256] = {"); output_table(crc32table_le, LE_TABLE_SIZE, "tole"); printf("};\n"); } if (CRC_BE_BITS > 1) { crc32init_be(); - printf("static const u32 crc32table_be[] = {"); + printf("static const u32 crc32table_be[4][256] = {"); output_table(crc32table_be, BE_TABLE_SIZE, "tobe"); printf("};\n"); } diff --git a/lib/genalloc.c b/lib/genalloc.c index 736c3b06398e..1923f1490e72 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -128,7 +128,6 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); end_bit = (chunk->end_addr - chunk->start_addr) >> order; - end_bit -= nbits + 1; spin_lock_irqsave(&chunk->lock, flags); start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, diff --git a/lib/hexdump.c b/lib/hexdump.c index 39af2560f765..5d7a4802c562 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -16,6 +16,24 @@ const char hex_asc[] = "0123456789abcdef"; EXPORT_SYMBOL(hex_asc); /** + * hex_to_bin - convert a hex digit to its real value + * @ch: ascii character represents hex digit + * + * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad + * input. + */ +int hex_to_bin(char ch) +{ + if ((ch >= '0') && (ch <= '9')) + return ch - '0'; + ch = tolower(ch); + if ((ch >= 'a') && (ch <= 'f')) + return ch - 'a' + 10; + return -1; +} +EXPORT_SYMBOL(hex_to_bin); + +/** * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory * @buf: data blob to dump * @len: number of bytes in the @buf @@ -34,7 +52,7 @@ EXPORT_SYMBOL(hex_asc); * * E.g.: * hex_dump_to_buffer(frame->data, frame->len, 16, 1, - * linebuf, sizeof(linebuf), 1); + * linebuf, sizeof(linebuf), true); * * example output buffer: * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO @@ -65,8 +83,8 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, for (j = 0; j < ngroups; j++) lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%16.16llx", j ? " " : "", - (unsigned long long)*(ptr8 + j)); + "%s%16.16llx", j ? " " : "", + (unsigned long long)*(ptr8 + j)); ascii_column = 17 * ngroups + 2; break; } @@ -77,7 +95,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, for (j = 0; j < ngroups; j++) lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%8.8x", j ? " " : "", *(ptr4 + j)); + "%s%8.8x", j ? 
" " : "", *(ptr4 + j)); ascii_column = 9 * ngroups + 2; break; } @@ -88,7 +106,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, for (j = 0; j < ngroups; j++) lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%4.4x", j ? " " : "", *(ptr2 + j)); + "%s%4.4x", j ? " " : "", *(ptr2 + j)); ascii_column = 5 * ngroups + 2; break; } @@ -111,9 +129,10 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) linebuf[lx++] = ' '; - for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) - linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j] - : '.'; + for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) { + ch = ptr[j]; + linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.'; + } nil: linebuf[lx++] = '\0'; } @@ -143,7 +162,7 @@ EXPORT_SYMBOL(hex_dump_to_buffer); * * E.g.: * print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS, - * 16, 1, frame->data, frame->len, 1); + * 16, 1, frame->data, frame->len, true); * * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode: * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO @@ -151,12 +170,12 @@ EXPORT_SYMBOL(hex_dump_to_buffer); * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c pqrstuvwxyz{|}~. */ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, - int rowsize, int groupsize, - const void *buf, size_t len, bool ascii) + int rowsize, int groupsize, + const void *buf, size_t len, bool ascii) { const u8 *ptr = buf; int i, linelen, remaining = len; - unsigned char linebuf[200]; + unsigned char linebuf[32 * 3 + 2 + 32 + 1]; if (rowsize != 16 && rowsize != 32) rowsize = 16; @@ -164,13 +183,14 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, for (i = 0; i < len; i += rowsize) { linelen = min(remaining, rowsize); remaining -= rowsize; + hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, - linebuf, sizeof(linebuf), ascii); + linebuf, sizeof(linebuf), ascii); switch (prefix_type) { case DUMP_PREFIX_ADDRESS: - printk("%s%s%*p: %s\n", level, prefix_str, - (int)(2 * sizeof(void *)), ptr + i, linebuf); + printk("%s%s%p: %s\n", + level, prefix_str, ptr + i, linebuf); break; case DUMP_PREFIX_OFFSET: printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); @@ -196,9 +216,9 @@ EXPORT_SYMBOL(print_hex_dump); * rowsize of 16, groupsize of 1, and ASCII output included. */ void print_hex_dump_bytes(const char *prefix_str, int prefix_type, - const void *buf, size_t len) + const void *buf, size_t len) { print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1, - buf, len, 1); + buf, len, true); } EXPORT_SYMBOL(print_hex_dump_bytes); diff --git a/lib/hweight.c b/lib/hweight.c index 63ee4eb1228d..3c79d50814cf 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -9,7 +9,7 @@ * The Hamming Weight of a number is the total number of bits set in it. 
*/ -unsigned int hweight32(unsigned int w) +unsigned int __sw_hweight32(unsigned int w) { #ifdef ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x55555555; @@ -24,29 +24,30 @@ unsigned int hweight32(unsigned int w) return (res + (res >> 16)) & 0x000000FF; #endif } -EXPORT_SYMBOL(hweight32); +EXPORT_SYMBOL(__sw_hweight32); -unsigned int hweight16(unsigned int w) +unsigned int __sw_hweight16(unsigned int w) { unsigned int res = w - ((w >> 1) & 0x5555); res = (res & 0x3333) + ((res >> 2) & 0x3333); res = (res + (res >> 4)) & 0x0F0F; return (res + (res >> 8)) & 0x00FF; } -EXPORT_SYMBOL(hweight16); +EXPORT_SYMBOL(__sw_hweight16); -unsigned int hweight8(unsigned int w) +unsigned int __sw_hweight8(unsigned int w) { unsigned int res = w - ((w >> 1) & 0x55); res = (res & 0x33) + ((res >> 2) & 0x33); return (res + (res >> 4)) & 0x0F; } -EXPORT_SYMBOL(hweight8); +EXPORT_SYMBOL(__sw_hweight8); -unsigned long hweight64(__u64 w) +unsigned long __sw_hweight64(__u64 w) { #if BITS_PER_LONG == 32 - return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); + return __sw_hweight32((unsigned int)(w >> 32)) + + __sw_hweight32((unsigned int)w); #elif BITS_PER_LONG == 64 #ifdef ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x5555555555555555ul; @@ -63,4 +64,4 @@ unsigned long hweight64(__u64 w) #endif #endif } -EXPORT_SYMBOL(hweight64); +EXPORT_SYMBOL(__sw_hweight64); diff --git a/lib/idr.c b/lib/idr.c index 2eb1dca03681..7f1a4f0acf50 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -445,6 +445,7 @@ EXPORT_SYMBOL(idr_remove); void idr_remove_all(struct idr *idp) { int n, id, max; + int bt_mask; struct idr_layer *p; struct idr_layer *pa[MAX_LEVEL]; struct idr_layer **paa = &pa[0]; @@ -462,8 +463,10 @@ void idr_remove_all(struct idr *idp) p = p->ary[(id >> n) & IDR_MASK]; } + bt_mask = id; id += 1 << n; - while (n < fls(id)) { + /* Get the highest bit that the above add changed from 0->1. 
*/ + while (n < fls(id ^ bt_mask)) { if (p) free_layer(p); n += IDR_BITS; @@ -599,7 +602,7 @@ void *idr_get_next(struct idr *idp, int *nextidp) /* find first ent */ n = idp->layers * IDR_BITS; max = 1 << n; - p = rcu_dereference(idp->top); + p = rcu_dereference_raw(idp->top); if (!p) return NULL; @@ -607,7 +610,7 @@ void *idr_get_next(struct idr *idp, int *nextidp) while (n > 0 && p) { n -= IDR_BITS; *paa++ = p; - p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); + p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); } if (p) { @@ -623,7 +626,7 @@ void *idr_get_next(struct idr *idp, int *nextidp) } return NULL; } - +EXPORT_SYMBOL(idr_get_next); /** diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index c0251f4ad08b..da053313ee5c 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -38,12 +38,3 @@ again: return -1; } EXPORT_SYMBOL(iommu_area_alloc); - -unsigned long iommu_num_pages(unsigned long addr, unsigned long len, - unsigned long io_page_size) -{ - unsigned long size = (addr & (io_page_size - 1)) + len; - - return DIV_ROUND_UP(size, io_page_size); -} -EXPORT_SYMBOL(iommu_num_pages); diff --git a/lib/ioremap.c b/lib/ioremap.c index 14c6078f17a2..5730ecd3eb66 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -13,10 +13,10 @@ #include <asm/pgtable.h> static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pte_t *pte; - unsigned long pfn; + u64 pfn; pfn = phys_addr >> PAGE_SHIFT; pte = pte_alloc_kernel(pmd, addr); @@ -31,7 +31,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, } static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pmd_t *pmd; unsigned long next; @@ -49,7 +49,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, } static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pud_t *pud; unsigned long next; @@ -67,7 +67,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, } int ioremap_page_range(unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pgd_t *pgd; unsigned long start; diff --git a/lib/kobject.c b/lib/kobject.c index 8115eb1bbf4d..f07c57252e82 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -850,6 +850,121 @@ struct kset *kset_create_and_add(const char *name, } EXPORT_SYMBOL_GPL(kset_create_and_add); + +static DEFINE_SPINLOCK(kobj_ns_type_lock); +static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; + +int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) +{ + enum kobj_ns_type type = ops->type; + int error; + + spin_lock(&kobj_ns_type_lock); + + error = -EINVAL; + if (type >= KOBJ_NS_TYPES) + goto out; + + error = -EINVAL; + if (type <= KOBJ_NS_TYPE_NONE) + goto out; + + error = -EBUSY; + if (kobj_ns_ops_tbl[type]) + goto out; + + error = 0; + kobj_ns_ops_tbl[type] = ops; + +out: + spin_unlock(&kobj_ns_type_lock); + return error; +} + +int kobj_ns_type_registered(enum kobj_ns_type type) +{ + int registered = 0; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES)) + registered = kobj_ns_ops_tbl[type] != NULL; + spin_unlock(&kobj_ns_type_lock); 
+ + return registered; +} + +const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent) +{ + const struct kobj_ns_type_operations *ops = NULL; + + if (parent && parent->ktype->child_ns_type) + ops = parent->ktype->child_ns_type(parent); + + return ops; +} + +const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) +{ + return kobj_child_ns_ops(kobj->parent); +} + + +const void *kobj_ns_current(enum kobj_ns_type type) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->current_ns(); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->netlink_ns(sk); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +const void *kobj_ns_initial(enum kobj_ns_type type) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->initial_ns(); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +/* + * kobj_ns_exit - invalidate a namespace tag + * + * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) + * @ns: the actual namespace being invalidated + * + * This is called when a tag is no longer valid. For instance, + * when a network namespace exits, it uses this helper to + * make sure no sb's sysfs_info points to the now-invalidated + * netns. + */ +void kobj_ns_exit(enum kobj_ns_type type, const void *ns) +{ + sysfs_exit_ns(type, ns); +} + + EXPORT_SYMBOL(kobject_get); EXPORT_SYMBOL(kobject_put); EXPORT_SYMBOL(kobject_del); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 7b48d44ced6e..b93579504dfa 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -19,18 +19,24 @@ #include <linux/kobject.h> #include <linux/module.h> #include <linux/slab.h> - +#include <linux/user_namespace.h> #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <net/sock.h> +#include <net/net_namespace.h> u64 uevent_seqnum; char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; static DEFINE_SPINLOCK(sequence_lock); -#if defined(CONFIG_NET) -static struct sock *uevent_sock; +#ifdef CONFIG_NET +struct uevent_sock { + struct list_head list; + struct sock *sk; +}; +static LIST_HEAD(uevent_sock_list); +static DEFINE_MUTEX(uevent_sock_mutex); #endif /* the strings here must match the enum in include/linux/kobject.h */ @@ -77,6 +83,39 @@ out: return ret; } +#ifdef CONFIG_NET +static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) +{ + struct kobject *kobj = data; + const struct kobj_ns_type_operations *ops; + + ops = kobj_ns_ops(kobj); + if (ops) { + const void *sock_ns, *ns; + ns = kobj->ktype->namespace(kobj); + sock_ns = ops->netlink_ns(dsk); + return sock_ns != ns; + } + + return 0; +} +#endif + +static int kobj_usermode_filter(struct kobject *kobj) +{ + const struct kobj_ns_type_operations *ops; + + ops = kobj_ns_ops(kobj); + if (ops) { + const void *init_ns, *ns; + ns = kobj->ktype->namespace(kobj); + init_ns = ops->initial_ns(); + return ns != init_ns; + } + + return 0; +} + /** * kobject_uevent_env - send an uevent with environmental data * @@ -100,6 +139,9 @@ int kobject_uevent_env(struct kobject 
*kobj, enum kobject_action action, u64 seq; int i = 0; int retval = 0; +#ifdef CONFIG_NET + struct uevent_sock *ue_sk; +#endif pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); @@ -211,7 +253,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, #if defined(CONFIG_NET) /* send netlink message */ - if (uevent_sock) { + mutex_lock(&uevent_sock_mutex); + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + struct sock *uevent_sock = ue_sk->sk; struct sk_buff *skb; size_t len; @@ -233,18 +277,21 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, } NETLINK_CB(skb).dst_group = 1; - retval = netlink_broadcast(uevent_sock, skb, 0, 1, - GFP_KERNEL); + retval = netlink_broadcast_filtered(uevent_sock, skb, + 0, 1, GFP_KERNEL, + kobj_bcast_filter, + kobj); /* ENOBUFS should be handled in userspace */ if (retval == -ENOBUFS) retval = 0; } else retval = -ENOMEM; } + mutex_unlock(&uevent_sock_mutex); #endif /* call uevent_helper, usually only enabled during early boot */ - if (uevent_helper[0]) { + if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { char *argv [3]; argv [0] = uevent_helper; @@ -320,18 +367,59 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) EXPORT_SYMBOL_GPL(add_uevent_var); #if defined(CONFIG_NET) -static int __init kobject_uevent_init(void) +static int uevent_net_init(struct net *net) { - uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT, - 1, NULL, NULL, THIS_MODULE); - if (!uevent_sock) { + struct uevent_sock *ue_sk; + + ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL); + if (!ue_sk) + return -ENOMEM; + + ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, + 1, NULL, NULL, THIS_MODULE); + if (!ue_sk->sk) { printk(KERN_ERR "kobject_uevent: unable to create netlink socket!\n"); + kfree(ue_sk); return -ENODEV; } - netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV); + mutex_lock(&uevent_sock_mutex); + list_add_tail(&ue_sk->list, &uevent_sock_list); + mutex_unlock(&uevent_sock_mutex); return 0; } +static void uevent_net_exit(struct net *net) +{ + struct uevent_sock *ue_sk; + + mutex_lock(&uevent_sock_mutex); + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + if (sock_net(ue_sk->sk) == net) + goto found; + } + mutex_unlock(&uevent_sock_mutex); + return; + +found: + list_del(&ue_sk->list); + mutex_unlock(&uevent_sock_mutex); + + netlink_kernel_release(ue_sk->sk); + kfree(ue_sk); +} + +static struct pernet_operations uevent_net_ops = { + .init = uevent_net_init, + .exit = uevent_net_exit, +}; + +static int __init kobject_uevent_init(void) +{ + netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV); + return register_pernet_subsys(&uevent_net_ops); +} + + postcore_initcall(kobject_uevent_init); #endif diff --git a/lib/kref.c b/lib/kref.c index 6d19f690380b..d3d227a08a4b 100644 --- a/lib/kref.c +++ b/lib/kref.c @@ -16,23 +16,13 @@ #include <linux/slab.h> /** - * kref_set - initialize object and set refcount to requested number. - * @kref: object in question. - * @num: initial reference counter - */ -void kref_set(struct kref *kref, int num) -{ - atomic_set(&kref->refcount, num); - smp_mb(); -} - -/** * kref_init - initialize object. * @kref: object in question. 
*/ void kref_init(struct kref *kref) { - kref_set(kref, 1); + atomic_set(&kref->refcount, 1); + smp_mb(); } /** @@ -72,7 +62,6 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref)) return 0; } -EXPORT_SYMBOL(kref_set); EXPORT_SYMBOL(kref_init); EXPORT_SYMBOL(kref_get); EXPORT_SYMBOL(kref_put); diff --git a/lib/list_debug.c b/lib/list_debug.c index 1a39f4e3ae1f..344c710d16ca 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -43,6 +43,12 @@ EXPORT_SYMBOL(__list_add); */ void list_del(struct list_head *entry) { + WARN(entry->next == LIST_POISON1, + "list_del corruption, next is LIST_POISON1 (%p)\n", + LIST_POISON1); + WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2, + "list_del corruption, prev is LIST_POISON2 (%p)\n", + LIST_POISON2); WARN(entry->prev->next != entry, "list_del corruption. prev->next should be %p, " "but was %p\n", entry, entry->prev->next); diff --git a/lib/lmb.c b/lib/lmb.c deleted file mode 100644 index b1fc52606524..000000000000 --- a/lib/lmb.c +++ /dev/null @@ -1,541 +0,0 @@ -/* - * Procedures for maintaining information about logical memory blocks. - * - * Peter Bergner, IBM Corp. June 2001. - * Copyright (C) 2001 Peter Bergner. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/bitops.h> -#include <linux/lmb.h> - -#define LMB_ALLOC_ANYWHERE 0 - -struct lmb lmb; - -static int lmb_debug; - -static int __init early_lmb(char *p) -{ - if (p && strstr(p, "debug")) - lmb_debug = 1; - return 0; -} -early_param("lmb", early_lmb); - -static void lmb_dump(struct lmb_region *region, char *name) -{ - unsigned long long base, size; - int i; - - pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); - - for (i = 0; i < region->cnt; i++) { - base = region->region[i].base; - size = region->region[i].size; - - pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n", - name, i, base, base + size - 1, size); - } -} - -void lmb_dump_all(void) -{ - if (!lmb_debug) - return; - - pr_info("LMB configuration:\n"); - pr_info(" rmo_size = 0x%llx\n", (unsigned long long)lmb.rmo_size); - pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size); - - lmb_dump(&lmb.memory, "memory"); - lmb_dump(&lmb.reserved, "reserved"); -} - -static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2, - u64 size2) -{ - return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); -} - -static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) -{ - if (base2 == base1 + size1) - return 1; - else if (base1 == base2 + size2) - return -1; - - return 0; -} - -static long lmb_regions_adjacent(struct lmb_region *rgn, - unsigned long r1, unsigned long r2) -{ - u64 base1 = rgn->region[r1].base; - u64 size1 = rgn->region[r1].size; - u64 base2 = rgn->region[r2].base; - u64 size2 = rgn->region[r2].size; - - return lmb_addrs_adjacent(base1, size1, base2, size2); -} - -static void lmb_remove_region(struct lmb_region *rgn, unsigned long r) -{ - unsigned long i; - - for (i = r; i < rgn->cnt - 1; i++) { - rgn->region[i].base = rgn->region[i + 1].base; - rgn->region[i].size = rgn->region[i + 1].size; - } - rgn->cnt--; -} - -/* Assumption: base addr of region 1 < base addr of region 2 */ -static void lmb_coalesce_regions(struct lmb_region *rgn, - unsigned long r1, unsigned 
long r2) -{ - rgn->region[r1].size += rgn->region[r2].size; - lmb_remove_region(rgn, r2); -} - -void __init lmb_init(void) -{ - /* Create a dummy zero size LMB which will get coalesced away later. - * This simplifies the lmb_add() code below... - */ - lmb.memory.region[0].base = 0; - lmb.memory.region[0].size = 0; - lmb.memory.cnt = 1; - - /* Ditto. */ - lmb.reserved.region[0].base = 0; - lmb.reserved.region[0].size = 0; - lmb.reserved.cnt = 1; -} - -void __init lmb_analyze(void) -{ - int i; - - lmb.memory.size = 0; - - for (i = 0; i < lmb.memory.cnt; i++) - lmb.memory.size += lmb.memory.region[i].size; -} - -static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size) -{ - unsigned long coalesced = 0; - long adjacent, i; - - if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) { - rgn->region[0].base = base; - rgn->region[0].size = size; - return 0; - } - - /* First try and coalesce this LMB with another. */ - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; - - if ((rgnbase == base) && (rgnsize == size)) - /* Already have this region, so we're done */ - return 0; - - adjacent = lmb_addrs_adjacent(base, size, rgnbase, rgnsize); - if (adjacent > 0) { - rgn->region[i].base -= size; - rgn->region[i].size += size; - coalesced++; - break; - } else if (adjacent < 0) { - rgn->region[i].size += size; - coalesced++; - break; - } - } - - if ((i < rgn->cnt - 1) && lmb_regions_adjacent(rgn, i, i+1)) { - lmb_coalesce_regions(rgn, i, i+1); - coalesced++; - } - - if (coalesced) - return coalesced; - if (rgn->cnt >= MAX_LMB_REGIONS) - return -1; - - /* Couldn't coalesce the LMB, so add it to the sorted table. */ - for (i = rgn->cnt - 1; i >= 0; i--) { - if (base < rgn->region[i].base) { - rgn->region[i+1].base = rgn->region[i].base; - rgn->region[i+1].size = rgn->region[i].size; - } else { - rgn->region[i+1].base = base; - rgn->region[i+1].size = size; - break; - } - } - - if (base < rgn->region[0].base) { - rgn->region[0].base = base; - rgn->region[0].size = size; - } - rgn->cnt++; - - return 0; -} - -long lmb_add(u64 base, u64 size) -{ - struct lmb_region *_rgn = &lmb.memory; - - /* On pSeries LPAR systems, the first LMB is our RMO region. */ - if (base == 0) - lmb.rmo_size = size; - - return lmb_add_region(_rgn, base, size); - -} - -static long __lmb_remove(struct lmb_region *rgn, u64 base, u64 size) -{ - u64 rgnbegin, rgnend; - u64 end = base + size; - int i; - - rgnbegin = rgnend = 0; /* supress gcc warnings */ - - /* Find the region where (base, size) belongs to */ - for (i=0; i < rgn->cnt; i++) { - rgnbegin = rgn->region[i].base; - rgnend = rgnbegin + rgn->region[i].size; - - if ((rgnbegin <= base) && (end <= rgnend)) - break; - } - - /* Didn't find the region */ - if (i == rgn->cnt) - return -1; - - /* Check to see if we are removing entire region */ - if ((rgnbegin == base) && (rgnend == end)) { - lmb_remove_region(rgn, i); - return 0; - } - - /* Check to see if region is matching at the front */ - if (rgnbegin == base) { - rgn->region[i].base = end; - rgn->region[i].size -= size; - return 0; - } - - /* Check to see if the region is matching at the end */ - if (rgnend == end) { - rgn->region[i].size -= size; - return 0; - } - - /* - * We need to split the entry - adjust the current one to the - * beginging of the hole and add the region after hole. 
- */ - rgn->region[i].size = base - rgn->region[i].base; - return lmb_add_region(rgn, end, rgnend - end); -} - -long lmb_remove(u64 base, u64 size) -{ - return __lmb_remove(&lmb.memory, base, size); -} - -long __init lmb_free(u64 base, u64 size) -{ - return __lmb_remove(&lmb.reserved, base, size); -} - -long __init lmb_reserve(u64 base, u64 size) -{ - struct lmb_region *_rgn = &lmb.reserved; - - BUG_ON(0 == size); - - return lmb_add_region(_rgn, base, size); -} - -long lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size) -{ - unsigned long i; - - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; - if (lmb_addrs_overlap(base, size, rgnbase, rgnsize)) - break; - } - - return (i < rgn->cnt) ? i : -1; -} - -static u64 lmb_align_down(u64 addr, u64 size) -{ - return addr & ~(size - 1); -} - -static u64 lmb_align_up(u64 addr, u64 size) -{ - return (addr + (size - 1)) & ~(size - 1); -} - -static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end, - u64 size, u64 align) -{ - u64 base, res_base; - long j; - - base = lmb_align_down((end - size), align); - while (start <= base) { - j = lmb_overlaps_region(&lmb.reserved, base, size); - if (j < 0) { - /* this area isn't reserved, take it */ - if (lmb_add_region(&lmb.reserved, base, size) < 0) - base = ~(u64)0; - return base; - } - res_base = lmb.reserved.region[j].base; - if (res_base < size) - break; - base = lmb_align_down(res_base - size, align); - } - - return ~(u64)0; -} - -static u64 __init lmb_alloc_nid_region(struct lmb_property *mp, - u64 (*nid_range)(u64, u64, int *), - u64 size, u64 align, int nid) -{ - u64 start, end; - - start = mp->base; - end = start + mp->size; - - start = lmb_align_up(start, align); - while (start < end) { - u64 this_end; - int this_nid; - - this_end = nid_range(start, end, &this_nid); - if (this_nid == nid) { - u64 ret = lmb_alloc_nid_unreserved(start, this_end, - size, align); - if (ret != ~(u64)0) - return ret; - } - start = this_end; - } - - return ~(u64)0; -} - -u64 __init lmb_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64 start, u64 end, int *nid)) -{ - struct lmb_region *mem = &lmb.memory; - int i; - - BUG_ON(0 == size); - - size = lmb_align_up(size, align); - - for (i = 0; i < mem->cnt; i++) { - u64 ret = lmb_alloc_nid_region(&mem->region[i], - nid_range, - size, align, nid); - if (ret != ~(u64)0) - return ret; - } - - return lmb_alloc(size, align); -} - -u64 __init lmb_alloc(u64 size, u64 align) -{ - return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); -} - -u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr) -{ - u64 alloc; - - alloc = __lmb_alloc_base(size, align, max_addr); - - if (alloc == 0) - panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n", - (unsigned long long) size, (unsigned long long) max_addr); - - return alloc; -} - -u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr) -{ - long i, j; - u64 base = 0; - u64 res_base; - - BUG_ON(0 == size); - - size = lmb_align_up(size, align); - - /* On some platforms, make sure we allocate lowmem */ - /* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */ - if (max_addr == LMB_ALLOC_ANYWHERE) - max_addr = LMB_REAL_LIMIT; - - for (i = lmb.memory.cnt - 1; i >= 0; i--) { - u64 lmbbase = lmb.memory.region[i].base; - u64 lmbsize = lmb.memory.region[i].size; - - if (lmbsize < size) - continue; - if (max_addr == LMB_ALLOC_ANYWHERE) - base = lmb_align_down(lmbbase + lmbsize - size, align); - else if (lmbbase < max_addr) { - base = min(lmbbase + 
lmbsize, max_addr); - base = lmb_align_down(base - size, align); - } else - continue; - - while (base && lmbbase <= base) { - j = lmb_overlaps_region(&lmb.reserved, base, size); - if (j < 0) { - /* this area isn't reserved, take it */ - if (lmb_add_region(&lmb.reserved, base, size) < 0) - return 0; - return base; - } - res_base = lmb.reserved.region[j].base; - if (res_base < size) - break; - base = lmb_align_down(res_base - size, align); - } - } - return 0; -} - -/* You must call lmb_analyze() before this. */ -u64 __init lmb_phys_mem_size(void) -{ - return lmb.memory.size; -} - -u64 lmb_end_of_DRAM(void) -{ - int idx = lmb.memory.cnt - 1; - - return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); -} - -/* You must call lmb_analyze() after this. */ -void __init lmb_enforce_memory_limit(u64 memory_limit) -{ - unsigned long i; - u64 limit; - struct lmb_property *p; - - if (!memory_limit) - return; - - /* Truncate the lmb regions to satisfy the memory limit. */ - limit = memory_limit; - for (i = 0; i < lmb.memory.cnt; i++) { - if (limit > lmb.memory.region[i].size) { - limit -= lmb.memory.region[i].size; - continue; - } - - lmb.memory.region[i].size = limit; - lmb.memory.cnt = i + 1; - break; - } - - if (lmb.memory.region[0].size < lmb.rmo_size) - lmb.rmo_size = lmb.memory.region[0].size; - - memory_limit = lmb_end_of_DRAM(); - - /* And truncate any reserves above the limit also. */ - for (i = 0; i < lmb.reserved.cnt; i++) { - p = &lmb.reserved.region[i]; - - if (p->base > memory_limit) - p->size = 0; - else if ((p->base + p->size) > memory_limit) - p->size = memory_limit - p->base; - - if (p->size == 0) { - lmb_remove_region(&lmb.reserved, i); - i--; - } - } -} - -int __init lmb_is_reserved(u64 addr) -{ - int i; - - for (i = 0; i < lmb.reserved.cnt; i++) { - u64 upper = lmb.reserved.region[i].base + - lmb.reserved.region[i].size - 1; - if ((addr >= lmb.reserved.region[i].base) && (addr <= upper)) - return 1; - } - return 0; -} - -int lmb_is_region_reserved(u64 base, u64 size) -{ - return lmb_overlaps_region(&lmb.reserved, base, size); -} - -/* - * Given a <base, len>, find which memory regions belong to this range. - * Adjust the request and return a contiguous chunk. - */ -int lmb_find(struct lmb_property *res) -{ - int i; - u64 rstart, rend; - - rstart = res->base; - rend = rstart + res->size - 1; - - for (i = 0; i < lmb.memory.cnt; i++) { - u64 start = lmb.memory.region[i].base; - u64 end = start + lmb.memory.region[i].size - 1; - - if (start > rend) - return -1; - - if ((end >= rstart) && (start < rend)) { - /* adjust the request */ - if (rstart < start) - rstart = start; - if (rend > end) - rend = end; - res->base = rstart; - res->size = rend - rstart + 1; - return 0; - } - } - return -1; -} diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index aeaa6d734447..ec9048e74f44 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -137,6 +137,33 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb, return NOTIFY_OK; } +/* + * Compare counter against given value. 
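 * A rough comparison is tried first: the approximate count can be off by at
 * most percpu_counter_batch * num_online_cpus() (e.g. 32 * 4 = 128 with a
 * batch of 32 on a four-CPU machine), so any larger gap from rhs already
 * decides the result; only closer calls pay for the exact
 * percpu_counter_sum().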
+ * Return 1 if greater, 0 if equal and -1 if less + */ +int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +{ + s64 count; + + count = percpu_counter_read(fbc); + /* Check to see if rough count will be sufficient for comparison */ + if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) { + if (count > rhs) + return 1; + else + return -1; + } + /* Need to use precise count */ + count = percpu_counter_sum(fbc); + if (count > rhs) + return 1; + else if (count < rhs) + return -1; + else + return 0; +} +EXPORT_SYMBOL(percpu_counter_compare); + static int __init percpu_counter_startup(void) { compute_batch_value(); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 2a087e0f9863..e907858498a6 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -609,6 +609,100 @@ int radix_tree_tag_get(struct radix_tree_root *root, EXPORT_SYMBOL(radix_tree_tag_get); /** + * radix_tree_range_tag_if_tagged - for each item in given range set given + * tag if item has another tag set + * @root: radix tree root + * @first_indexp: pointer to a starting index of a range to scan + * @last_index: last index of a range to scan + * @nr_to_tag: maximum number items to tag + * @iftag: tag index to test + * @settag: tag index to set if tested tag is set + * + * This function scans range of radix tree from first_index to last_index + * (inclusive). For each item in the range if iftag is set, the function sets + * also settag. The function stops either after tagging nr_to_tag items or + * after reaching last_index. + * + * The function returns number of leaves where the tag was set and sets + * *first_indexp to the first unscanned index. + */ +unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, + unsigned long *first_indexp, unsigned long last_index, + unsigned long nr_to_tag, + unsigned int iftag, unsigned int settag) +{ + unsigned int height = root->height, shift; + unsigned long tagged = 0, index = *first_indexp; + struct radix_tree_node *open_slots[height], *slot; + + last_index = min(last_index, radix_tree_maxindex(height)); + if (index > last_index) + return 0; + if (!nr_to_tag) + return 0; + if (!root_tag_get(root, iftag)) { + *first_indexp = last_index + 1; + return 0; + } + if (height == 0) { + *first_indexp = last_index + 1; + root_tag_set(root, settag); + return 1; + } + + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + slot = radix_tree_indirect_to_ptr(root->rnode); + + for (;;) { + int offset; + + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + if (!slot->slots[offset]) + goto next; + if (!tag_get(slot, iftag, offset)) + goto next; + tag_set(slot, settag, offset); + if (height == 1) { + tagged++; + goto next; + } + /* Go down one level */ + height--; + shift -= RADIX_TREE_MAP_SHIFT; + open_slots[height] = slot; + slot = slot->slots[offset]; + continue; +next: + /* Go to next item at level determined by 'shift' */ + index = ((index >> shift) + 1) << shift; + if (index > last_index) + break; + if (tagged >= nr_to_tag) + break; + while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) { + /* + * We've fully scanned this node. Go up. Because + * last_index is guaranteed to be in the tree, what + * we do below cannot wander astray. 
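 * As an illustration, assuming the default RADIX_TREE_MAP_SHIFT of 6: the
 * "next item" step above computes index = ((index >> shift) + 1) << shift,
 * so at the leaf level index 0x3f advances to 0x40, whose low 6 bits are
 * zero; the loop below then climbs one level per trailing all-zero 6-bit
 * group before the scan descends again.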
+ */ + slot = open_slots[height]; + height++; + shift += RADIX_TREE_MAP_SHIFT; + } + } + /* + * The iftag must have been set somewhere because otherwise + * we would return immediated at the beginning of the function + */ + root_tag_set(root, settag); + *first_indexp = index; + + return tagged; +} +EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); + + +/** * radix_tree_next_hole - find the next hole (not-present entry) * @root: tree root * @index: index key @@ -656,7 +750,7 @@ EXPORT_SYMBOL(radix_tree_next_hole); * * Returns: the index of the hole if found, otherwise returns an index * outside of the set specified (in which case 'index - return >= max_scan' - * will be true). In rare cases of wrap-around, LONG_MAX will be returned. + * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. * * radix_tree_next_hole may be called under rcu_read_lock. However, like * radix_tree_gang_lookup, this will not atomically search a snapshot of @@ -674,7 +768,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, if (!radix_tree_lookup(root, index)) break; index--; - if (index == LONG_MAX) + if (index == ULONG_MAX) break; } diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile new file mode 100644 index 000000000000..19bf32da644f --- /dev/null +++ b/lib/raid6/Makefile @@ -0,0 +1,78 @@ +obj-$(CONFIG_RAID6_PQ) += raid6_pq.o + +raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ + raid6int1.o raid6int2.o raid6int4.o \ + raid6int8.o raid6int16.o raid6int32.o \ + raid6altivec1.o raid6altivec2.o raid6altivec4.o \ + raid6altivec8.o \ + raid6mmx.o raid6sse1.o raid6sse2.o +hostprogs-y += mktables + +quiet_cmd_unroll = UNROLL $@ + cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ + < $< > $@ || ( rm -f $@ && exit 1 ) + +ifeq ($(CONFIG_ALTIVEC),y) +altivec_flags := -maltivec -mabi=altivec +endif + +targets += raid6int1.c +$(obj)/raid6int1.c: UNROLL := 1 +$(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += raid6int2.c +$(obj)/raid6int2.c: UNROLL := 2 +$(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += raid6int4.c +$(obj)/raid6int4.c: UNROLL := 4 +$(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += raid6int8.c +$(obj)/raid6int8.c: UNROLL := 8 +$(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += raid6int16.c +$(obj)/raid6int16.c: UNROLL := 16 +$(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += raid6int32.c +$(obj)/raid6int32.c: UNROLL := 32 +$(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_raid6altivec1.o += $(altivec_flags) +targets += raid6altivec1.c +$(obj)/raid6altivec1.c: UNROLL := 1 +$(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_raid6altivec2.o += $(altivec_flags) +targets += raid6altivec2.c +$(obj)/raid6altivec2.c: UNROLL := 2 +$(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_raid6altivec4.o += $(altivec_flags) +targets += raid6altivec4.c +$(obj)/raid6altivec4.c: UNROLL := 4 +$(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_raid6altivec8.o += $(altivec_flags) +targets += raid6altivec8.c +$(obj)/raid6altivec8.c: UNROLL := 8 +$(obj)/raid6altivec8.c: 
$(src)/raid6altivec.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +quiet_cmd_mktable = TABLE $@ + cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) + +targets += raid6tables.c +$(obj)/raid6tables.c: $(obj)/mktables FORCE + $(call if_changed,mktable) diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c new file mode 100644 index 000000000000..3b1500843bba --- /dev/null +++ b/lib/raid6/mktables.c @@ -0,0 +1,132 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * mktables.c + * + * Make RAID-6 tables. This is a host user space program to be run at + * compile time. + */ + +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <stdlib.h> +#include <time.h> + +static uint8_t gfmul(uint8_t a, uint8_t b) +{ + uint8_t v = 0; + + while (b) { + if (b & 1) + v ^= a; + a = (a << 1) ^ (a & 0x80 ? 0x1d : 0); + b >>= 1; + } + + return v; +} + +static uint8_t gfpow(uint8_t a, int b) +{ + uint8_t v = 1; + + b %= 255; + if (b < 0) + b += 255; + + while (b) { + if (b & 1) + v = gfmul(v, a); + a = gfmul(a, a); + b >>= 1; + } + + return v; +} + +int main(int argc, char *argv[]) +{ + int i, j, k; + uint8_t v; + uint8_t exptbl[256], invtbl[256]; + + printf("#include <linux/raid/pq.h>\n"); + + /* Compute multiplication table */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gfmul[256][256] =\n" + "{\n"); + for (i = 0; i < 256; i++) { + printf("\t{\n"); + for (j = 0; j < 256; j += 8) { + printf("\t\t"); + for (k = 0; k < 8; k++) + printf("0x%02x,%c", gfmul(i, j + k), + (k == 7) ? '\n' : ' '); + } + printf("\t},\n"); + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gfmul);\n"); + printf("#endif\n"); + + /* Compute power-of-2 table (exponent) */ + v = 1; + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gfexp[256] =\n" "{\n"); + for (i = 0; i < 256; i += 8) { + printf("\t"); + for (j = 0; j < 8; j++) { + exptbl[i + j] = v; + printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); + v = gfmul(v, 2); + if (v == 1) + v = 0; /* For entry 255, not a real entry */ + } + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gfexp);\n"); + printf("#endif\n"); + + /* Compute inverse table x^-1 == x^254 */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gfinv[256] =\n" "{\n"); + for (i = 0; i < 256; i += 8) { + printf("\t"); + for (j = 0; j < 8; j++) { + invtbl[i + j] = v = gfpow(i + j, 254); + printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); + } + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gfinv);\n"); + printf("#endif\n"); + + /* Compute inv(2^x + 1) (exponent-xor-inverse) table */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gfexi[256] =\n" "{\n"); + for (i = 0; i < 256; i += 8) { + printf("\t"); + for (j = 0; j < 8; j++) + printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1], + (j == 7) ? 
'\n' : ' '); + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gfexi);\n"); + printf("#endif\n"); + + return 0; +} diff --git a/lib/raid6/raid6algos.c b/lib/raid6/raid6algos.c new file mode 100644 index 000000000000..1f8784bfd44d --- /dev/null +++ b/lib/raid6/raid6algos.c @@ -0,0 +1,154 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6algos.c + * + * Algorithm list and algorithm selection for RAID-6 + */ + +#include <linux/raid/pq.h> +#include <linux/gfp.h> +#ifndef __KERNEL__ +#include <sys/mman.h> +#include <stdio.h> +#else +#if !RAID6_USE_EMPTY_ZERO_PAGE +/* In .bss so it's zeroed */ +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); +EXPORT_SYMBOL(raid6_empty_zero_page); +#endif +#endif + +struct raid6_calls raid6_call; +EXPORT_SYMBOL_GPL(raid6_call); + +const struct raid6_calls * const raid6_algos[] = { + &raid6_intx1, + &raid6_intx2, + &raid6_intx4, + &raid6_intx8, +#if defined(__ia64__) + &raid6_intx16, + &raid6_intx32, +#endif +#if defined(__i386__) && !defined(__arch_um__) + &raid6_mmxx1, + &raid6_mmxx2, + &raid6_sse1x1, + &raid6_sse1x2, + &raid6_sse2x1, + &raid6_sse2x2, +#endif +#if defined(__x86_64__) && !defined(__arch_um__) + &raid6_sse2x1, + &raid6_sse2x2, + &raid6_sse2x4, +#endif +#ifdef CONFIG_ALTIVEC + &raid6_altivec1, + &raid6_altivec2, + &raid6_altivec4, + &raid6_altivec8, +#endif + NULL +}; + +#ifdef __KERNEL__ +#define RAID6_TIME_JIFFIES_LG2 4 +#else +/* Need more time to be stable in userspace */ +#define RAID6_TIME_JIFFIES_LG2 9 +#define time_before(x, y) ((x) < (y)) +#endif + +/* Try to pick the best algorithm */ +/* This code uses the gfmul table as convenient data set to abuse */ + +int __init raid6_select_algo(void) +{ + const struct raid6_calls * const * algo; + const struct raid6_calls * best; + char *syndromes; + void *dptrs[(65536/PAGE_SIZE)+2]; + int i, disks; + unsigned long perf, bestperf; + int bestprefer; + unsigned long j0, j1; + + disks = (65536/PAGE_SIZE)+2; + for ( i = 0 ; i < disks-2 ; i++ ) { + dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; + } + + /* Normal code - use a 2-page allocation to avoid D$ conflict */ + syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); + + if ( !syndromes ) { + printk("raid6: Yikes! 
No memory available.\n"); + return -ENOMEM; + } + + dptrs[disks-2] = syndromes; + dptrs[disks-1] = syndromes + PAGE_SIZE; + + bestperf = 0; bestprefer = 0; best = NULL; + + for ( algo = raid6_algos ; *algo ; algo++ ) { + if ( !(*algo)->valid || (*algo)->valid() ) { + perf = 0; + + preempt_disable(); + j0 = jiffies; + while ( (j1 = jiffies) == j0 ) + cpu_relax(); + while (time_before(jiffies, + j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { + (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); + perf++; + } + preempt_enable(); + + if ( (*algo)->prefer > bestprefer || + ((*algo)->prefer == bestprefer && + perf > bestperf) ) { + best = *algo; + bestprefer = best->prefer; + bestperf = perf; + } + printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, + (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + } + } + + if (best) { + printk("raid6: using algorithm %s (%ld MB/s)\n", + best->name, + (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + raid6_call = *best; + } else + printk("raid6: Yikes! No algorithm found!\n"); + + free_pages((unsigned long)syndromes, 1); + + return best ? 0 : -EINVAL; +} + +static void raid6_exit(void) +{ + do { } while (0); +} + +subsys_initcall(raid6_select_algo); +module_exit(raid6_exit); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RAID6 Q-syndrome calculations"); diff --git a/lib/raid6/raid6altivec.uc b/lib/raid6/raid6altivec.uc new file mode 100644 index 000000000000..2654d5c854be --- /dev/null +++ b/lib/raid6/raid6altivec.uc @@ -0,0 +1,130 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6altivec$#.c + * + * $#-way unrolled portable integer math RAID-6 instruction set + * + * This file is postprocessed using unroll.awk + * + * <benh> hpa: in process, + * you can just "steal" the vec unit with enable_kernel_altivec() (but + * bracked this with preempt_disable/enable or in a lock) + */ + +#include <linux/raid/pq.h> + +#ifdef CONFIG_ALTIVEC + +#include <altivec.h> +#ifdef __KERNEL__ +# include <asm/system.h> +# include <asm/cputable.h> +#endif + +/* + * This is the C data type to use. We use a vector of + * signed char so vec_cmpgt() will generate the right + * instruction. + */ + +typedef vector signed char unative_t; + +#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) +#define NSIZE sizeof(unative_t) + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ + return vec_add(v,v); +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. 
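 * Together with SHLBYTE() this implements a bytewise multiply-by-2 in
 * GF(2^8) with the 0x11d reduction polynomial: w1 = SHLBYTE(wq),
 * w2 = MASK(wq) & 0x1d, and w1 ^ w2 is 2*wq.  For example,
 * 2 * 0x80 = 0x00 ^ 0x1d = 0x1d.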
+ */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ + unative_t zv = NBYTES(0); + + /* vec_cmpgt returns a vector bool char; thus the need for the cast */ + return (unative_t)vec_cmpgt(zv, v); +} + + +/* This is noinline to make damned sure that gcc doesn't move any of the + Altivec code around the enable/disable code */ +static void noinline +raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + unative_t x1d = NBYTES(0x1d); + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + for ( z = z0-1 ; z >= 0 ; z-- ) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + wp$$ = vec_xor(wp$$, wd$$); + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ = vec_and(w2$$, x1d); + w1$$ = vec_xor(w1$$, w2$$); + wq$$ = vec_xor(w1$$, wd$$); + } + *(unative_t *)&p[d+NSIZE*$$] = wp$$; + *(unative_t *)&q[d+NSIZE*$$] = wq$$; + } +} + +static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + preempt_disable(); + enable_kernel_altivec(); + + raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs); + + preempt_enable(); +} + +int raid6_have_altivec(void); +#if $# == 1 +int raid6_have_altivec(void) +{ + /* This assumes either all CPUs have Altivec or none does */ +# ifdef __KERNEL__ + return cpu_has_feature(CPU_FTR_ALTIVEC); +# else + return 1; +# endif +} +#endif + +const struct raid6_calls raid6_altivec$# = { + raid6_altivec$#_gen_syndrome, + raid6_have_altivec, + "altivecx$#", + 0 +}; + +#endif /* CONFIG_ALTIVEC */ diff --git a/lib/raid6/raid6int.uc b/lib/raid6/raid6int.uc new file mode 100644 index 000000000000..d1e276a14fab --- /dev/null +++ b/lib/raid6/raid6int.uc @@ -0,0 +1,117 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6int$#.c + * + * $#-way unrolled portable integer math RAID-6 instruction set + * + * This file is postprocessed using unroll.awk + */ + +#include <linux/raid/pq.h> + +/* + * This is the C data type to use + */ + +/* Change this from BITS_PER_LONG if there is something better... */ +#if BITS_PER_LONG == 64 +# define NBYTES(x) ((x) * 0x0101010101010101UL) +# define NSIZE 8 +# define NSHIFT 3 +# define NSTRING "64" +typedef u64 unative_t; +#else +# define NBYTES(x) ((x) * 0x01010101U) +# define NSIZE 4 +# define NSHIFT 2 +# define NSTRING "32" +typedef u32 unative_t; +#endif + + + +/* + * IA-64 wants insane amounts of unrolling. On other architectures that + * is just a waste of space. + */ +#if ($# <= 8) || defined(__ia64__) + + +/* + * These sub-operations are separate inlines since they can sometimes be + * specially optimized using architecture-specific hacks. 
+ */ + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ + unative_t vv; + + vv = (v << 1) & NBYTES(0xfe); + return vv; +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ + unative_t vv; + + vv = v & NBYTES(0x80); + vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ + return vv; +} + + +static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + for ( z = z0-1 ; z >= 0 ; z-- ) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + wp$$ ^= wd$$; + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ &= NBYTES(0x1d); + w1$$ ^= w2$$; + wq$$ = w1$$ ^ wd$$; + } + *(unative_t *)&p[d+NSIZE*$$] = wp$$; + *(unative_t *)&q[d+NSIZE*$$] = wq$$; + } +} + +const struct raid6_calls raid6_intx$# = { + raid6_int$#_gen_syndrome, + NULL, /* always valid */ + "int" NSTRING "x$#", + 0 +}; + +#endif diff --git a/lib/raid6/raid6mmx.c b/lib/raid6/raid6mmx.c new file mode 100644 index 000000000000..e7f6c13132bf --- /dev/null +++ b/lib/raid6/raid6mmx.c @@ -0,0 +1,142 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. 
+ * + * ----------------------------------------------------------------------- */ + +/* + * raid6mmx.c + * + * MMX implementation of RAID-6 syndrome functions + */ + +#if defined(__i386__) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "raid6x86.h" + +/* Shared with raid6sse1.c */ +const struct raid6_mmx_constants { + u64 x1d; +} raid6_mmx_constants = { + 0x1d1d1d1d1d1d1d1dULL, +}; + +static int raid6_have_mmx(void) +{ + /* Not really "boot_cpu" but "all_cpus" */ + return boot_cpu_has(X86_FEATURE_MMX); +} + +/* + * Plain MMX implementation + */ +static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 8 ) { + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("movq %mm2,%mm4"); /* Q[0] */ + for ( z = z0-1 ; z >= 0 ; z-- ) { + asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm6,%mm2"); + asm volatile("pxor %mm6,%mm4"); + } + asm volatile("movq %%mm2,%0" : "=m" (p[d])); + asm volatile("pxor %mm2,%mm2"); + asm volatile("movq %%mm4,%0" : "=m" (q[d])); + asm volatile("pxor %mm4,%mm4"); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_mmxx1 = { + raid6_mmx1_gen_syndrome, + raid6_have_mmx, + "mmxx1", + 0 +}; + +/* + * Unrolled-by-2 MMX implementation + */ +static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ + asm volatile("pxor %mm7,%mm7"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); + asm volatile("movq %mm2,%mm4"); /* Q[0] */ + asm volatile("movq %mm3,%mm6"); /* Q[1] */ + for ( z = z0-1 ; z >= 0 ; z-- ) { + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("pcmpgtb %mm6,%mm7"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("paddb %mm6,%mm6"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pand %mm0,%mm7"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm7,%mm6"); + asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); + asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); + asm volatile("pxor %mm5,%mm2"); + asm volatile("pxor %mm7,%mm3"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm7,%mm6"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm7,%mm7"); + } + asm volatile("movq %%mm2,%0" : "=m" (p[d])); + asm volatile("movq %%mm3,%0" : "=m" (p[d+8])); + asm volatile("movq %%mm4,%0" : "=m" (q[d])); + asm volatile("movq %%mm6,%0" : "=m" (q[d+8])); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_mmxx2 = { + raid6_mmx2_gen_syndrome, + raid6_have_mmx, + "mmxx2", + 0 +}; + +#endif diff --git a/lib/raid6/raid6recov.c b/lib/raid6/raid6recov.c new file mode 100644 index 
000000000000..2609f00e0d61 --- /dev/null +++ b/lib/raid6/raid6recov.c @@ -0,0 +1,132 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6recov.c + * + * RAID-6 data recovery in dual failure mode. In single failure mode, + * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct + * the syndrome.) + */ + +#include <linux/raid/pq.h> + +/* Recover two failed data blocks. */ +void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, + void **ptrs) +{ + u8 *p, *q, *dp, *dq; + u8 px, qx, db; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data pages + Use the dead data pages as temporary storage for + delta p and delta q */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks-2] = p; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_gfmul[raid6_gfexi[failb-faila]]; + qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]]; + + /* Now do it... */ + while ( bytes-- ) { + px = *p ^ *dp; + qx = qmul[*q ^ *dq]; + *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */ + *dp++ = db ^ px; /* Reconstructed A */ + p++; q++; + } +} +EXPORT_SYMBOL_GPL(raid6_2data_recov); + +/* Recover failure of one data block plus the P block */ +void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data page + Use the dead data page as temporary storage for delta q */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + /* Now do it... */ + while ( bytes-- ) { + *p++ ^= *dq = qmul[*q ^ *dq]; + q++; dq++; + } +} +EXPORT_SYMBOL_GPL(raid6_datap_recov); + +#ifndef __KERNEL__ +/* Testing only */ + +/* Recover two failed blocks. */ +void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs) +{ + if ( faila > failb ) { + int tmp = faila; + faila = failb; + failb = tmp; + } + + if ( failb == disks-1 ) { + if ( faila == disks-2 ) { + /* P+Q failure. Just rebuild the syndrome. */ + raid6_call.gen_syndrome(disks, bytes, ptrs); + } else { + /* data+Q failure. Reconstruct data from P, + then rebuild syndrome. */ + /* NOT IMPLEMENTED - equivalent to RAID-5 */ + } + } else { + if ( failb == disks-2 ) { + /* data+P failure. 
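			   raid6_datap_recov() regenerates P and Q with the
			   dead block replaced by the zero page, recovers
			   D = (Q ^ Q') / g^faila via the qmul table, and
			   completes P by XORing D back in.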
*/ + raid6_datap_recov(disks, bytes, faila, ptrs); + } else { + /* data+data failure. */ + raid6_2data_recov(disks, bytes, faila, failb, ptrs); + } + } +} + +#endif diff --git a/lib/raid6/raid6sse1.c b/lib/raid6/raid6sse1.c new file mode 100644 index 000000000000..b274dd5eab8f --- /dev/null +++ b/lib/raid6/raid6sse1.c @@ -0,0 +1,162 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6sse1.c + * + * SSE-1/MMXEXT implementation of RAID-6 syndrome functions + * + * This is really an MMX implementation, but it requires SSE-1 or + * AMD MMXEXT for prefetch support and a few other features. The + * support for nontemporal memory accesses is enough to make this + * worthwhile as a separate implementation. + */ + +#if defined(__i386__) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "raid6x86.h" + +/* Defined in raid6mmx.c */ +extern const struct raid6_mmx_constants { + u64 x1d; +} raid6_mmx_constants; + +static int raid6_have_sse1_or_mmxext(void) +{ + /* Not really boot_cpu but "all_cpus" */ + return boot_cpu_has(X86_FEATURE_MMX) && + (boot_cpu_has(X86_FEATURE_XMM) || + boot_cpu_has(X86_FEATURE_MMXEXT)); +} + +/* + * Plain SSE1 implementation + */ +static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 8 ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); + asm volatile("movq %mm2,%mm4"); /* Q[0] */ + asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d])); + for ( z = z0-2 ; z >= 0 ; z-- ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm6,%mm2"); + asm volatile("pxor %mm6,%mm4"); + asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); + } + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm6,%mm2"); + asm volatile("pxor %mm6,%mm4"); + + asm volatile("movntq %%mm2,%0" : "=m" (p[d])); + asm volatile("movntq %%mm4,%0" : "=m" (q[d])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse1x1 = { + raid6_sse11_gen_syndrome, + raid6_have_sse1_or_mmxext, + "sse1x1", + 1 /* Has cache hints */ +}; + +/* + * Unrolled-by-2 SSE1 implementation + */ +static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + 
int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ + asm volatile("pxor %mm7,%mm7"); /* Zero temp */ + + /* We uniformly assume a single prefetch covers at least 16 bytes */ + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */ + asm volatile("movq %mm2,%mm4"); /* Q[0] */ + asm volatile("movq %mm3,%mm6"); /* Q[1] */ + for ( z = z0-1 ; z >= 0 ; z-- ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("pcmpgtb %mm6,%mm7"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("paddb %mm6,%mm6"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pand %mm0,%mm7"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm7,%mm6"); + asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); + asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); + asm volatile("pxor %mm5,%mm2"); + asm volatile("pxor %mm7,%mm3"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm7,%mm6"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm7,%mm7"); + } + asm volatile("movntq %%mm2,%0" : "=m" (p[d])); + asm volatile("movntq %%mm3,%0" : "=m" (p[d+8])); + asm volatile("movntq %%mm4,%0" : "=m" (q[d])); + asm volatile("movntq %%mm6,%0" : "=m" (q[d+8])); + } + + asm volatile("sfence" : :: "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse1x2 = { + raid6_sse12_gen_syndrome, + raid6_have_sse1_or_mmxext, + "sse1x2", + 1 /* Has cache hints */ +}; + +#endif diff --git a/lib/raid6/raid6sse2.c b/lib/raid6/raid6sse2.c new file mode 100644 index 000000000000..6ed6c6c0389f --- /dev/null +++ b/lib/raid6/raid6sse2.c @@ -0,0 +1,262 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. 
+ * + * ----------------------------------------------------------------------- */ + +/* + * raid6sse2.c + * + * SSE-2 implementation of RAID-6 syndrome functions + * + */ + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "raid6x86.h" + +static const struct raid6_sse_constants { + u64 x1d[2]; +} raid6_sse_constants __attribute__((aligned(16))) = { + { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL }, +}; + +static int raid6_have_sse2(void) +{ + /* Not really boot_cpu but "all_cpus" */ + return boot_cpu_has(X86_FEATURE_MMX) && + boot_cpu_has(X86_FEATURE_FXSR) && + boot_cpu_has(X86_FEATURE_XMM) && + boot_cpu_has(X86_FEATURE_XMM2); +} + +/* + * Plain SSE2 implementation + */ +static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); + asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ + asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d])); + for ( z = z0-2 ; z >= 0 ; z-- ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm6,%xmm2"); + asm volatile("pxor %xmm6,%xmm4"); + asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d])); + } + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm6,%xmm2"); + asm volatile("pxor %xmm6,%xmm4"); + + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("pxor %xmm2,%xmm2"); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("pxor %xmm4,%xmm4"); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x1 = { + raid6_sse21_gen_syndrome, + raid6_have_sse2, + "sse2x1", + 1 /* Has cache hints */ +}; + +/* + * Unrolled-by-2 SSE2 implementation + */ +static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ + asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ + + /* We uniformly assume a single prefetch covers at least 32 bytes */ + for ( d = 0 ; d < bytes ; d += 32 ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */ + asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ + asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */ + for ( z = z0-1 ; z >= 0 ; z-- ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb 
%xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + } + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x2 = { + raid6_sse22_gen_syndrome, + raid6_have_sse2, + "sse2x2", + 1 /* Has cache hints */ +}; + +#endif + +#if defined(__x86_64__) && !defined(__arch_um__) + +/* + * Unrolled-by-4 SSE2 implementation + */ +static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); + asm volatile("pxor %xmm2,%xmm2"); /* P[0] */ + asm volatile("pxor %xmm3,%xmm3"); /* P[1] */ + asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */ + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ + asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */ + asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ + asm volatile("pxor %xmm10,%xmm10"); /* P[2] */ + asm volatile("pxor %xmm11,%xmm11"); /* P[3] */ + asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */ + asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */ + asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */ + asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 64 ) { + for ( z = z0 ; z >= 0 ; z-- ) { + /* The second prefetch seems to improve performance... 
*/ + asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); + asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("pcmpgtb %xmm12,%xmm13"); + asm volatile("pcmpgtb %xmm14,%xmm15"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("paddb %xmm12,%xmm12"); + asm volatile("paddb %xmm14,%xmm14"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pand %xmm0,%xmm13"); + asm volatile("pand %xmm0,%xmm15"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); + asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); + asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm13,%xmm10"); + asm volatile("pxor %xmm15,%xmm11"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pxor %xmm13,%xmm13"); + asm volatile("pxor %xmm15,%xmm15"); + } + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("pxor %xmm2,%xmm2"); + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); + asm volatile("pxor %xmm3,%xmm3"); + asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); + asm volatile("pxor %xmm10,%xmm10"); + asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); + asm volatile("pxor %xmm11,%xmm11"); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("pxor %xmm4,%xmm4"); + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); + asm volatile("pxor %xmm6,%xmm6"); + asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); + asm volatile("pxor %xmm12,%xmm12"); + asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); + asm volatile("pxor %xmm14,%xmm14"); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x4 = { + raid6_sse24_gen_syndrome, + raid6_have_sse2, + "sse2x4", + 1 /* Has cache hints */ +}; + +#endif diff --git a/lib/raid6/raid6test/Makefile b/lib/raid6/raid6test/Makefile new file mode 100644 index 000000000000..2874cbef529d --- /dev/null +++ b/lib/raid6/raid6test/Makefile @@ -0,0 +1,75 @@ +# +# This is a simple Makefile to test some of the RAID-6 code +# from userspace. +# + +CC = gcc +OPTFLAGS = -O2 # Adjust as desired +CFLAGS = -I.. 
-I ../../../include -g $(OPTFLAGS) +LD = ld +AWK = awk +AR = ar +RANLIB = ranlib + +.c.o: + $(CC) $(CFLAGS) -c -o $@ $< + +%.c: ../%.c + cp -f $< $@ + +%.uc: ../%.uc + cp -f $< $@ + +all: raid6.a raid6test + +raid6.a: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \ + raid6int32.o \ + raid6mmx.o raid6sse1.o raid6sse2.o \ + raid6altivec1.o raid6altivec2.o raid6altivec4.o raid6altivec8.o \ + raid6recov.o raid6algos.o \ + raid6tables.o + rm -f $@ + $(AR) cq $@ $^ + $(RANLIB) $@ + +raid6test: test.c raid6.a + $(CC) $(CFLAGS) -o raid6test $^ + +raid6altivec1.c: raid6altivec.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=1 < raid6altivec.uc > $@ + +raid6altivec2.c: raid6altivec.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=2 < raid6altivec.uc > $@ + +raid6altivec4.c: raid6altivec.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=4 < raid6altivec.uc > $@ + +raid6altivec8.c: raid6altivec.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < raid6altivec.uc > $@ + +raid6int1.c: raid6int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=1 < raid6int.uc > $@ + +raid6int2.c: raid6int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=2 < raid6int.uc > $@ + +raid6int4.c: raid6int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=4 < raid6int.uc > $@ + +raid6int8.c: raid6int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < raid6int.uc > $@ + +raid6int16.c: raid6int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=16 < raid6int.uc > $@ + +raid6int32.c: raid6int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=32 < raid6int.uc > $@ + +raid6tables.c: mktables + ./mktables > raid6tables.c + +clean: + rm -f *.o *.a mktables mktables.c raid6int.uc raid6*.c raid6test + +spotless: clean + rm -f *~ diff --git a/lib/raid6/raid6test/test.c b/lib/raid6/raid6test/test.c new file mode 100644 index 000000000000..7a930318b17d --- /dev/null +++ b/lib/raid6/raid6test/test.c @@ -0,0 +1,124 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. 
+ * + * ----------------------------------------------------------------------- */ + +/* + * raid6test.c + * + * Test RAID-6 recovery with various algorithms + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <linux/raid/pq.h> + +#define NDISKS 16 /* Including P and Q */ + +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); +struct raid6_calls raid6_call; + +char *dataptrs[NDISKS]; +char data[NDISKS][PAGE_SIZE]; +char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; + +static void makedata(void) +{ + int i, j; + + for (i = 0; i < NDISKS; i++) { + for (j = 0; j < PAGE_SIZE; j++) + data[i][j] = rand(); + + dataptrs[i] = data[i]; + } +} + +static char disk_type(int d) +{ + switch (d) { + case NDISKS-2: + return 'P'; + case NDISKS-1: + return 'Q'; + default: + return 'D'; + } +} + +static int test_disks(int i, int j) +{ + int erra, errb; + + memset(recovi, 0xf0, PAGE_SIZE); + memset(recovj, 0xba, PAGE_SIZE); + + dataptrs[i] = recovi; + dataptrs[j] = recovj; + + raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs); + + erra = memcmp(data[i], recovi, PAGE_SIZE); + errb = memcmp(data[j], recovj, PAGE_SIZE); + + if (i < NDISKS-2 && j == NDISKS-1) { + /* We don't implement the DQ failure scenario, since it's + equivalent to a RAID-5 failure (XOR, then recompute Q) */ + erra = errb = 0; + } else { + printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", + raid6_call.name, + i, disk_type(i), + j, disk_type(j), + (!erra && !errb) ? "OK" : + !erra ? "ERRB" : + !errb ? "ERRA" : "ERRAB"); + } + + dataptrs[i] = data[i]; + dataptrs[j] = data[j]; + + return erra || errb; +} + +int main(int argc, char *argv[]) +{ + const struct raid6_calls *const *algo; + int i, j; + int err = 0; + + makedata(); + + for (algo = raid6_algos; *algo; algo++) { + if (!(*algo)->valid || (*algo)->valid()) { + raid6_call = **algo; + + /* Nuke syndromes */ + memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + + /* Generate assumed good syndrome */ + raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, + (void **)&dataptrs); + + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + } + printf("\n"); + } + + printf("\n"); + /* Pick the best algorithm test */ + raid6_select_algo(); + + if (err) + printf("\n*** ERRORS FOUND ***\n"); + + return err; +} diff --git a/lib/raid6/raid6x86.h b/lib/raid6/raid6x86.h new file mode 100644 index 000000000000..4c22c1568558 --- /dev/null +++ b/lib/raid6/raid6x86.h @@ -0,0 +1,61 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. 
+ * + * ----------------------------------------------------------------------- */ + +/* + * raid6x86.h + * + * Definitions common to x86 and x86-64 RAID-6 code only + */ + +#ifndef LINUX_RAID_RAID6X86_H +#define LINUX_RAID_RAID6X86_H + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#ifdef __KERNEL__ /* Real code */ + +#include <asm/i387.h> + +#else /* Dummy code for user space testing */ + +static inline void kernel_fpu_begin(void) +{ +} + +static inline void kernel_fpu_end(void) +{ +} + +#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions + * (fast save and restore) */ +#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ +#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ + +/* Should work well enough on modern CPUs for testing */ +static inline int boot_cpu_has(int flag) +{ + u32 eax = (flag >> 5) ? 0x80000001 : 1; + u32 edx; + + asm volatile("cpuid" + : "+a" (eax), "=d" (edx) + : : "ecx", "ebx"); + + return (edx >> (flag & 31)) & 1; +} + +#endif /* ndef __KERNEL__ */ + +#endif +#endif diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk new file mode 100644 index 000000000000..c6aa03631df8 --- /dev/null +++ b/lib/raid6/unroll.awk @@ -0,0 +1,20 @@ + +# This filter requires one command line option of form -vN=n +# where n must be a decimal number. +# +# Repeat each input line containing $$ n times, replacing $$ with 0...n-1. +# Replace each $# with n, and each $* with a single $. + +BEGIN { + n = N + 0 +} +{ + if (/\$\$/) { rep = n } else { rep = 1 } + for (i = 0; i < rep; ++i) { + tmp = $0 + gsub(/\$\$/, i, tmp) + gsub(/\$\#/, n, tmp) + gsub(/\$\*/, "$", tmp) + print tmp + } +} diff --git a/lib/random32.c b/lib/random32.c index 217d5c4b666d..fc3545a32771 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -39,13 +39,16 @@ #include <linux/jiffies.h> #include <linux/random.h> -struct rnd_state { - u32 s1, s2, s3; -}; - static DEFINE_PER_CPU(struct rnd_state, net_rand_state); -static u32 __random32(struct rnd_state *state) +/** + * prandom32 - seeded pseudo-random number generator. + * @state: pointer to state structure holding seeded state. + * + * This is used for pseudo-randomness with no outside seeding. + * For more random results, use random32(). + */ +u32 prandom32(struct rnd_state *state) { #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) @@ -55,14 +58,7 @@ static u32 __random32(struct rnd_state *state) return (state->s1 ^ state->s2 ^ state->s3); } - -/* - * Handle minimum values for seeds - */ -static inline u32 __seed(u32 x, u32 m) -{ - return (x < m) ? x + m : x; -} +EXPORT_SYMBOL(prandom32); /** * random32 - pseudo random number generator @@ -75,7 +71,7 @@ u32 random32(void) { unsigned long r; struct rnd_state *state = &get_cpu_var(net_rand_state); - r = __random32(state); + r = prandom32(state); put_cpu_var(state); return r; } @@ -118,12 +114,12 @@ static int __init random32_init(void) state->s3 = __seed(LCG(state->s2), 15); /* "warm it up" */ - __random32(state); - __random32(state); - __random32(state); - __random32(state); - __random32(state); - __random32(state); + prandom32(state); + prandom32(state); + prandom32(state); + prandom32(state); + prandom32(state); + prandom32(state); } return 0; } @@ -131,7 +127,7 @@ core_initcall(random32_init); /* * Generate better values after random number generator - * is fully initalized. + * is fully initialized. 
*/ static int __init random32_reseed(void) { @@ -147,7 +143,7 @@ static int __init random32_reseed(void) state->s3 = __seed(seeds[2], 15); /* mix it in */ - __random32(state); + prandom32(state); } return 0; } diff --git a/lib/rbtree.c b/lib/rbtree.c index e2aa3be29858..4693f79195d3 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -283,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root) } EXPORT_SYMBOL(rb_erase); +static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data) +{ + struct rb_node *parent; + +up: + func(node, data); + parent = rb_parent(node); + if (!parent) + return; + + if (node == parent->rb_left && parent->rb_right) + func(parent->rb_right, data); + else if (parent->rb_left) + func(parent->rb_left, data); + + node = parent; + goto up; +} + +/* + * after inserting @node into the tree, update the tree to account for + * both the new entry and any damage done by rebalance + */ +void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + + rb_augment_path(node, func, data); +} + +/* + * before removing the node, find the deepest node on the rebalance path + * that will still be there after @node gets removed + */ +struct rb_node *rb_augment_erase_begin(struct rb_node *node) +{ + struct rb_node *deepest; + + if (!node->rb_right && !node->rb_left) + deepest = rb_parent(node); + else if (!node->rb_right) + deepest = node->rb_left; + else if (!node->rb_left) + deepest = node->rb_right; + else { + deepest = rb_next(node); + if (deepest->rb_right) + deepest = deepest->rb_right; + else if (rb_parent(deepest) != node) + deepest = rb_parent(deepest); + } + + return deepest; +} + +/* + * after removal, update the tree to account for the removed entry + * and any rebalance damage. + */ +void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node) + rb_augment_path(node, func, data); +} + /* * This function returns the first node (in sort order) of the tree. */ diff --git a/lib/rwsem.c b/lib/rwsem.c index 3e3365e5665e..f236d7cd5cf3 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -36,45 +36,56 @@ struct rwsem_waiter { #define RWSEM_WAITING_FOR_WRITE 0x00000002 }; +/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and + * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held + * since the rwsem value was observed. 
+ */ +#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */ +#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */ +#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */ + /* * handle the lock release when processes blocked on it that can now run * - if we come here from up_xxxx(), then: * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) - * - there must be someone on the queue + * - there must be someone on the queue * - the spinlock must be held by the caller * - woken process blocks are discarded from the list after having task zeroed * - writers are only woken if downgrading is false */ -static inline struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int downgrading) +static struct rw_semaphore * +__rwsem_do_wake(struct rw_semaphore *sem, int wake_type) { struct rwsem_waiter *waiter; struct task_struct *tsk; struct list_head *next; - signed long oldcount, woken, loop; - - if (downgrading) - goto dont_wake_writers; - - /* if we came through an up_xxxx() call, we only only wake someone up - * if we can transition the active part of the count from 0 -> 1 - */ - try_again: - oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem) - - RWSEM_ACTIVE_BIAS; - if (oldcount & RWSEM_ACTIVE_MASK) - goto undo; + signed long oldcount, woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - - /* try to grant a single write lock if there's a writer at the front - * of the queue - note we leave the 'active part' of the count - * incremented by 1 and the waiting part incremented by 0x00010000 - */ if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) goto readers_only; + if (wake_type == RWSEM_WAKE_READ_OWNED) + /* Another active reader was observed, so wakeup is not + * likely to succeed. Save the atomic op. + */ + goto out; + + /* There's a writer at the front of the queue - try to grant it the + * write lock. However, we only wake this writer if we can transition + * the active part of the count from 0 -> 1 + */ + adjustment = RWSEM_ACTIVE_WRITE_BIAS; + if (waiter->list.next == &sem->wait_list) + adjustment -= RWSEM_WAITING_BIAS; + + try_again_write: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (oldcount & RWSEM_ACTIVE_MASK) + /* Someone grabbed the sem already */ + goto undo_write; + /* We must be careful not to touch 'waiter' after we set ->task = NULL. * It is an allocated on the waiter's stack and may become invalid at * any time after that point (due to a wakeup from another source). @@ -87,18 +98,30 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) put_task_struct(tsk); goto out; - /* don't want to wake any writers */ - dont_wake_writers: - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (waiter->flags & RWSEM_WAITING_FOR_WRITE) + readers_only: + /* If we come here from up_xxxx(), another thread might have reached + * rwsem_down_failed_common() before we acquired the spinlock and + * woken up a waiter, making it now active. We prefer to check for + * this first in order to not spend too much time with the spinlock + * held if we're not going to be able to wake up readers in the end. + * + * Note that we do not need to update the rwsem count: any writer + * trying to acquire rwsem will run rwsem_down_write_failed() due + * to the waiting threads and block trying to acquire the spinlock. 
+ * + * We use a dummy atomic update in order to acquire the cache line + * exclusively since we expect to succeed and run the final rwsem + * count adjustment pretty soon. + */ + if (wake_type == RWSEM_WAKE_ANY && + rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS) + /* Someone grabbed the sem for write already */ goto out; - /* grant an infinite number of read locks to the readers at the front - * of the queue - * - note we increment the 'active part' of the count by the number of - * readers before waking any processes up + /* Grant an infinite number of read locks to the readers at the front + * of the queue. Note we increment the 'active part' of the count by + * the number of readers before waking any processes up. */ - readers_only: woken = 0; do { woken++; @@ -111,16 +134,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) } while (waiter->flags & RWSEM_WAITING_FOR_READ); - loop = woken; - woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS; - if (!downgrading) - /* we'd already done one increment earlier */ - woken -= RWSEM_ACTIVE_BIAS; + adjustment = woken * RWSEM_ACTIVE_READ_BIAS; + if (waiter->flags & RWSEM_WAITING_FOR_READ) + /* hit end of list above */ + adjustment -= RWSEM_WAITING_BIAS; - rwsem_atomic_add(woken, sem); + rwsem_atomic_add(adjustment, sem); next = sem->wait_list.next; - for (; loop > 0; loop--) { + for (loop = woken; loop > 0; loop--) { waiter = list_entry(next, struct rwsem_waiter, list); next = waiter->list.next; tsk = waiter->task; @@ -136,11 +158,12 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) out: return sem; - /* undo the change to count, but check for a transition 1->0 */ - undo: - if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0) + /* undo the change to the active count, but check for a transition + * 1->0 */ + undo_write: + if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) goto out; - goto try_again; + goto try_again_write; } /* @@ -148,8 +171,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) */ static struct rw_semaphore __sched * rwsem_down_failed_common(struct rw_semaphore *sem, - struct rwsem_waiter *waiter, signed long adjustment) + unsigned int flags, signed long adjustment) { + struct rwsem_waiter waiter; struct task_struct *tsk = current; signed long count; @@ -157,23 +181,34 @@ rwsem_down_failed_common(struct rw_semaphore *sem, /* set up my own style of waitqueue */ spin_lock_irq(&sem->wait_lock); - waiter->task = tsk; + waiter.task = tsk; + waiter.flags = flags; get_task_struct(tsk); - list_add_tail(&waiter->list, &sem->wait_list); + if (list_empty(&sem->wait_list)) + adjustment += RWSEM_WAITING_BIAS; + list_add_tail(&waiter.list, &sem->wait_list); - /* we're now waiting on the lock, but no longer actively read-locking */ + /* we're now waiting on the lock, but no longer actively locking */ count = rwsem_atomic_update(adjustment, sem); - /* if there are no active locks, wake the front queued process(es) up */ - if (!(count & RWSEM_ACTIVE_MASK)) - sem = __rwsem_do_wake(sem, 0); + /* If there are no active locks, wake the front queued process(es) up. + * + * Alternatively, if we're called from a failed down_write(), there + * were already threads queued before us and there are no active + * writers, the lock must be read owned; so we try to wake any read + * locks that were queued ahead of us. 
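For orientation, the rwsem count word packs an "active" part in its low 16 bits and a "waiting" part above that; on a 32-bit count the per-arch headers conventionally define the biases roughly as below (quoted from memory of the arch headers, so treat the exact values as an assumption), and the fastpaths that feed these slow paths are likewise arch-specific but roughly equivalent to the C sketch that follows:

/* illustrative mirror of a 32-bit arch rwsem header, not quoted from it */
#define RWSEM_UNLOCKED_VALUE            0x00000000
#define RWSEM_ACTIVE_BIAS               0x00000001
#define RWSEM_ACTIVE_MASK               0x0000ffff
#define RWSEM_WAITING_BIAS              (-0x00010000)
#define RWSEM_ACTIVE_READ_BIAS          RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS         (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

static inline void down_read_sketch(struct rw_semaphore *sem)
{
        if (unlikely(rwsem_atomic_update(RWSEM_ACTIVE_READ_BIAS, sem) <= 0))
                rwsem_down_read_failed(sem);    /* a writer is active or waiting */
}

static inline void down_write_sketch(struct rw_semaphore *sem)
{
        if (unlikely(rwsem_atomic_update(RWSEM_ACTIVE_WRITE_BIAS, sem) !=
                     RWSEM_ACTIVE_WRITE_BIAS))
                rwsem_down_write_failed(sem);   /* lock held or waiters queued */
}

Failing either fastpath is what enters rwsem_down_read_failed()/rwsem_down_write_failed() with adjustments of -RWSEM_ACTIVE_READ_BIAS and -RWSEM_ACTIVE_WRITE_BIAS, undoing the optimistic bias while the task queues; that is also why a count of exactly RWSEM_WAITING_BIAS means "waiters queued, nobody active".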
*/ + if (count == RWSEM_WAITING_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); + else if (count > RWSEM_WAITING_BIAS && + adjustment == -RWSEM_ACTIVE_WRITE_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); spin_unlock_irq(&sem->wait_lock); /* wait to be given the lock */ for (;;) { - if (!waiter->task) + if (!waiter.task) break; schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); @@ -190,12 +225,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem, asmregparm struct rw_semaphore __sched * rwsem_down_read_failed(struct rw_semaphore *sem) { - struct rwsem_waiter waiter; - - waiter.flags = RWSEM_WAITING_FOR_READ; - rwsem_down_failed_common(sem, &waiter, - RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS); - return sem; + return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, + -RWSEM_ACTIVE_READ_BIAS); } /* @@ -204,12 +235,8 @@ rwsem_down_read_failed(struct rw_semaphore *sem) asmregparm struct rw_semaphore __sched * rwsem_down_write_failed(struct rw_semaphore *sem) { - struct rwsem_waiter waiter; - - waiter.flags = RWSEM_WAITING_FOR_WRITE; - rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS); - - return sem; + return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, + -RWSEM_ACTIVE_WRITE_BIAS); } /* @@ -224,7 +251,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 0); + sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -244,7 +271,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 1); + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); spin_unlock_irqrestore(&sem->wait_lock, flags); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 9afa25b52a83..a5ec42868f99 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -10,6 +10,7 @@ #include <linux/slab.h> #include <linux/scatterlist.h> #include <linux/highmem.h> +#include <linux/kmemleak.h> /** * sg_next - return the next scatterlist entry in a list @@ -115,17 +116,29 @@ EXPORT_SYMBOL(sg_init_one); */ static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask) { - if (nents == SG_MAX_SINGLE_ALLOC) - return (struct scatterlist *) __get_free_page(gfp_mask); - else + if (nents == SG_MAX_SINGLE_ALLOC) { + /* + * Kmemleak doesn't track page allocations as they are not + * commonly used (in a raw form) for kernel data structures. + * As we chain together a list of pages and then a normal + * kmalloc (tracked by kmemleak), in order to for that last + * allocation not to become decoupled (and thus a + * false-positive) we need to inform kmemleak of all the + * intermediate allocations. 
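The same kmemleak pairing applies anywhere a raw page allocation ends up holding the only references to kmalloc'd objects; a hypothetical helper outside the scatterlist code might look like this (names invented):

#include <linux/gfp.h>
#include <linux/kmemleak.h>

/* A raw page used as an array of pointers to kmalloc'd objects.  Without
 * the annotation kmemleak never scans the page, so objects referenced only
 * from it would be reported as leaks. */
static void **ptr_table_alloc(gfp_t gfp)
{
        void **table = (void **)__get_free_page(gfp);

        if (table)
                /* min_count = 1: warn if nothing else references the page */
                kmemleak_alloc(table, PAGE_SIZE, 1, gfp);
        return table;
}

static void ptr_table_free(void **table)
{
        kmemleak_free(table);
        free_page((unsigned long)table);
}

kmemleak_alloc()'s third argument is the minimum reference count: passing 1 means the page itself is reported if nothing points to it, while the annotation makes kmemleak scan the page so the chained allocations stay reachable.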
+ */ + void *ptr = (void *) __get_free_page(gfp_mask); + kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask); + return ptr; + } else return kmalloc(nents * sizeof(struct scatterlist), gfp_mask); } static void sg_kfree(struct scatterlist *sg, unsigned int nents) { - if (nents == SG_MAX_SINGLE_ALLOC) + if (nents == SG_MAX_SINGLE_ALLOC) { + kmemleak_free(sg); free_page((unsigned long) sg); - else + } else kfree(sg); } diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 5fddf720da73..34e3082632d8 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -50,19 +50,11 @@ */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -/* - * Enumeration for sync targets - */ -enum dma_sync_target { - SYNC_FOR_CPU = 0, - SYNC_FOR_DEVICE = 1, -}; - int swiotlb_force; /* - * Used to do a quick range check in unmap_single and - * sync_single_*, to see if the memory was in fact allocated by this + * Used to do a quick range check in swiotlb_tbl_unmap_single and + * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this * API. */ static char *io_tlb_start, *io_tlb_end; @@ -140,28 +132,14 @@ void swiotlb_print_info(void) (unsigned long long)pend); } -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the DMA API. - */ -void __init -swiotlb_init_with_default_size(size_t default_size, int verbose) +void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) { unsigned long i, bytes; - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - bytes = io_tlb_nslabs << IO_TLB_SHIFT; + bytes = nslabs << IO_TLB_SHIFT; - /* - * Get IO TLB memory from the low pages - */ - io_tlb_start = alloc_bootmem_low_pages(bytes); - if (!io_tlb_start) - panic("Cannot allocate SWIOTLB buffer"); + io_tlb_nslabs = nslabs; + io_tlb_start = tlb; io_tlb_end = io_tlb_start + bytes; /* @@ -185,6 +163,32 @@ swiotlb_init_with_default_size(size_t default_size, int verbose) swiotlb_print_info(); } +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the DMA API. + */ +void __init +swiotlb_init_with_default_size(size_t default_size, int verbose) +{ + unsigned long bytes; + + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + + /* + * Get IO TLB memory from the low pages + */ + io_tlb_start = alloc_bootmem_low_pages(bytes); + if (!io_tlb_start) + panic("Cannot allocate SWIOTLB buffer"); + + swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose); +} + void __init swiotlb_init(int verbose) { @@ -323,8 +327,8 @@ static int is_swiotlb_buffer(phys_addr_t paddr) /* * Bounce: copy the swiotlb buffer back to the original dma location */ -static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, - enum dma_data_direction dir) +void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, + enum dma_data_direction dir) { unsigned long pfn = PFN_DOWN(phys); @@ -360,26 +364,25 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, memcpy(phys_to_virt(phys), dma_addr, size); } } +EXPORT_SYMBOL_GPL(swiotlb_bounce); -/* - * Allocates bounce buffer and returns its kernel virtual address. 
- */ -static void * -map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir) +void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, + phys_addr_t phys, size_t size, + enum dma_data_direction dir) { unsigned long flags; char *dma_addr; unsigned int nslots, stride, index, wrap; int i; - unsigned long start_dma_addr; unsigned long mask; unsigned long offset_slots; unsigned long max_slots; mask = dma_get_seg_boundary(hwdev); - start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask; - offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + tbl_dma_addr &= mask; + + offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; /* * Carefully handle integer overflow which can occur when mask == ~0UL. @@ -466,12 +469,27 @@ found: return dma_addr; } +EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ + +static void * +map_single(struct device *hwdev, phys_addr_t phys, size_t size, + enum dma_data_direction dir) +{ + dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start); + + return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); +} /* * dma_addr is the kernel virtual address of the bounce buffer to unmap. */ -static void -do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +void +swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, + enum dma_data_direction dir) { unsigned long flags; int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; @@ -509,10 +527,12 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) } spin_unlock_irqrestore(&io_tlb_lock, flags); } +EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); -static void -sync_single(struct device *hwdev, char *dma_addr, size_t size, - int dir, int target) +void +swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size, + enum dma_data_direction dir, + enum dma_sync_target target) { int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; phys_addr_t phys = io_tlb_orig_addr[index]; @@ -536,6 +556,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size, BUG(); } } +EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); void * swiotlb_alloc_coherent(struct device *hwdev, size_t size, @@ -559,8 +580,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, } if (!ret) { /* - * We are either out of memory or the device can't DMA - * to GFP_DMA memory; fall back on map_single(), which + * We are either out of memory or the device can't DMA to + * GFP_DMA memory; fall back on map_single(), which * will grab memory from the lowest available address range. 
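The reason map_single() shrinks to a thin wrapper is that swiotlb_tbl_map_single() is now exported for users outside this file that manage their own bounce pool and bus addressing. A hedged sketch of such a user follows; the pool size, the my_* names and the phys_to_dev() translation are all hypothetical, and only the swiotlb_* calls come from this patch:

#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/swiotlb.h>

static char *my_io_tlb;

/* hypothetical bus translation; identity on a flat-memory platform */
static inline dma_addr_t phys_to_dev(struct device *dev, phys_addr_t paddr)
{
        return (dma_addr_t)paddr;
}

static void __init my_swiotlb_init(void)
{
        unsigned long bytes = 64UL << 20;               /* 64 MB pool */
        unsigned long nslabs = bytes >> IO_TLB_SHIFT;   /* 2 KB slots */

        my_io_tlb = alloc_bootmem_low_pages(bytes);
        swiotlb_init_with_tbl(my_io_tlb, nslabs, 1);
}

static dma_addr_t my_map_single(struct device *dev, phys_addr_t phys,
                                size_t size, enum dma_data_direction dir)
{
        dma_addr_t tbl_dma_addr = phys_to_dev(dev, virt_to_phys(my_io_tlb));
        void *bounce = swiotlb_tbl_map_single(dev, tbl_dma_addr, phys,
                                              size, dir);

        return bounce ? phys_to_dev(dev, virt_to_phys(bounce)) : 0;
}

The corresponding teardown would pass the bounce address back through swiotlb_tbl_unmap_single() so the slot is released and any DMA_FROM_DEVICE data is copied back out.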
*/ ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); @@ -578,7 +599,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, (unsigned long long)dev_addr); /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); + swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); return NULL; } *dma_handle = dev_addr; @@ -596,13 +617,14 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, if (!is_swiotlb_buffer(paddr)) free_pages((unsigned long)vaddr, get_order(size)); else - /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); + /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ + swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); } EXPORT_SYMBOL(swiotlb_free_coherent); static void -swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) +swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, + int do_panic) { /* * Ran out of IOMMU space for this operation. This is very bad. @@ -680,14 +702,14 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); * whatever the device wrote there. */ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) + size_t size, enum dma_data_direction dir) { phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(paddr)) { - do_unmap_single(hwdev, phys_to_virt(paddr), size, dir); + swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir); return; } @@ -723,14 +745,16 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page); */ static void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir, int target) + size_t size, enum dma_data_direction dir, + enum dma_sync_target target) { phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(paddr)) { - sync_single(hwdev, phys_to_virt(paddr), size, dir, target); + swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir, + target); return; } @@ -757,37 +781,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, EXPORT_SYMBOL(swiotlb_sync_single_for_device); /* - * Same as above, but for a sub-range of the mapping. - */ -static void -swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - int dir, int target) -{ - swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target); -} - -void -swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, - SYNC_FOR_CPU); -} -EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu); - -void -swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, - SYNC_FOR_DEVICE); -} -EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); - -/* * Map a set of buffers described by scatterlist in streaming mode for DMA. * This is the scatter-gather version of the above swiotlb_map_page * interface. 
Here the scatter gather list elements are each tagged with the @@ -840,7 +833,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs); int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir) + enum dma_data_direction dir) { return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); } @@ -867,7 +860,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir) + enum dma_data_direction dir) { return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); } @@ -882,7 +875,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg); */ static void swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, - int nelems, int dir, int target) + int nelems, enum dma_data_direction dir, + enum dma_sync_target target) { struct scatterlist *sg; int i; diff --git a/lib/uuid.c b/lib/uuid.c new file mode 100644 index 000000000000..8fadd7cef46c --- /dev/null +++ b/lib/uuid.c @@ -0,0 +1,53 @@ +/* + * Unified UUID/GUID definition + * + * Copyright (C) 2009, Intel Corp. + * Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/uuid.h> +#include <linux/random.h> + +static void __uuid_gen_common(__u8 b[16]) +{ + int i; + u32 r; + + for (i = 0; i < 4; i++) { + r = random32(); + memcpy(b + i * 4, &r, 4); + } + /* reversion 0b10 */ + b[8] = (b[8] & 0x3F) | 0x80; +} + +void uuid_le_gen(uuid_le *lu) +{ + __uuid_gen_common(lu->b); + /* version 4 : random generation */ + lu->b[7] = (lu->b[7] & 0x0F) | 0x40; +} +EXPORT_SYMBOL_GPL(uuid_le_gen); + +void uuid_be_gen(uuid_be *bu) +{ + __uuid_gen_common(bu->b); + /* version 4 : random generation */ + bu->b[6] = (bu->b[6] & 0x0F) | 0x40; +} +EXPORT_SYMBOL_GPL(uuid_be_gen); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 46d34b0b74a8..7af9d841c43b 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -146,19 +146,16 @@ int strict_strtoul(const char *cp, unsigned int base, unsigned long *res) { char *tail; unsigned long val; - size_t len; *res = 0; - len = strlen(cp); - if (len == 0) + if (!*cp) return -EINVAL; val = simple_strtoul(cp, &tail, base); if (tail == cp) return -EINVAL; - if ((*tail == '\0') || - ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) { + if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) { *res = val; return 0; } @@ -220,18 +217,15 @@ int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res) { char *tail; unsigned long long val; - size_t len; *res = 0; - len = strlen(cp); - if (len == 0) + if (!*cp) return -EINVAL; val = simple_strtoull(cp, &tail, base); if (tail == cp) return -EINVAL; - if ((*tail == '\0') || - ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) { + if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) { *res = val; return 0; } @@ -267,7 +261,8 @@ int strict_strtoll(const char *cp, unsigned int base, long 
long *res) } EXPORT_SYMBOL(strict_strtoll); -static int skip_atoi(const char **s) +static noinline_for_stack +int skip_atoi(const char **s) { int i = 0; @@ -287,7 +282,8 @@ static int skip_atoi(const char **s) /* Formats correctly any integer in [0,99999]. * Outputs from one to five digits depending on input. * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */ -static char *put_dec_trunc(char *buf, unsigned q) +static noinline_for_stack +char *put_dec_trunc(char *buf, unsigned q) { unsigned d3, d2, d1, d0; d1 = (q>>4) & 0xf; @@ -324,7 +320,8 @@ static char *put_dec_trunc(char *buf, unsigned q) return buf; } /* Same with if's removed. Always emits five digits */ -static char *put_dec_full(char *buf, unsigned q) +static noinline_for_stack +char *put_dec_full(char *buf, unsigned q) { /* BTW, if q is in [0,9999], 8-bit ints will be enough, */ /* but anyway, gcc produces better code with full-sized ints */ @@ -366,7 +363,8 @@ static char *put_dec_full(char *buf, unsigned q) return buf; } /* No inlining helps gcc to use registers better */ -static noinline char *put_dec(char *buf, unsigned long long num) +static noinline_for_stack +char *put_dec(char *buf, unsigned long long num) { while (1) { unsigned rem; @@ -417,8 +415,9 @@ struct printf_spec { s16 precision; /* # of digits/chars */ }; -static char *number(char *buf, char *end, unsigned long long num, - struct printf_spec spec) +static noinline_for_stack +char *number(char *buf, char *end, unsigned long long num, + struct printf_spec spec) { /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ @@ -537,7 +536,8 @@ static char *number(char *buf, char *end, unsigned long long num, return buf; } -static char *string(char *buf, char *end, const char *s, struct printf_spec spec) +static noinline_for_stack +char *string(char *buf, char *end, const char *s, struct printf_spec spec) { int len, i; @@ -567,8 +567,9 @@ static char *string(char *buf, char *end, const char *s, struct printf_spec spec return buf; } -static char *symbol_string(char *buf, char *end, void *ptr, - struct printf_spec spec, char ext) +static noinline_for_stack +char *symbol_string(char *buf, char *end, void *ptr, + struct printf_spec spec, char ext) { unsigned long value = (unsigned long) ptr; #ifdef CONFIG_KALLSYMS @@ -588,8 +589,9 @@ static char *symbol_string(char *buf, char *end, void *ptr, #endif } -static char *resource_string(char *buf, char *end, struct resource *res, - struct printf_spec spec, const char *fmt) +static noinline_for_stack +char *resource_string(char *buf, char *end, struct resource *res, + struct printf_spec spec, const char *fmt) { #ifndef IO_RSRC_PRINTK_SIZE #define IO_RSRC_PRINTK_SIZE 6 @@ -690,8 +692,9 @@ static char *resource_string(char *buf, char *end, struct resource *res, return string(buf, end, sym, spec); } -static char *mac_address_string(char *buf, char *end, u8 *addr, - struct printf_spec spec, const char *fmt) +static noinline_for_stack +char *mac_address_string(char *buf, char *end, u8 *addr, + struct printf_spec spec, const char *fmt) { char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")]; char *p = mac_addr; @@ -714,7 +717,8 @@ static char *mac_address_string(char *buf, char *end, u8 *addr, return string(buf, end, mac_addr, spec); } -static char *ip4_string(char *p, const u8 *addr, const char *fmt) +static noinline_for_stack +char *ip4_string(char *p, const u8 *addr, const char *fmt) { int i; bool leading_zeros = (fmt[0] == 'i'); @@ -763,7 +767,8 @@ 
static char *ip4_string(char *p, const u8 *addr, const char *fmt) return p; } -static char *ip6_compressed_string(char *p, const char *addr) +static noinline_for_stack +char *ip6_compressed_string(char *p, const char *addr) { int i, j, range; unsigned char zerolength[8]; @@ -843,7 +848,8 @@ static char *ip6_compressed_string(char *p, const char *addr) return p; } -static char *ip6_string(char *p, const char *addr, const char *fmt) +static noinline_for_stack +char *ip6_string(char *p, const char *addr, const char *fmt) { int i; @@ -858,8 +864,9 @@ static char *ip6_string(char *p, const char *addr, const char *fmt) return p; } -static char *ip6_addr_string(char *buf, char *end, const u8 *addr, - struct printf_spec spec, const char *fmt) +static noinline_for_stack +char *ip6_addr_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) { char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")]; @@ -871,8 +878,9 @@ static char *ip6_addr_string(char *buf, char *end, const u8 *addr, return string(buf, end, ip6_addr, spec); } -static char *ip4_addr_string(char *buf, char *end, const u8 *addr, - struct printf_spec spec, const char *fmt) +static noinline_for_stack +char *ip4_addr_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) { char ip4_addr[sizeof("255.255.255.255")]; @@ -881,8 +889,9 @@ static char *ip4_addr_string(char *buf, char *end, const u8 *addr, return string(buf, end, ip4_addr, spec); } -static char *uuid_string(char *buf, char *end, const u8 *addr, - struct printf_spec spec, const char *fmt) +static noinline_for_stack +char *uuid_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) { char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; char *p = uuid; @@ -965,13 +974,19 @@ static char *uuid_string(char *buf, char *end, const u8 *addr, * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15] * little endian output byte order is: * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15] + * - 'V' For a struct va_format which contains a format string * and va_list *, + * call vsnprintf(->format, *->va_list). + * Implements a "recursive vsnprintf". + * Do not use this feature without some mechanism to verify the + * correctness of the format string and va_list arguments. * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a * pointer to the real address. */ -static char *pointer(const char *fmt, char *buf, char *end, void *ptr, - struct printf_spec spec) +static noinline_for_stack +char *pointer(const char *fmt, char *buf, char *end, void *ptr, + struct printf_spec spec) { if (!ptr) return string(buf, end, "(null)", spec); @@ -1009,6 +1024,10 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, break; case 'U': return uuid_string(buf, end, ptr, spec, fmt); + case 'V': + return buf + vsnprintf(buf, end - buf, + ((struct va_format *)ptr)->fmt, + *(((struct va_format *)ptr)->va)); } spec.flags |= SMALL; if (spec.field_width == -1) { @@ -1040,7 +1059,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, * @precision: precision of a number * @qualifier: qualifier of a number (long, size_t, ...) 
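The new %pV extension is easiest to see from the caller's side: wrap the format string and va_list in a struct va_format and print both through a single conversion. A minimal sketch of such a wrapper (the function name and prefix handling are illustrative):

#include <linux/kernel.h>

static void my_warn(const char *prefix, const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;

        va_start(args, fmt);
        vaf.fmt = fmt;          /* caller's format, trailing newline included */
        vaf.va = &args;
        printk(KERN_WARNING "%s: %pV", prefix, &vaf);
        va_end(args);
}

This is what makes single-printk prefix wrappers possible without an intermediate buffer, but, as the documentation above warns, the format/va_list pair is interpreted recursively and must therefore be trusted.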
*/ -static int format_decode(const char *fmt, struct printf_spec *spec) +static noinline_for_stack +int format_decode(const char *fmt, struct printf_spec *spec) { const char *start = fmt; @@ -1980,7 +2000,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args) { char *s = (char *)va_arg(args, char *); if (field_width == -1) - field_width = SHORT_MAX; + field_width = SHRT_MAX; /* first, skip leading white space in buffer */ str = skip_spaces(str); |
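As a closing usage note on the vsscanf() hunk: the SHRT_MAX clamp only comes into play for '%s' conversions with no explicit field width; typical callers bound the width themselves, as in this hypothetical caller:

#include <linux/errno.h>
#include <linux/kernel.h>

static int parse_name(const char *buf)
{
        char name[16];

        /* width 15 leaves room for the NUL that the %s conversion appends */
        if (sscanf(buf, "%15s", name) != 1)
                return -EINVAL;
        pr_info("parsed name: %s\n", name);
        return 0;
}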