diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 21 | ||||
-rw-r--r-- | lib/Kconfig.debug | 60 | ||||
-rw-r--r-- | lib/Makefile | 2 | ||||
-rw-r--r-- | lib/devres.c | 2 | ||||
-rw-r--r-- | lib/ioq.c | 304 | ||||
-rw-r--r-- | lib/ioremap.c | 10 | ||||
-rw-r--r-- | lib/random32.c | 2 | ||||
-rw-r--r-- | lib/rbtree.c | 116 | ||||
-rw-r--r-- | lib/shm_signal.c | 196 | ||||
-rw-r--r-- | lib/swiotlb.c | 137 | ||||
-rw-r--r-- | lib/vsprintf.c | 9 |
11 files changed, 734 insertions, 125 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 170d8ca901d8..af12831f2eea 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -210,4 +210,25 @@ config GENERIC_ATOMIC64 config LRU_CACHE tristate +config SHM_SIGNAL + tristate "SHM Signal - Generic shared-memory signaling mechanism" + default n + help + Provides a shared-memory based signaling mechanism to indicate + memory-dirty notifications between two end-points. + + If unsure, say N + +config IOQ + tristate "IO-Queue library - Generic shared-memory queue" + select SHM_SIGNAL + default n + help + IOQ is a generic shared-memory-queue mechanism that happens to be + friendly to virtualization boundaries. It can be used in a variety + of ways, though its intended purpose is to become a low-level + communication path for paravirtualized drivers. + + If unsure, say N + endmenu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e722e9d62221..ec4519477ffe 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -152,28 +152,33 @@ config DEBUG_SHIRQ Drivers ought to be able to handle interrupts coming in at those points; some don't and need to be caught. -config DETECT_SOFTLOCKUP - bool "Detect Soft Lockups" +config LOCKUP_DETECTOR + bool "Detect Hard and Soft Lockups" depends on DEBUG_KERNEL && !S390 - default y help - Say Y here to enable the kernel to detect "soft lockups", - which are bugs that cause the kernel to loop in kernel + Say Y here to enable the kernel to act as a watchdog to detect + hard and soft lockups. + + Softlockups are bugs that cause the kernel to loop in kernel mode for more than 60 seconds, without giving other tasks a - chance to run. + chance to run. The current stack trace is displayed upon + detection and the system will stay locked up. - When a soft-lockup is detected, the kernel will print the - current stack trace (which you should report), but the - system will stay locked up. This feature has negligible - overhead. + Hardlockups are bugs that cause the CPU to loop in kernel mode + for more than 60 seconds, without letting other interrupts have a + chance to run. The current stack trace is displayed upon detection + and the system will stay locked up. - (Note that "hard lockups" are separate type of bugs that - can be detected via the NMI-watchdog, on platforms that - support it.) + The overhead should be minimal. A periodic hrtimer runs to + generate interrupts and kick the watchdog task every 10-12 seconds. + An NMI is generated every 60 seconds or so to check for hardlockups. + +config HARDLOCKUP_DETECTOR + def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" - depends on DETECT_SOFTLOCKUP + depends on LOCKUP_DETECTOR help Say Y here to enable the kernel to panic on "soft lockups", which are bugs that cause the kernel to loop in kernel @@ -190,7 +195,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE int - depends on DETECT_SOFTLOCKUP + depends on LOCKUP_DETECTOR range 0 1 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC default 1 if BOOTPARAM_SOFTLOCKUP_PANIC @@ -307,6 +312,12 @@ config DEBUG_OBJECTS_WORK work queue routines to track the life time of work objects and validate the work operations. +config DEBUG_OBJECTS_RCU_HEAD + bool "Debug RCU callbacks objects" + depends on DEBUG_OBJECTS && PREEMPT + help + Enable this to turn on debugging of RCU list heads (call_rcu() usage). + config DEBUG_OBJECTS_ENABLE_DEFAULT int "debug_objects bootup default value (0-1)" range 0 1 @@ -528,7 +539,7 @@ config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 + select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE select KALLSYMS select KALLSYMS_ALL @@ -628,6 +639,19 @@ config DEBUG_INFO If unsure, say N. +config DEBUG_INFO_REDUCED + bool "Reduce debugging information" + depends on DEBUG_INFO + help + If you say Y here gcc is instructed to generate less debugging + information for structure types. This means that tools that + need full debugging information (like kgdb or systemtap) won't + be happy. But if you merely need debugging information to + resolve line numbers there is no loss. Advantage is that + build directory object sizes shrink dramatically over a full + DEBUG_INFO build and compile times are reduced too. + Only works with newer gcc versions. + config DEBUG_VM bool "Debug VM" depends on DEBUG_KERNEL @@ -958,13 +982,13 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !PPC && !S390 + select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE help Provide stacktrace filter for fault-injection capabilities config LATENCYTOP bool "Latency measuring infrastructure" - select FRAME_POINTER if !MIPS && !PPC && !S390 + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE select KALLSYMS select KALLSYMS_ALL select STACKTRACE diff --git a/lib/Makefile b/lib/Makefile index 3f1062cbbff4..e18f1a6b2c14 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -81,6 +81,8 @@ obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o obj-$(CONFIG_SMP) += percpu_counter.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o +obj-$(CONFIG_SHM_SIGNAL) += shm_signal.o +obj-$(CONFIG_IOQ) += ioq.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o diff --git a/lib/devres.c b/lib/devres.c index 49368608f988..6efddf53b90c 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -328,7 +328,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all); * @pdev: PCI device to map IO resources for * @mask: Mask of BARs to unmap and release * - * Unamp and release regions specified by @mask. + * Unmap and release regions specified by @mask. */ void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask) { diff --git a/lib/ioq.c b/lib/ioq.c new file mode 100644 index 000000000000..4027848d7436 --- /dev/null +++ b/lib/ioq.c @@ -0,0 +1,304 @@ +/* + * Copyright 2009 Novell. All Rights Reserved. + * + * See include/linux/ioq.h for documentation + * + * Author: + * Gregory Haskins <ghaskins@novell.com> + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/sched.h> +#include <linux/ioq.h> +#include <linux/bitops.h> +#include <linux/module.h> + +MODULE_AUTHOR("Gregory Haskins"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1"); + +#ifndef NULL +#define NULL 0 +#endif + +static int ioq_iter_setpos(struct ioq_iterator *iter, u32 pos) +{ + struct ioq *ioq = iter->ioq; + + BUG_ON(pos >= ioq->count); + + iter->pos = pos; + iter->desc = &ioq->ring[pos]; + + return 0; +} + +static inline u32 modulo_inc(u32 val, u32 mod) +{ + BUG_ON(val >= mod); + + if (val == (mod - 1)) + return 0; + + return val + 1; +} + +static inline int idx_full(struct ioq_ring_idx *idx) +{ + return idx->full && (idx->head == idx->tail); +} + +int ioq_iter_seek(struct ioq_iterator *iter, enum ioq_seek_type type, + long offset, int flags) +{ + struct ioq_ring_idx *idx = iter->idx; + u32 pos; + + switch (type) { + case ioq_seek_next: + pos = modulo_inc(iter->pos, iter->ioq->count); + break; + case ioq_seek_tail: + pos = le32_to_cpu(idx->tail); + break; + case ioq_seek_head: + pos = le32_to_cpu(idx->head); + break; + case ioq_seek_set: + if (offset >= iter->ioq->count) + return -1; + pos = offset; + break; + default: + return -EINVAL; + } + + return ioq_iter_setpos(iter, pos); +} +EXPORT_SYMBOL_GPL(ioq_iter_seek); + +static int ioq_ring_count(struct ioq_ring_idx *idx, int count) +{ + u32 head = le32_to_cpu(idx->head); + u32 tail = le32_to_cpu(idx->tail); + + if (idx->full && (head == tail)) + return count; + else if (tail >= head) + return tail - head; + else + return (tail + count) - head; +} + +static void idx_tail_push(struct ioq_ring_idx *idx, int count) +{ + u32 tail = modulo_inc(le32_to_cpu(idx->tail), count); + u32 head = le32_to_cpu(idx->head); + + if (head == tail) { + rmb(); + + /* + * Setting full here may look racy, but note that we havent + * flipped the owner bit yet. So it is impossible for the + * remote locale to move head in such a way that this operation + * becomes invalid + */ + idx->full = 1; + wmb(); + } + + idx->tail = cpu_to_le32(tail); +} + +int ioq_iter_push(struct ioq_iterator *iter, int flags) +{ + struct ioq_ring_head *head_desc = iter->ioq->head_desc; + struct ioq_ring_idx *idx = iter->idx; + int ret; + + /* + * Its only valid to push if we are currently pointed at the tail + */ + if (iter->pos != le32_to_cpu(idx->tail) || iter->desc->sown != iter->ioq->locale) + return -EINVAL; + + idx_tail_push(idx, iter->ioq->count); + if (iter->dualidx) { + idx_tail_push(&head_desc->idx[ioq_idxtype_inuse], + iter->ioq->count); + if (head_desc->idx[ioq_idxtype_inuse].tail != + head_desc->idx[ioq_idxtype_valid].tail) { + SHM_SIGNAL_FAULT(iter->ioq->signal, + "Tails not synchronized"); + return -EINVAL; + } + } + + wmb(); /* the index must be visible before the sown, or signal */ + + if (iter->flipowner) { + iter->desc->sown = !iter->ioq->locale; + wmb(); /* sown must be visible before we signal */ + } + + ret = ioq_iter_seek(iter, ioq_seek_next, 0, flags); + + if (iter->update) + ioq_signal(iter->ioq, 0); + + return ret; +} +EXPORT_SYMBOL_GPL(ioq_iter_push); + +int ioq_iter_pop(struct ioq_iterator *iter, int flags) +{ + struct ioq_ring_idx *idx = iter->idx; + int ret; + + /* + * Its only valid to pop if we are currently pointed at the head + */ + if (iter->pos != le32_to_cpu(idx->head) || iter->desc->sown != iter->ioq->locale) + return -EINVAL; + + idx->head = cpu_to_le32(modulo_inc(le32_to_cpu(idx->head), iter->ioq->count)); + wmb(); /* head must be visible before full */ + + if (idx->full) { + idx->full = 0; + wmb(); /* full must be visible before sown */ + } + + if (iter->flipowner) { + iter->desc->sown = !iter->ioq->locale; + wmb(); /* sown must be visible before we signal */ + } + + ret = ioq_iter_seek(iter, ioq_seek_next, 0, flags); + + if (iter->update) + ioq_signal(iter->ioq, 0); + + return ret; +} +EXPORT_SYMBOL_GPL(ioq_iter_pop); + +static struct ioq_ring_idx *idxtype_to_idx(struct ioq *ioq, + enum ioq_idx_type type) +{ + struct ioq_ring_idx *idx; + + switch (type) { + case ioq_idxtype_valid: + case ioq_idxtype_inuse: + idx = &ioq->head_desc->idx[type]; + break; + default: + panic("IOQ: illegal index type: %d", type); + break; + } + + return idx; +} + +int ioq_iter_init(struct ioq *ioq, struct ioq_iterator *iter, + enum ioq_idx_type type, int flags) +{ + iter->ioq = ioq; + iter->update = (flags & IOQ_ITER_AUTOUPDATE); + iter->flipowner = !(flags & IOQ_ITER_NOFLIPOWNER); + iter->pos = -1; + iter->desc = NULL; + iter->dualidx = 0; + + if (type == ioq_idxtype_both) { + /* + * "both" is a special case, so we set the dualidx flag. + * + * However, we also just want to use the valid-index + * for normal processing, so override that here + */ + type = ioq_idxtype_valid; + iter->dualidx = 1; + } + + iter->idx = idxtype_to_idx(ioq, type); + + return 0; +} +EXPORT_SYMBOL_GPL(ioq_iter_init); + +int ioq_count(struct ioq *ioq, enum ioq_idx_type type) +{ + return ioq_ring_count(idxtype_to_idx(ioq, type), ioq->count); +} +EXPORT_SYMBOL_GPL(ioq_count); + +int ioq_remain(struct ioq *ioq, enum ioq_idx_type type) +{ + int count = ioq_ring_count(idxtype_to_idx(ioq, type), ioq->count); + + return ioq->count - count; +} +EXPORT_SYMBOL_GPL(ioq_remain); + +int ioq_size(struct ioq *ioq) +{ + return ioq->count; +} +EXPORT_SYMBOL_GPL(ioq_size); + +int ioq_full(struct ioq *ioq, enum ioq_idx_type type) +{ + struct ioq_ring_idx *idx = idxtype_to_idx(ioq, type); + + return idx_full(idx); +} +EXPORT_SYMBOL_GPL(ioq_full); + +static void ioq_shm_signal(struct shm_signal_notifier *notifier) +{ + struct ioq *ioq = container_of(notifier, struct ioq, shm_notifier); + + if (waitqueue_active(&ioq->wq)) + wake_up(&ioq->wq); + + if (ioq->notifier) + ioq->notifier->signal(ioq->notifier); +} + +void ioq_init(struct ioq *ioq, + struct ioq_ops *ops, + enum ioq_locality locale, + struct ioq_ring_head *head, + struct shm_signal *signal, + size_t count) +{ + memset(ioq, 0, sizeof(*ioq)); + kref_init(&ioq->kref); + init_waitqueue_head(&ioq->wq); + + ioq->ops = ops; + ioq->locale = locale; + ioq->head_desc = head; + ioq->ring = &head->ring[0]; + ioq->count = count; + ioq->signal = signal; + + ioq->shm_notifier.signal = &ioq_shm_signal; + signal->notifier = &ioq->shm_notifier; +} +EXPORT_SYMBOL_GPL(ioq_init); diff --git a/lib/ioremap.c b/lib/ioremap.c index 14c6078f17a2..5730ecd3eb66 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -13,10 +13,10 @@ #include <asm/pgtable.h> static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pte_t *pte; - unsigned long pfn; + u64 pfn; pfn = phys_addr >> PAGE_SHIFT; pte = pte_alloc_kernel(pmd, addr); @@ -31,7 +31,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, } static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pmd_t *pmd; unsigned long next; @@ -49,7 +49,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, } static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pud_t *pud; unsigned long next; @@ -67,7 +67,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, } int ioremap_page_range(unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pgd_t *pgd; unsigned long start; diff --git a/lib/random32.c b/lib/random32.c index 870dc3fc0f0f..fc3545a32771 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -127,7 +127,7 @@ core_initcall(random32_init); /* * Generate better values after random number generator - * is fully initalized. + * is fully initialized. */ static int __init random32_reseed(void) { diff --git a/lib/rbtree.c b/lib/rbtree.c index 15e10b1afdd2..4693f79195d3 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -44,11 +44,6 @@ static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) else root->rb_node = right; rb_set_parent(node, right); - - if (root->augment_cb) { - root->augment_cb(node); - root->augment_cb(right); - } } static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) @@ -72,20 +67,12 @@ static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) else root->rb_node = left; rb_set_parent(node, left); - - if (root->augment_cb) { - root->augment_cb(node); - root->augment_cb(left); - } } void rb_insert_color(struct rb_node *node, struct rb_root *root) { struct rb_node *parent, *gparent; - if (root->augment_cb) - root->augment_cb(node); - while ((parent = rb_parent(node)) && rb_is_red(parent)) { gparent = rb_parent(parent); @@ -240,15 +227,12 @@ void rb_erase(struct rb_node *node, struct rb_root *root) else { struct rb_node *old = node, *left; - int old_parent_cb = 0; - int successor_parent_cb = 0; node = node->rb_right; while ((left = node->rb_left) != NULL) node = left; if (rb_parent(old)) { - old_parent_cb = 1; if (rb_parent(old)->rb_left == old) rb_parent(old)->rb_left = node; else @@ -263,10 +247,8 @@ void rb_erase(struct rb_node *node, struct rb_root *root) if (parent == old) { parent = node; } else { - successor_parent_cb = 1; if (child) rb_set_parent(child, parent); - parent->rb_left = child; node->rb_right = old->rb_right; @@ -277,24 +259,6 @@ void rb_erase(struct rb_node *node, struct rb_root *root) node->rb_left = old->rb_left; rb_set_parent(old->rb_left, node); - if (root->augment_cb) { - /* - * Here, three different nodes can have new children. - * The parent of the successor node that was selected - * to replace the node to be erased. - * The node that is getting erased and is now replaced - * by its successor. - * The parent of the node getting erased-replaced. - */ - if (successor_parent_cb) - root->augment_cb(parent); - - root->augment_cb(node); - - if (old_parent_cb) - root->augment_cb(rb_parent(old)); - } - goto color; } @@ -303,19 +267,15 @@ void rb_erase(struct rb_node *node, struct rb_root *root) if (child) rb_set_parent(child, parent); - - if (parent) { + if (parent) + { if (parent->rb_left == node) parent->rb_left = child; else parent->rb_right = child; - - if (root->augment_cb) - root->augment_cb(parent); - - } else { - root->rb_node = child; } + else + root->rb_node = child; color: if (color == RB_BLACK) @@ -323,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root) } EXPORT_SYMBOL(rb_erase); +static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data) +{ + struct rb_node *parent; + +up: + func(node, data); + parent = rb_parent(node); + if (!parent) + return; + + if (node == parent->rb_left && parent->rb_right) + func(parent->rb_right, data); + else if (parent->rb_left) + func(parent->rb_left, data); + + node = parent; + goto up; +} + +/* + * after inserting @node into the tree, update the tree to account for + * both the new entry and any damage done by rebalance + */ +void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + + rb_augment_path(node, func, data); +} + +/* + * before removing the node, find the deepest node on the rebalance path + * that will still be there after @node gets removed + */ +struct rb_node *rb_augment_erase_begin(struct rb_node *node) +{ + struct rb_node *deepest; + + if (!node->rb_right && !node->rb_left) + deepest = rb_parent(node); + else if (!node->rb_right) + deepest = node->rb_left; + else if (!node->rb_left) + deepest = node->rb_right; + else { + deepest = rb_next(node); + if (deepest->rb_right) + deepest = deepest->rb_right; + else if (rb_parent(deepest) != node) + deepest = rb_parent(deepest); + } + + return deepest; +} + +/* + * after removal, update the tree to account for the removed entry + * and any rebalance damage. + */ +void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node) + rb_augment_path(node, func, data); +} + /* * This function returns the first node (in sort order) of the tree. */ diff --git a/lib/shm_signal.c b/lib/shm_signal.c new file mode 100644 index 000000000000..8d3e9b418a27 --- /dev/null +++ b/lib/shm_signal.c @@ -0,0 +1,196 @@ +/* + * Copyright 2009 Novell. All Rights Reserved. + * + * See include/linux/shm_signal.h for documentation + * + * Author: + * Gregory Haskins <ghaskins@novell.com> + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/shm_signal.h> + +MODULE_AUTHOR("Gregory Haskins"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1"); + +int shm_signal_enable(struct shm_signal *s, int flags) +{ + struct shm_signal_irq *irq = &s->desc->irq[s->locale]; + unsigned long iflags; + + spin_lock_irqsave(&s->lock, iflags); + + irq->enabled = 1; + wmb(); + + if ((irq->dirty || irq->pending) + && !test_bit(shm_signal_in_wakeup, &s->flags)) { + rmb(); + tasklet_schedule(&s->deferred_notify); + } + + spin_unlock_irqrestore(&s->lock, iflags); + + return 0; +} +EXPORT_SYMBOL_GPL(shm_signal_enable); + +int shm_signal_disable(struct shm_signal *s, int flags) +{ + struct shm_signal_irq *irq = &s->desc->irq[s->locale]; + + irq->enabled = 0; + wmb(); + + return 0; +} +EXPORT_SYMBOL_GPL(shm_signal_disable); + +/* + * signaling protocol: + * + * each side of the shm_signal has an "irq" structure with the following + * fields: + * + * - enabled: controlled by shm_signal_enable/disable() to mask/unmask + * the notification locally + * - dirty: indicates if the shared-memory is dirty or clean. This + * is updated regardless of the enabled/pending state so that + * the state is always accurately tracked. + * - pending: indicates if a signal is pending to the remote locale. + * This allows us to determine if a remote-notification is + * already in flight to optimize spurious notifications away. + */ +int shm_signal_inject(struct shm_signal *s, int flags) +{ + /* Load the irq structure from the other locale */ + struct shm_signal_irq *irq = &s->desc->irq[!s->locale]; + + /* + * We always mark the remote side as dirty regardless of whether + * they need to be notified. + */ + irq->dirty = 1; + wmb(); /* dirty must be visible before we test the pending state */ + + if (irq->enabled && !irq->pending) { + rmb(); + + /* + * If the remote side has enabled notifications, and we do + * not see a notification pending, we must inject a new one. + */ + irq->pending = 1; + wmb(); /* make it visible before we do the injection */ + + s->ops->inject(s); + } + + return 0; +} +EXPORT_SYMBOL_GPL(shm_signal_inject); + +void _shm_signal_wakeup(struct shm_signal *s) +{ + struct shm_signal_irq *irq = &s->desc->irq[s->locale]; + int dirty; + unsigned long flags; + + spin_lock_irqsave(&s->lock, flags); + + __set_bit(shm_signal_in_wakeup, &s->flags); + + /* + * The outer loop protects against race conditions between + * irq->dirty and irq->pending updates + */ + while (irq->enabled && (irq->dirty || irq->pending)) { + + /* + * Run until we completely exhaust irq->dirty (it may + * be re-dirtied by the remote side while we are in the + * callback). We let "pending" remain untouched until we have + * processed them all so that the remote side knows we do not + * need a new notification (yet). + */ + do { + irq->dirty = 0; + /* the unlock is an implicit wmb() for dirty = 0 */ + spin_unlock_irqrestore(&s->lock, flags); + + if (s->notifier) + s->notifier->signal(s->notifier); + + spin_lock_irqsave(&s->lock, flags); + dirty = irq->dirty; + rmb(); + + } while (irq->enabled && dirty); + + barrier(); + + /* + * We can finally acknowledge the notification by clearing + * "pending" after all of the dirty memory has been processed + * Races against this clearing are handled by the outer loop. + * Subsequent iterations of this loop will execute with + * pending=0 potentially leading to future spurious + * notifications, but this is an acceptable tradeoff as this + * will be rare and harmless. + */ + irq->pending = 0; + wmb(); + + } + + __clear_bit(shm_signal_in_wakeup, &s->flags); + spin_unlock_irqrestore(&s->lock, flags); + +} +EXPORT_SYMBOL_GPL(_shm_signal_wakeup); + +void _shm_signal_release(struct kref *kref) +{ + struct shm_signal *s = container_of(kref, struct shm_signal, kref); + + s->ops->release(s); +} +EXPORT_SYMBOL_GPL(_shm_signal_release); + +static void +deferred_notify(unsigned long data) +{ + struct shm_signal *s = (struct shm_signal *)data; + + _shm_signal_wakeup(s); +} + +void shm_signal_init(struct shm_signal *s, enum shm_signal_locality locale, + struct shm_signal_ops *ops, struct shm_signal_desc *desc) +{ + memset(s, 0, sizeof(*s)); + kref_init(&s->kref); + spin_lock_init(&s->lock); + tasklet_init(&s->deferred_notify, + deferred_notify, + (unsigned long)s); + s->locale = locale; + s->ops = ops; + s->desc = desc; +} +EXPORT_SYMBOL_GPL(shm_signal_init); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index a009055140ec..34e3082632d8 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -50,19 +50,11 @@ */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -/* - * Enumeration for sync targets - */ -enum dma_sync_target { - SYNC_FOR_CPU = 0, - SYNC_FOR_DEVICE = 1, -}; - int swiotlb_force; /* - * Used to do a quick range check in unmap_single and - * sync_single_*, to see if the memory was in fact allocated by this + * Used to do a quick range check in swiotlb_tbl_unmap_single and + * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this * API. */ static char *io_tlb_start, *io_tlb_end; @@ -140,28 +132,14 @@ void swiotlb_print_info(void) (unsigned long long)pend); } -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the DMA API. - */ -void __init -swiotlb_init_with_default_size(size_t default_size, int verbose) +void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) { unsigned long i, bytes; - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - bytes = io_tlb_nslabs << IO_TLB_SHIFT; + bytes = nslabs << IO_TLB_SHIFT; - /* - * Get IO TLB memory from the low pages - */ - io_tlb_start = alloc_bootmem_low_pages(bytes); - if (!io_tlb_start) - panic("Cannot allocate SWIOTLB buffer"); + io_tlb_nslabs = nslabs; + io_tlb_start = tlb; io_tlb_end = io_tlb_start + bytes; /* @@ -185,6 +163,32 @@ swiotlb_init_with_default_size(size_t default_size, int verbose) swiotlb_print_info(); } +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the DMA API. + */ +void __init +swiotlb_init_with_default_size(size_t default_size, int verbose) +{ + unsigned long bytes; + + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + + /* + * Get IO TLB memory from the low pages + */ + io_tlb_start = alloc_bootmem_low_pages(bytes); + if (!io_tlb_start) + panic("Cannot allocate SWIOTLB buffer"); + + swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose); +} + void __init swiotlb_init(int verbose) { @@ -323,8 +327,8 @@ static int is_swiotlb_buffer(phys_addr_t paddr) /* * Bounce: copy the swiotlb buffer back to the original dma location */ -static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, - enum dma_data_direction dir) +void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, + enum dma_data_direction dir) { unsigned long pfn = PFN_DOWN(phys); @@ -360,26 +364,25 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, memcpy(phys_to_virt(phys), dma_addr, size); } } +EXPORT_SYMBOL_GPL(swiotlb_bounce); -/* - * Allocates bounce buffer and returns its kernel virtual address. - */ -static void * -map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir) +void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, + phys_addr_t phys, size_t size, + enum dma_data_direction dir) { unsigned long flags; char *dma_addr; unsigned int nslots, stride, index, wrap; int i; - unsigned long start_dma_addr; unsigned long mask; unsigned long offset_slots; unsigned long max_slots; mask = dma_get_seg_boundary(hwdev); - start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask; - offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + tbl_dma_addr &= mask; + + offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; /* * Carefully handle integer overflow which can occur when mask == ~0UL. @@ -466,12 +469,27 @@ found: return dma_addr; } +EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ + +static void * +map_single(struct device *hwdev, phys_addr_t phys, size_t size, + enum dma_data_direction dir) +{ + dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start); + + return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); +} /* * dma_addr is the kernel virtual address of the bounce buffer to unmap. */ -static void -do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +void +swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, + enum dma_data_direction dir) { unsigned long flags; int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; @@ -509,10 +527,12 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) } spin_unlock_irqrestore(&io_tlb_lock, flags); } +EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); -static void -sync_single(struct device *hwdev, char *dma_addr, size_t size, - int dir, int target) +void +swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size, + enum dma_data_direction dir, + enum dma_sync_target target) { int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; phys_addr_t phys = io_tlb_orig_addr[index]; @@ -536,6 +556,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size, BUG(); } } +EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); void * swiotlb_alloc_coherent(struct device *hwdev, size_t size, @@ -559,8 +580,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, } if (!ret) { /* - * We are either out of memory or the device can't DMA - * to GFP_DMA memory; fall back on map_single(), which + * We are either out of memory or the device can't DMA to + * GFP_DMA memory; fall back on map_single(), which * will grab memory from the lowest available address range. */ ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); @@ -578,7 +599,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, (unsigned long long)dev_addr); /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); + swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); return NULL; } *dma_handle = dev_addr; @@ -596,13 +617,14 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, if (!is_swiotlb_buffer(paddr)) free_pages((unsigned long)vaddr, get_order(size)); else - /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); + /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ + swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); } EXPORT_SYMBOL(swiotlb_free_coherent); static void -swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) +swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, + int do_panic) { /* * Ran out of IOMMU space for this operation. This is very bad. @@ -680,14 +702,14 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); * whatever the device wrote there. */ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) + size_t size, enum dma_data_direction dir) { phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(paddr)) { - do_unmap_single(hwdev, phys_to_virt(paddr), size, dir); + swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir); return; } @@ -723,14 +745,16 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page); */ static void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir, int target) + size_t size, enum dma_data_direction dir, + enum dma_sync_target target) { phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(paddr)) { - sync_single(hwdev, phys_to_virt(paddr), size, dir, target); + swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir, + target); return; } @@ -809,7 +833,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs); int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir) + enum dma_data_direction dir) { return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); } @@ -836,7 +860,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir) + enum dma_data_direction dir) { return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); } @@ -851,7 +875,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg); */ static void swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, - int nelems, int dir, int target) + int nelems, enum dma_data_direction dir, + enum dma_sync_target target) { struct scatterlist *sg; int i; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index b8a2f549ab0e..4ee19d0d3910 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -980,6 +980,11 @@ char *uuid_string(char *buf, char *end, const u8 *addr, * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15] * little endian output byte order is: * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15] + * - 'V' For a struct va_format which contains a format string * and va_list *, + * call vsnprintf(->format, *->va_list). + * Implements a "recursive vsnprintf". + * Do not use this feature without some mechanism to verify the + * correctness of the format string and va_list arguments. * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a @@ -1025,6 +1030,10 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, break; case 'U': return uuid_string(buf, end, ptr, spec, fmt); + case 'V': + return buf + vsnprintf(buf, end - buf, + ((struct va_format *)ptr)->fmt, + *(((struct va_format *)ptr)->va)); } spec.flags |= SMALL; if (spec.field_width == -1) { |