From 08970ecf744e09837bb6620c95406710f4c81ae2 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Thu, 18 Apr 2019 16:54:01 +0100 Subject: irq/irqdomain: Fix typo in the comment on top of __irq_domain_alloc_irqs() The word 'number' has been misspelt in the comment on top of _irq_domain_alloc_irqs(). Signed-off-by: Julien Grall Signed-off-by: Marc Zyngier --- kernel/irq/irqdomain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 9ed29e4a7dbf..a453e229f99c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1297,7 +1297,7 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, /** * __irq_domain_alloc_irqs - Allocate IRQs from domain * @domain: domain to allocate from - * @irq_base: allocate specified IRQ nubmer if irq_base >= 0 + * @irq_base: allocate specified IRQ number if irq_base >= 0 * @nr_irqs: number of IRQs to allocate * @node: NUMA node id for memory allocation * @arg: domain specific argument -- cgit v1.2.3 From 2bd1298ac17777525a41c8425521f569e412df14 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Tue, 30 Apr 2019 15:42:22 +0530 Subject: genirq: Introduce irq_chip_{request,release}_resource_parent() apis Introduce irq_chip_{request,release}_resource_parent() apis so that these can be used in hierarchical irqchips. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier --- include/linux/irq.h | 2 ++ kernel/irq/chip.c | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'kernel') diff --git a/include/linux/irq.h b/include/linux/irq.h index 7ae8de5ad0f2..fb301cf29148 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -625,6 +625,8 @@ extern int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on); extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info); extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type); +extern int irq_chip_request_resources_parent(struct irq_data *data); +extern void irq_chip_release_resources_parent(struct irq_data *data); #endif /* Handling of unhandled and spurious interrupts: */ diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 51128bea3846..29d6c7d070b4 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1459,6 +1459,33 @@ int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on) return -ENOSYS; } EXPORT_SYMBOL_GPL(irq_chip_set_wake_parent); + +/** + * irq_chip_request_resources_parent - Request resources on the parent interrupt + * @data: Pointer to interrupt specific data + */ +int irq_chip_request_resources_parent(struct irq_data *data) +{ + data = data->parent_data; + + if (data->chip->irq_request_resources) + return data->chip->irq_request_resources(data); + + return -ENOSYS; +} +EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent); + +/** + * irq_chip_release_resources_parent - Release resources on the parent interrupt + * @data: Pointer to interrupt specific data + */ +void irq_chip_release_resources_parent(struct irq_data *data) +{ + data = data->parent_data; + if (data->chip->irq_release_resources) + data->chip->irq_release_resources(data); +} +EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent); #endif /** -- cgit v1.2.3 From aaebdf8d68479f78d9f72b239684f70fbb0722c6 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Wed, 1 May 2019 14:58:18 +0100 Subject: genirq/msi: Add a new field in msi_desc to store an IOMMU cookie When an MSI doorbell is located downstream of an IOMMU, it is required to swizzle the physical address with an appropriately-mapped IOVA for any device attached to one of our DMA ops domain. At the moment, the allocation of the mapping may be done when composing the message. However, the composing may be done in non-preemtible context while the allocation requires to be called from preemptible context. A follow-up change will split the current logic in two functions requiring to keep an IOMMU cookie per MSI. A new field is introduced in msi_desc to store an IOMMU cookie. As the cookie may not be required in some configuration, the field is protected under a new config CONFIG_IRQ_MSI_IOMMU. A pair of helpers has also been introduced to access the field. Signed-off-by: Julien Grall Reviewed-by: Robin Murphy Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/msi.h | 26 ++++++++++++++++++++++++++ kernel/irq/Kconfig | 3 +++ 2 files changed, 29 insertions(+) (limited to 'kernel') diff --git a/include/linux/msi.h b/include/linux/msi.h index 7c28762851a3..3eb42ede0114 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -86,6 +86,9 @@ struct msi_desc { struct device *dev; struct msi_msg msg; struct irq_affinity_desc *affinity; +#ifdef CONFIG_IRQ_MSI_IOMMU + const void *iommu_cookie; +#endif union { /* PCI MSI/X specific data */ @@ -129,6 +132,29 @@ struct msi_desc { #define for_each_msi_entry_safe(desc, tmp, dev) \ list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list) +#ifdef CONFIG_IRQ_MSI_IOMMU +static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) +{ + return desc->iommu_cookie; +} + +static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, + const void *iommu_cookie) +{ + desc->iommu_cookie = iommu_cookie; +} +#else +static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) +{ + return NULL; +} + +static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, + const void *iommu_cookie) +{ +} +#endif + #ifdef CONFIG_PCI_MSI #define first_pci_msi_entry(pdev) first_msi_entry(&(pdev)->dev) #define for_each_pci_msi_entry(desc, pdev) \ diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 5f3e2baefca9..8fee06625c37 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -91,6 +91,9 @@ config GENERIC_MSI_IRQ_DOMAIN select IRQ_DOMAIN_HIERARCHY select GENERIC_MSI_IRQ +config IRQ_MSI_IOMMU + bool + config HANDLE_DOMAIN_IRQ bool -- cgit v1.2.3 From a9e9bcb45b1525ba7aea26ed9441e8632aeeda58 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 28 Apr 2019 17:25:38 -0400 Subject: locking/rwsem: Prevent decrement of reader count before increment During my rwsem testing, it was found that after a down_read(), the reader count may occasionally become 0 or even negative. Consequently, a writer may steal the lock at that time and execute with the reader in parallel thus breaking the mutual exclusion guarantee of the write lock. In other words, both readers and writer can become rwsem owners simultaneously. The current reader wakeup code does it in one pass to clear waiter->task and put them into wake_q before fully incrementing the reader count. Once waiter->task is cleared, the corresponding reader may see it, finish the critical section and do unlock to decrement the count before the count is incremented. This is not a problem if there is only one reader to wake up as the count has been pre-incremented by 1. It is a problem if there are more than one readers to be woken up and writer can steal the lock. The wakeup was actually done in 2 passes before the following v4.9 commit: 70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once") To fix this problem, the wakeup is now done in two passes again. In the first pass, we collect the readers and count them. The reader count is then fully incremented. In the second pass, the waiter->task is then cleared and they are put into wake_q to be woken up later. Signed-off-by: Waiman Long Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: huang ying Fixes: 70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once") Link: http://lkml.kernel.org/r/20190428212557.13482-2-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 46 ++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 6b3ee9948bf1..0b1f77957240 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -130,6 +130,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, { struct rwsem_waiter *waiter, *tmp; long oldcount, woken = 0, adjustment = 0; + struct list_head wlist; /* * Take a peek at the queue head waiter such that we can determine @@ -188,18 +189,43 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, * of the queue. We know that woken will be at least 1 as we accounted * for above. Note we increment the 'active part' of the count by the * number of readers before waking any processes up. + * + * We have to do wakeup in 2 passes to prevent the possibility that + * the reader count may be decremented before it is incremented. It + * is because the to-be-woken waiter may not have slept yet. So it + * may see waiter->task got cleared, finish its critical section and + * do an unlock before the reader count increment. + * + * 1) Collect the read-waiters in a separate list, count them and + * fully increment the reader count in rwsem. + * 2) For each waiters in the new list, clear waiter->task and + * put them into wake_q to be woken up later. */ - list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) { - struct task_struct *tsk; - + list_for_each_entry(waiter, &sem->wait_list, list) { if (waiter->type == RWSEM_WAITING_FOR_WRITE) break; woken++; - tsk = waiter->task; + } + list_cut_before(&wlist, &sem->wait_list, &waiter->list); + + adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; + lockevent_cond_inc(rwsem_wake_reader, woken); + if (list_empty(&sem->wait_list)) { + /* hit end of list above */ + adjustment -= RWSEM_WAITING_BIAS; + } + + if (adjustment) + atomic_long_add(adjustment, &sem->count); + + /* 2nd pass */ + list_for_each_entry_safe(waiter, tmp, &wlist, list) { + struct task_struct *tsk; + tsk = waiter->task; get_task_struct(tsk); - list_del(&waiter->list); + /* * Ensure calling get_task_struct() before setting the reader * waiter to nil such that rwsem_down_read_failed() cannot @@ -213,16 +239,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, */ wake_q_add_safe(wake_q, tsk); } - - adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; - lockevent_cond_inc(rwsem_wake_reader, woken); - if (list_empty(&sem->wait_list)) { - /* hit end of list above */ - adjustment -= RWSEM_WAITING_BIAS; - } - - if (adjustment) - atomic_long_add(adjustment, &sem->count); } /* -- cgit v1.2.3