Diffstat (limited to 'arch/powerpc/sysdev/xive/native.c')
-rw-r--r--  arch/powerpc/sysdev/xive/native.c  640
1 file changed, 640 insertions, 0 deletions
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
new file mode 100644
index 000000000000..1a726229a427
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -0,0 +1,640 @@
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "xive: " fmt
+
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/debugfs.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/cpumask.h>
+#include <linux/mm.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/opal.h>
+
+#include "xive-internal.h"
+
+
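+/*
+ * Backend-wide state: the OPAL-advertised provisioning page size and
+ * the list of chips that may request donated pages, the queue (EQ)
+ * size in use, the base of the pool VPs, and the cache backing
+ * provisioning page allocations.
+ */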
+static u32 xive_provision_size;
+static u32 *xive_provision_chips;
+static u32 xive_provision_chip_count;
+static u32 xive_queue_shift;
+static u32 xive_pool_vps = XIVE_INVALID_VP;
+static struct kmem_cache *xive_provision_cache;
+
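+/*
+ * Query OPAL for the ESB characteristics of a HW interrupt and map
+ * its EOI page (and, if distinct, its trigger page) into the kernel.
+ */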
+int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
+{
+ __be64 flags, eoi_page, trig_page;
+ __be32 esb_shift, src_chip;
+ u64 opal_flags;
+ s64 rc;
+
+ memset(data, 0, sizeof(*data));
+
+ rc = opal_xive_get_irq_info(hw_irq, &flags, &eoi_page, &trig_page,
+ &esb_shift, &src_chip);
+ if (rc) {
+ pr_err("opal_xive_get_irq_info(0x%x) returned %lld\n",
+ hw_irq, rc);
+ return -EINVAL;
+ }
+
+ opal_flags = be64_to_cpu(flags);
+ if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI)
+ data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
+ if (opal_flags & OPAL_XIVE_IRQ_LSI)
+ data->flags |= XIVE_IRQ_FLAG_LSI;
+ if (opal_flags & OPAL_XIVE_IRQ_SHIFT_BUG)
+ data->flags |= XIVE_IRQ_FLAG_SHIFT_BUG;
+ if (opal_flags & OPAL_XIVE_IRQ_MASK_VIA_FW)
+ data->flags |= XIVE_IRQ_FLAG_MASK_FW;
+ if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW)
+ data->flags |= XIVE_IRQ_FLAG_EOI_FW;
+ data->eoi_page = be64_to_cpu(eoi_page);
+ data->trig_page = be64_to_cpu(trig_page);
+ data->esb_shift = be32_to_cpu(esb_shift);
+ data->src_chip = be32_to_cpu(src_chip);
+
+ data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
+ if (!data->eoi_mmio) {
+ pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
+ return -ENOMEM;
+ }
+
+ if (!data->trig_page)
+ return 0;
+ if (data->trig_page == data->eoi_page) {
+ data->trig_mmio = data->eoi_mmio;
+ return 0;
+ }
+
+ data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
+ if (!data->trig_mmio) {
+ pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
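+/*
+ * Bind a HW interrupt to a target CPU and priority. OPAL may return
+ * OPAL_BUSY while the targeting update completes, so retry until it
+ * does.
+ */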
+int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
+{
+ s64 rc;
+
+ for (;;) {
+ rc = opal_xive_set_irq_config(hw_irq, target, prio, sw_irq);
+ if (rc != OPAL_BUSY)
+ break;
+ msleep(1);
+ }
+ return rc == 0 ? 0 : -ENXIO;
+}
+
+/* This can be called multiple times to change a queue configuration */
+int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
+ __be32 *qpage, u32 order, bool can_escalate)
+{
+ s64 rc = 0;
+ __be64 qeoi_page_be;
+ __be32 esc_irq_be;
+ u64 flags, qpage_phys;
+
+ /* If there's an actual queue page, clean it */
+ if (order) {
+ if (WARN_ON(!qpage))
+ return -EINVAL;
+ qpage_phys = __pa(qpage);
+ } else
+ qpage_phys = 0;
+
+ /* Initialize the rest of the fields */
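+ /*
+ * Queue entries are 4 bytes (__be32), so a queue of 2^order bytes
+ * holds 2^(order - 2) entries; msk is the ring index mask.
+ */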
+ q->msk = order ? ((1u << (order - 2)) - 1) : 0;
+ q->idx = 0;
+ q->toggle = 0;
+
+ rc = opal_xive_get_queue_info(vp_id, prio, NULL, NULL,
+ &qeoi_page_be,
+ &esc_irq_be,
+ NULL);
+ if (rc) {
+ pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+ rc = -EIO;
+ goto fail;
+ }
+ q->eoi_phys = be64_to_cpu(qeoi_page_be);
+
+ /* Default flags */
+ flags = OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED;
+
+ /* Escalation needed ? */
+ if (can_escalate) {
+ q->esc_irq = be32_to_cpu(esc_irq_be);
+ flags |= OPAL_XIVE_EQ_ESCALATE;
+ }
+
+ /* Configure and enable the queue in HW */
+ for (;;) {
+ rc = opal_xive_set_queue_info(vp_id, prio, qpage_phys, order, flags);
+ if (rc != OPAL_BUSY)
+ break;
+ msleep(1);
+ }
+ if (rc) {
+ pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+ rc = -EIO;
+ } else {
+ /*
+ * KVM code requires all of the above to be visible before
+ * q->qpage is set due to how it manages IPI EOIs
+ */
+ wmb();
+ q->qpage = qpage;
+ }
+fail:
+ return rc;
+}
+
+static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
+{
+ s64 rc;
+
+ /* Disable the queue in HW */
+ for (;;) {
+ rc = opal_xive_set_queue_info(vp_id, prio, 0, 0, 0);
+ if (rc != OPAL_BUSY)
+ break;
+ msleep(1);
+ }
+ if (rc)
+ pr_err("Error %lld disabling queue for prio %d\n", rc, prio);
+}
+
+void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
+{
+ __xive_native_disable_queue(vp_id, q, prio);
+}
+
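+/*
+ * Allocate and configure the per-CPU queue page for one priority.
+ * The page is allocated on the CPU's node and sized by
+ * xive_queue_shift.
+ */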
+static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
+{
+ struct xive_q *q = &xc->queue[prio];
+ unsigned int alloc_order;
+ struct page *pages;
+ __be32 *qpage;
+
+ alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
+ (xive_queue_shift - PAGE_SHIFT) : 0;
+ pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
+ if (!pages)
+ return -ENOMEM;
+ qpage = (__be32 *)page_address(pages);
+ memset(qpage, 0, 1 << xive_queue_shift);
+ return xive_native_configure_queue(get_hard_smp_processor_id(cpu),
+ q, prio, qpage, xive_queue_shift, false);
+}
+
+static void xive_native_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
+{
+ struct xive_q *q = &xc->queue[prio];
+ unsigned int alloc_order;
+
+ /*
+ * We use the variant with no iounmap, as this is executed from
+ * an IPI context where iounmap() isn't safe.
+ */
+ __xive_native_disable_queue(get_hard_smp_processor_id(cpu), q, prio);
+ alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
+ (xive_queue_shift - PAGE_SHIFT) : 0;
+ free_pages((unsigned long)q->qpage, alloc_order);
+ q->qpage = NULL;
+}
+
+static bool xive_native_match(struct device_node *node)
+{
+ return of_device_is_compatible(node, "ibm,opal-xive-vc");
+}
+
+#ifdef CONFIG_SMP
+static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+ struct device_node *np;
+ unsigned int chip_id = 0;
+ s64 irq;
+
+ /* Find the chip ID (default to chip 0 if the node or property is absent) */
+ np = of_get_cpu_node(cpu, NULL);
+ if (np) {
+ if (of_property_read_u32(np, "ibm,chip-id", &chip_id) < 0)
+ chip_id = 0;
+ of_node_put(np);
+ }
+
+ /* Allocate an IPI and populate info about it */
+ for (;;) {
+ irq = opal_xive_allocate_irq(chip_id);
+ if (irq == OPAL_BUSY) {
+ msleep(1);
+ continue;
+ }
+ if (irq < 0) {
+ pr_err("Failed to allocate IPI on CPU %d\n", cpu);
+ return -ENXIO;
+ }
+ xc->hw_ipi = irq;
+ break;
+ }
+ return 0;
+}
+
+u32 xive_native_alloc_irq(void)
+{
+ s64 rc;
+
+ for (;;) {
+ rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP);
+ if (rc != OPAL_BUSY)
+ break;
+ msleep(1);
+ }
+ if (rc < 0)
+ return 0; /* 0 is the invalid IRQ value for callers */
+ return rc;
+}
+
+void xive_native_free_irq(u32 irq)
+{
+ for (;;) {
+ s64 rc = opal_xive_free_irq(irq);
+ if (rc != OPAL_BUSY)
+ break;
+ msleep(1);
+ }
+}
+
+static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+ s64 rc;
+
+ /* Free the IPI */
+ if (!xc->hw_ipi)
+ return;
+ for (;;) {
+ rc = opal_xive_free_irq(xc->hw_ipi);
+ if (rc == OPAL_BUSY) {
+ msleep(1);
+ continue;
+ }
+ xc->hw_ipi = 0;
+ break;
+ }
+}
+#endif /* CONFIG_SMP */
+
+static void xive_native_shutdown(void)
+{
+ /* Switch the XIVE to emulation mode */
+ opal_xive_reset(OPAL_XIVE_MODE_EMU);
+}
+
+/*
+ * Perform an "ack" cycle on the current thread, thus
+ * grabbing the pending active priorities and updating
+ * the CPPR to the most favored one.
+ */
+static void xive_native_update_pending(struct xive_cpu *xc)
+{
+ u8 he, cppr;
+ u16 ack;
+
+ /* Perform the acknowledge hypervisor to register cycle */
+ ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_HV_REG));
+
+ /* Synchronize subsequent queue accesses */
+ mb();
+
+ /*
+ * Grab the CPPR and the "HE" field which indicates the source
+ * of the hypervisor interrupt (if any)
+ */
+ cppr = ack & 0xff;
+ he = GETFIELD(TM_QW3_NSR_HE, (ack >> 8));
+ switch (he) {
+ case TM_QW3_NSR_HE_NONE: /* Nothing to see here */
+ break;
+ case TM_QW3_NSR_HE_PHYS: /* Physical thread interrupt */
+ if (cppr == 0xff)
+ return;
+ /* Mark the priority pending */
+ xc->pending_prio |= 1 << cppr;
+
+ /*
+ * A new interrupt should never have a CPPR less favored
+ * than our current one.
+ */
+ if (cppr >= xc->cppr)
+ pr_err("CPU %d odd ack CPPR, got %d at %d\n",
+ smp_processor_id(), cppr, xc->cppr);
+
+ /* Update our idea of what the CPPR is */
+ xc->cppr = cppr;
+ break;
+ case TM_QW3_NSR_HE_POOL: /* HV Pool interrupt (unused) */
+ case TM_QW3_NSR_HE_LSI: /* Legacy FW LSI (unused) */
+ pr_err("CPU %d got unexpected interrupt type HE=%d\n",
+ smp_processor_id(), he);
+ return;
+ }
+}
+
+static void xive_native_eoi(u32 hw_irq)
+{
+ /*
+ * Not normally used except if specific interrupts need
+ * a workaround on EOI.
+ */
+ opal_int_eoi(hw_irq);
+}
+
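+/*
+ * On each CPU, enable its pool VP and push the VP's CAM line into
+ * the pool slot (QW2) of the thread interrupt management area.
+ */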
+static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+ s64 rc;
+ u32 vp;
+ __be64 vp_cam_be;
+ u64 vp_cam;
+
+ if (xive_pool_vps == XIVE_INVALID_VP)
+ return;
+
+ /* Enable the pool VP */
+ vp = xive_pool_vps + get_hard_smp_processor_id(cpu);
+ pr_debug("CPU %d setting up pool VP 0x%x\n", cpu, vp);
+ for (;;) {
+ rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0);
+ if (rc != OPAL_BUSY)
+ break;
+ msleep(1);
+ }
+ if (rc) {
+ pr_err("Failed to enable pool VP on CPU %d\n", cpu);
+ return;
+ }
+
+ /* Grab its CAM value */
+ rc = opal_xive_get_vp_info(vp, NULL, &vp_cam_be, NULL, NULL);
+ if (rc) {
+ pr_err("Failed to get pool VP info CPU %d\n", cpu);
+ return;
+ }
+ vp_cam = be64_to_cpu(vp_cam_be);
+
+ pr_debug("VP CAM = %llx\n", vp_cam);
+
+ /* Push it on the CPU (set LSMFB to 0xff to skip backlog scan) */
+ pr_debug("(Old HW value: %08x)\n",
+ in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2));
+ out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD0, 0xff);
+ out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2,
+ TM_QW2W2_VP | vp_cam);
+ pr_debug("(New HW value: %08x)\n",
+ in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2));
+}
+
+static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+ s64 rc;
+ u32 vp;
+
+ if (xive_pool_vps == XIVE_INVALID_VP)
+ return;
+
+ /* Pull the pool VP from the CPU */
+ in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
+
+ /* Disable it */
+ vp = xive_pool_vps + get_hard_smp_processor_id(cpu);
+ for (;;) {
+ rc = opal_xive_set_vp_info(vp, 0, 0);
+ if (rc != OPAL_BUSY)
+ break;
+ msleep(1);
+ }
+}
+
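+/*
+ * Flush any in-flight event from this source through the XIVE so
+ * that prior configuration changes are guaranteed to have taken
+ * effect.
+ */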
+static void xive_native_sync_source(u32 hw_irq)
+{
+ opal_xive_sync(XIVE_SYNC_EAS, hw_irq);
+}
+
+static const struct xive_ops xive_native_ops = {
+ .populate_irq_data = xive_native_populate_irq_data,
+ .configure_irq = xive_native_configure_irq,
+ .setup_queue = xive_native_setup_queue,
+ .cleanup_queue = xive_native_cleanup_queue,
+ .match = xive_native_match,
+ .shutdown = xive_native_shutdown,
+ .update_pending = xive_native_update_pending,
+ .eoi = xive_native_eoi,
+ .setup_cpu = xive_native_setup_cpu,
+ .teardown_cpu = xive_native_teardown_cpu,
+ .sync_source = xive_native_sync_source,
+#ifdef CONFIG_SMP
+ .get_ipi = xive_native_get_ipi,
+ .put_ipi = xive_native_put_ipi,
+#endif /* CONFIG_SMP */
+ .name = "native",
+};
+
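+/*
+ * If the device tree advertises a provisioning page size, OPAL may
+ * ask us to donate memory pages when allocating VP blocks. Read the
+ * list of chips that can request pages and create a cache to back
+ * those donations.
+ */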
+static bool xive_parse_provisioning(struct device_node *np)
+{
+ int rc;
+
+ if (of_property_read_u32(np, "ibm,xive-provision-page-size",
+ &xive_provision_size) < 0)
+ return true;
+ rc = of_property_count_elems_of_size(np, "ibm,xive-provision-chips", sizeof(u32));
+ if (rc < 0) {
+ pr_err("Error %d getting provision chips array\n", rc);
+ return false;
+ }
+ xive_provision_chip_count = rc;
+ if (rc == 0)
+ return true;
+
+ xive_provision_chips = kcalloc(xive_provision_chip_count,
+ sizeof(u32), GFP_KERNEL);
+ if (WARN_ON(!xive_provision_chips))
+ return false;
+
+ rc = of_property_read_u32_array(np, "ibm,xive-provision-chips",
+ xive_provision_chips,
+ xive_provision_chip_count);
+ if (rc < 0) {
+ pr_err("Error %d reading provision chips array\n", rc);
+ kfree(xive_provision_chips);
+ xive_provision_chips = NULL;
+ return false;
+ }
+
+ xive_provision_cache = kmem_cache_create("xive-provision",
+ xive_provision_size,
+ xive_provision_size,
+ 0, NULL);
+ if (!xive_provision_cache) {
+ pr_err("Failed to allocate provision cache\n");
+ return false;
+ }
+ return true;
+}
+
+u32 xive_native_default_eq_shift(void)
+{
+ return xive_queue_shift;
+}
+
+bool xive_native_init(void)
+{
+ struct device_node *np;
+ struct resource r;
+ void __iomem *tima;
+ struct property *prop;
+ u8 max_prio = 7;
+ const __be32 *p;
+ u32 val;
+ s64 rc;
+
+ if (xive_cmdline_disabled)
+ return false;
+
+ pr_devel("xive_native_init()\n");
+ np = of_find_compatible_node(NULL, NULL, "ibm,opal-xive-pe");
+ if (!np) {
+ pr_devel("not found !\n");
+ return false;
+ }
+ pr_devel("Found %s\n", np->full_name);
+
+ /* Resource 1 is HV window */
+ if (of_address_to_resource(np, 1, &r)) {
+ pr_err("Failed to get thread mgmnt area resource\n");
+ return false;
+ }
+ tima = ioremap(r.start, resource_size(&r));
+ if (!tima) {
+ pr_err("Failed to map thread mgmnt area\n");
+ return false;
+ }
+
+ /* Read number of priorities */
+ if (of_property_read_u32(np, "ibm,xive-#priorities", &val) == 0)
+ max_prio = val - 1;
+
+ /* Iterate the EQ sizes and pick one */
+ of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, p, val) {
+ xive_queue_shift = val;
+ if (val == PAGE_SHIFT)
+ break;
+ }
+
+ /* Parse the provisioning properties, if any */
+ xive_parse_provisioning(np);
+
+ /* Switch the XIVE to exploitation mode */
+ rc = opal_xive_reset(OPAL_XIVE_MODE_EXPL);
+ if (rc) {
+ pr_err("Switch to exploitation mode failed with error %lld\n", rc);
+ iounmap(tima);
+ return false;
+ }
+
+ /* Initialize XIVE core with our backend */
+ if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS,
+ max_prio)) {
+ opal_xive_reset(OPAL_XIVE_MODE_EMU);
+ iounmap(tima);
+ return false;
+ }
+ pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
+ return true;
+}
+
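+/*
+ * Donate one page per provisioning chip to OPAL so that a pending
+ * VP block allocation can proceed.
+ */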
+static bool xive_native_provision_pages(void)
+{
+ u32 i;
+ void *p;
+
+ for (i = 0; i < xive_provision_chip_count; i++) {
+ u32 chip = xive_provision_chips[i];
+
+ /*
+ * XXX TODO: Try to make the allocation local to the node where
+ * the chip resides.
+ */
+ p = kmem_cache_alloc(xive_provision_cache, GFP_KERNEL);
+ if (!p) {
+ pr_err("Failed to allocate provisioning page\n");
+ return false;
+ }
+ opal_xive_donate_page(chip, __pa(p));
+ }
+ return true;
+}
+
+u32 xive_native_alloc_vp_block(u32 max_vcpus)
+{
+ s64 rc;
+ u32 order;
+
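+ /* Round the order up to the next power of two so the block covers max_vcpus */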
+ order = fls(max_vcpus) - 1;
+ if (max_vcpus > (1 << order))
+ order++;
+
+ pr_info("VP block alloc, for max VCPUs %d use order %d\n",
+ max_vcpus, order);
+
+ for (;;) {
+ rc = opal_xive_alloc_vp_block(order);
+ switch (rc) {
+ case OPAL_BUSY:
+ msleep(1);
+ break;
+ case OPAL_XIVE_PROVISIONING:
+ if (!xive_native_provision_pages())
+ return XIVE_INVALID_VP;
+ break;
+ default:
+ if (rc < 0) {
+ pr_err("OPAL failed to allocate VCPUs order %d, err %lld\n",
+ order, rc);
+ return XIVE_INVALID_VP;
+ }
+ return rc;
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(xive_native_alloc_vp_block);
+
+void xive_native_free_vp_block(u32 vp_base)
+{
+ s64 rc;
+
+ if (vp_base == XIVE_INVALID_VP)
+ return;
+
+ rc = opal_xive_free_vp_block(vp_base);
+ if (rc < 0)
+ pr_warn("OPAL error %lld freeing VP block\n", rc);
+}
+EXPORT_SYMBOL_GPL(xive_native_free_vp_block);