diff options
author | Dave Airlie <airlied@redhat.com> | 2017-05-18 12:57:06 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2017-05-18 12:57:06 +1000 |
commit | e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 (patch) | |
tree | 8357e8fda6efb0867ac39fc6b9211a579721d00a /drivers/gpu/drm/vc4/vc4_v3d.c | |
parent | 2ea659a9ef488125eb46da6eb571de5eae5c43f6 (diff) | |
parent | 9cf8f5802f39d9991158b29033c852bccfc3a4d4 (diff) |
Merge tag 'drm-misc-next-2017-05-16' of git://anongit.freedesktop.org/git/drm-misc into drm-next
UAPI Changes:
- Return -ENODEV instead of -ENXIO when creating cma fb w/o valid gem (Daniel)
- Add aspect ratio and custom scaling propertis to connector state (Maarten)
Cross-subsystem Changes:
- None
Core Changes:
- Add Laurent as bridge reviewer and Andrzej as bridge maintainer (Archit)
- Maintain new STM driver through -misc (Yannick)
- Misc doc improvements (as is tradition) (Daniel)
- Add driver-private objects to atomic state (Dhinakaran)
- Deprecate preclose hook in modern drivers (use postclose) (Daniel)
- Add hwmode to vblank struct. This fixes mode access in irq context and reduced
a bunch of boilerplate (Daniel)
Driver Changes:
- vc4: Add out-fence support to vc4 V3D rendering (Eric)
- stm: Add stm32f429 display hw and am-480272h3tmqw-t01h panel support (Yannick)
- vc4: Remove 256MB cma limit from vc4 (Eric)
- dw-hdmi: Disable audio when inactive, instead of always enabled (Romain)
- zte: Add support for VGA to the ZTE driver (Shawn)
- i915: Track DP MST bandwidth and check it in atomic_check (Dhinakaran)
- vgem: Enable gem dmabuf import iface to facilitate ion testing (Laura)
- vc4: Add support for Cygnus (new dt compat string + couple bug fixes) (Eric)
- pl111: Add driver for pl111 CLCD display controller (Eric/Tom)
- vgem: Subclass drm_device instead of standalone platform device (Chris)
Cc: Archit Taneja <architt@codeaurora.org>
Cc: Eric Anholt <eric@anholt.net>
Cc: Yannick Fertre <yannick.fertre@st.com>
Cc: Romain Perier <romain.perier@collabora.com>
Cc: Navare, Manasi D <manasi.d.navare@intel.com>
Cc: Shawn Guo <shawn.guo@linaro.org>
Cc: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tom Cooksey <tom.cooksey@arm.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
* tag 'drm-misc-next-2017-05-16' of git://anongit.freedesktop.org/git/drm-misc: (72 commits)
drm: add missing declaration to drm_blend.h
drm/dp: Wait up all outstanding tx waiters
drm/dp: Read the tx msg state once after checking for an event
drm/prime: Forward declare struct device
drm/vblank: Lock down vblank->hwmode more
drm/vblank: drop the mode argument from drm_calc_vbltimestamp_from_scanoutpos
drm/vblank: Add FIXME comments about moving the vblank ts hooks
drm/vblank: Switch to bool in_vblank_irq in get_vblank_timestamp
drm/vblank: Switch drm_driver->get_vblank_timestamp to return a bool
drm/vgem: Convert to a struct drm_device subclass
gpu: drm: gma500: remove dead code
drm/sti: Adjust two checks for null pointers in sti_hqvdp_probe()
drm/sti: Fix typos in a comment line
drm/sti: Fix a typo in a comment line
drm/sti: Replace 17 seq_puts() calls by seq_putc()
drm/sti: Reduce function calls for sequence output at five places
drm/sti: use seq_puts to display a string
drm: Nerf the preclose callback for modern drivers
drm/exynos: Merge pre/postclose hooks
drm/tegra: switch to postclose
...
Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_v3d.c')
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_v3d.c | 180 |
1 files changed, 180 insertions, 0 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 7cc346ad9b0b..c53afec34586 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -16,6 +16,7 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ +#include "linux/clk.h" #include "linux/component.h" #include "linux/pm_runtime.h" #include "vc4_drv.h" @@ -156,6 +157,144 @@ static void vc4_v3d_init_hw(struct drm_device *dev) V3D_WRITE(V3D_VPMBASE, 0); } +int vc4_v3d_get_bin_slot(struct vc4_dev *vc4) +{ + struct drm_device *dev = vc4->dev; + unsigned long irqflags; + int slot; + uint64_t seqno = 0; + struct vc4_exec_info *exec; + +try_again: + spin_lock_irqsave(&vc4->job_lock, irqflags); + slot = ffs(~vc4->bin_alloc_used); + if (slot != 0) { + /* Switch from ffs() bit index to a 0-based index. */ + slot--; + vc4->bin_alloc_used |= BIT(slot); + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + return slot; + } + + /* Couldn't find an open slot. Wait for render to complete + * and try again. + */ + exec = vc4_last_render_job(vc4); + if (exec) + seqno = exec->seqno; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + if (seqno) { + int ret = vc4_wait_for_seqno(dev, seqno, ~0ull, true); + + if (ret == 0) + goto try_again; + + return ret; + } + + return -ENOMEM; +} + +/** + * vc4_allocate_bin_bo() - allocates the memory that will be used for + * tile binning. + * + * The binner has a limitation that the addresses in the tile state + * buffer that point into the tile alloc buffer or binner overflow + * memory only have 28 bits (256MB), and the top 4 on the bus for + * tile alloc references end up coming from the tile state buffer's + * address. + * + * To work around this, we allocate a single large buffer while V3D is + * in use, make sure that it has the top 4 bits constant across its + * entire extent, and then put the tile state, tile alloc, and binner + * overflow memory inside that buffer. + * + * This creates a limitation where we may not be able to execute a job + * if it doesn't fit within the buffer that we allocated up front. + * However, it turns out that 16MB is "enough for anybody", and + * real-world applications run into allocation failures from the + * overall CMA pool before they make scenes complicated enough to run + * out of bin space. + */ +int +vc4_allocate_bin_bo(struct drm_device *drm) +{ + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_v3d *v3d = vc4->v3d; + uint32_t size = 16 * 1024 * 1024; + int ret = 0; + struct list_head list; + + /* We may need to try allocating more than once to get a BO + * that doesn't cross 256MB. Track the ones we've allocated + * that failed so far, so that we can free them when we've got + * one that succeeded (if we freed them right away, our next + * allocation would probably be the same chunk of memory). + */ + INIT_LIST_HEAD(&list); + + while (true) { + struct vc4_bo *bo = vc4_bo_create(drm, size, true); + + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + + dev_err(&v3d->pdev->dev, + "Failed to allocate memory for tile binning: " + "%d. You may need to enable CMA or give it " + "more memory.", + ret); + break; + } + + /* Check if this BO won't trigger the addressing bug. */ + if ((bo->base.paddr & 0xf0000000) == + ((bo->base.paddr + bo->base.base.size - 1) & 0xf0000000)) { + vc4->bin_bo = bo; + + /* Set up for allocating 512KB chunks of + * binner memory. The biggest allocation we + * need to do is for the initial tile alloc + + * tile state buffer. We can render to a + * maximum of ((2048*2048) / (32*32) = 4096 + * tiles in a frame (until we do floating + * point rendering, at which point it would be + * 8192). Tile state is 48b/tile (rounded to + * a page), and tile alloc is 32b/tile + * (rounded to a page), plus a page of extra, + * for a total of 320kb for our worst-case. + * We choose 512kb so that it divides evenly + * into our 16MB, and the rest of the 512kb + * will be used as storage for the overflow + * from the initial 32b CL per bin. + */ + vc4->bin_alloc_size = 512 * 1024; + vc4->bin_alloc_used = 0; + vc4->bin_alloc_overflow = 0; + WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 != + bo->base.base.size / vc4->bin_alloc_size); + + break; + } + + /* Put it on the list to free later, and try again. */ + list_add(&bo->unref_head, &list); + } + + /* Free all the BOs we allocated but didn't choose. */ + while (!list_empty(&list)) { + struct vc4_bo *bo = list_last_entry(&list, + struct vc4_bo, unref_head); + + list_del(&bo->unref_head); + drm_gem_object_put_unlocked(&bo->base.base); + } + + return ret; +} + #ifdef CONFIG_PM static int vc4_v3d_runtime_suspend(struct device *dev) { @@ -164,6 +303,11 @@ static int vc4_v3d_runtime_suspend(struct device *dev) vc4_irq_uninstall(vc4->dev); + drm_gem_object_put_unlocked(&vc4->bin_bo->base.base); + vc4->bin_bo = NULL; + + clk_disable_unprepare(v3d->clk); + return 0; } @@ -171,6 +315,15 @@ static int vc4_v3d_runtime_resume(struct device *dev) { struct vc4_v3d *v3d = dev_get_drvdata(dev); struct vc4_dev *vc4 = v3d->vc4; + int ret; + + ret = vc4_allocate_bin_bo(vc4->dev); + if (ret) + return ret; + + ret = clk_prepare_enable(v3d->clk); + if (ret != 0) + return ret; vc4_v3d_init_hw(vc4->dev); vc4_irq_postinstall(vc4->dev); @@ -202,12 +355,38 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) vc4->v3d = v3d; v3d->vc4 = vc4; + v3d->clk = devm_clk_get(dev, NULL); + if (IS_ERR(v3d->clk)) { + int ret = PTR_ERR(v3d->clk); + + if (ret == -ENOENT) { + /* bcm2835 didn't have a clock reference in the DT. */ + ret = 0; + v3d->clk = NULL; + } else { + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get V3D clock: %d\n", + ret); + return ret; + } + } + if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) { DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n", V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0); return -EINVAL; } + ret = clk_prepare_enable(v3d->clk); + if (ret != 0) + return ret; + + ret = vc4_allocate_bin_bo(drm); + if (ret) { + clk_disable_unprepare(v3d->clk); + return ret; + } + /* Reset the binner overflow address/size at setup, to be sure * we don't reuse an old one. */ @@ -271,6 +450,7 @@ static int vc4_v3d_dev_remove(struct platform_device *pdev) static const struct of_device_id vc4_v3d_dt_match[] = { { .compatible = "brcm,bcm2835-v3d" }, + { .compatible = "brcm,cygnus-v3d" }, { .compatible = "brcm,vc4-v3d" }, {} }; |