Diffstat (limited to 'drivers/gpu/drm/vc4')
 drivers/gpu/drm/vc4/Kconfig                |    1
 drivers/gpu/drm/vc4/Makefile               |    1
 drivers/gpu/drm/vc4/vc4_bo.c               |    6
 drivers/gpu/drm/vc4/vc4_crtc.c             |  269
 drivers/gpu/drm/vc4/vc4_debugfs.c          |    1
 drivers/gpu/drm/vc4/vc4_dpi.c              |  503
 drivers/gpu/drm/vc4/vc4_drv.c              |  106
 drivers/gpu/drm/vc4/vc4_drv.h              |   26
 drivers/gpu/drm/vc4/vc4_gem.c              |   31
 drivers/gpu/drm/vc4/vc4_hdmi.c             |   24
 drivers/gpu/drm/vc4/vc4_irq.c              |    4
 drivers/gpu/drm/vc4/vc4_kms.c              |   39
 drivers/gpu/drm/vc4/vc4_plane.c            |   15
 drivers/gpu/drm/vc4/vc4_qpu_defines.h      |   17
 drivers/gpu/drm/vc4/vc4_regs.h             |   32
 drivers/gpu/drm/vc4/vc4_validate.c         |   13
 drivers/gpu/drm/vc4/vc4_validate_shaders.c |  455
17 files changed, 1387 insertions(+), 156 deletions(-)
diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 584810474e5b..e53df59cb139 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -5,6 +5,7 @@ config DRM_VC4 select DRM_KMS_HELPER select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER + select DRM_PANEL help Choose this option if you have a system that has a Broadcom VC4 GPU, such as the Raspberry Pi or other BCM2708/BCM2835. diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile index 4c6a99f0398c..fb77db755e0a 100644 --- a/drivers/gpu/drm/vc4/Makefile +++ b/drivers/gpu/drm/vc4/Makefile @@ -7,6 +7,7 @@ vc4-y := \ vc4_bo.o \ vc4_crtc.o \ vc4_drv.o \ + vc4_dpi.o \ vc4_kms.o \ vc4_gem.o \ vc4_hdmi.o \ diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 9807bc9d296e..3f6704cf6608 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -144,7 +144,7 @@ static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev, return &vc4->bo_cache.size_list[page_index]; } -void vc4_bo_cache_purge(struct drm_device *dev) +static void vc4_bo_cache_purge(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -291,8 +291,6 @@ static void vc4_bo_cache_free_old(struct drm_device *dev) /* Called on the last userspace/kernel unreference of the BO. Returns * it to the BO cache if possible, otherwise frees it. - * - * Note that this is called with the struct_mutex held. */ void vc4_free_object(struct drm_gem_object *gem_bo) { @@ -457,7 +455,7 @@ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_vc4_mmap_bo *args = data; struct drm_gem_object *gem_obj; - gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle); + gem_obj = drm_gem_object_lookup(file_priv, args->handle); if (!gem_obj) { DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); return -EINVAL; diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 355ee4b091b3..8fc2b731b59a 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -46,9 +46,18 @@ struct vc4_crtc { const struct vc4_crtc_data *data; void __iomem *regs; + /* Timestamp at start of vblank irq - unaffected by lock delays. */ + ktime_t t_vblank; + /* Which HVS channel we're using for our CRTC. */ int channel; + u8 lut_r[256]; + u8 lut_g[256]; + u8 lut_b[256]; + /* Size in pixels of the COB memory allocated to this CRTC. */ + u32 cob_size; + struct drm_pending_vblank_event *event; }; @@ -142,11 +151,191 @@ int vc4_crtc_debugfs_regs(struct seq_file *m, void *unused) } #endif +int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, + unsigned int flags, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id]; + u32 val; + int fifo_lines; + int vblank_lines; + int ret = 0; + + /* + * XXX Doesn't work well in interlaced mode yet, partially due + * to problems in vc4 kms or drm core interlaced mode handling, + * so disable for now in interlaced mode. + */ + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + return ret; + + /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ + + /* Get optional system timestamp before query. */ + if (stime) + *stime = ktime_get(); + + /* + * Read vertical scanline which is currently composed for our + * pixelvalve by the HVS, and also the scaler status. 
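[The line counter here is pulled out of DISPSTAT with the driver's mask/shift helpers. A minimal standalone sketch of that convention, assuming the usual (word & MASK) >> SHIFT definition from vc4_regs.h:]

    #include <stdint.h>

    /* Per the defines later in this patch: LINE occupies bits 11:0. */
    #define SCALER_DISPSTATX_LINE_MASK   0x00000fffu
    #define SCALER_DISPSTATX_LINE_SHIFT  0

    #define VC4_GET_FIELD(word, field) \
        (((word) & field##_MASK) >> field##_SHIFT)

    /* Extract the scanline the HVS is currently composing. */
    static inline uint32_t hvs_current_line(uint32_t dispstat)
    {
        return VC4_GET_FIELD(dispstat, SCALER_DISPSTATX_LINE);
    }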
+ */ + val = HVS_READ(SCALER_DISPSTATX(vc4_crtc->channel)); + + /* Get optional system timestamp after query. */ + if (etime) + *etime = ktime_get(); + + /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ + + /* Vertical position of hvs composed scanline. */ + *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE); + + /* No hpos info available. */ + if (hpos) + *hpos = 0; + + /* This is the offset we need for translating hvs -> pv scanout pos. */ + fifo_lines = vc4_crtc->cob_size / mode->crtc_hdisplay; + + if (fifo_lines > 0) + ret |= DRM_SCANOUTPOS_VALID; + + /* HVS more than fifo_lines into frame for compositing? */ + if (*vpos > fifo_lines) { + /* + * We are in active scanout and can get some meaningful results + * from HVS. The actual PV scanout can not trail behind more + * than fifo_lines as that is the fifo's capacity. Assume that + * in active scanout the HVS and PV work in lockstep wrt. HVS + * refilling the fifo and PV consuming from the fifo, ie. + * whenever the PV consumes and frees up a scanline in the + * fifo, the HVS will immediately refill it, therefore + * incrementing vpos. Therefore we choose HVS read position - + * fifo size in scanlines as a estimate of the real scanout + * position of the PV. + */ + *vpos -= fifo_lines + 1; + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + *vpos /= 2; + + ret |= DRM_SCANOUTPOS_ACCURATE; + return ret; + } + + /* + * Less: This happens when we are in vblank and the HVS, after getting + * the VSTART restart signal from the PV, just started refilling its + * fifo with new lines from the top-most lines of the new framebuffers. + * The PV does not scan out in vblank, so does not remove lines from + * the fifo, so the fifo will be full quickly and the HVS has to pause. + * We can't get meaningful readings wrt. scanline position of the PV + * and need to make things up in a approximative but consistent way. + */ + ret |= DRM_SCANOUTPOS_IN_VBLANK; + vblank_lines = mode->crtc_vtotal - mode->crtc_vdisplay; + + if (flags & DRM_CALLED_FROM_VBLIRQ) { + /* + * Assume the irq handler got called close to first + * line of vblank, so PV has about a full vblank + * scanlines to go, and as a base timestamp use the + * one taken at entry into vblank irq handler, so it + * is not affected by random delays due to lock + * contention on event_lock or vblank_time lock in + * the core. + */ + *vpos = -vblank_lines; + + if (stime) + *stime = vc4_crtc->t_vblank; + if (etime) + *etime = vc4_crtc->t_vblank; + + /* + * If the HVS fifo is not yet full then we know for certain + * we are at the very beginning of vblank, as the hvs just + * started refilling, and the stime and etime timestamps + * truly correspond to start of vblank. + */ + if ((val & SCALER_DISPSTATX_FULL) != SCALER_DISPSTATX_FULL) + ret |= DRM_SCANOUTPOS_ACCURATE; + } else { + /* + * No clue where we are inside vblank. Return a vpos of zero, + * which will cause calling code to just return the etime + * timestamp uncorrected. At least this is no worse than the + * standard fallback. 
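[The fifo translation above is plain integer math. A worked example with hypothetical numbers — a 1920-wide mode and a 20480-pixel COB allocation:]

    static int pv_scanout_estimate(int hvs_vpos, u32 cob_size, u32 hdisplay)
    {
        int fifo_lines = cob_size / hdisplay;  /* 20480 / 1920 = 10 lines */

        /* In active scanout the HVS runs a full fifo ahead of the PV,
         * so an HVS read of line 250 estimates PV scanout at 239.
         */
        return hvs_vpos - (fifo_lines + 1);
    }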
+ */ + *vpos = 0; + } + + return ret; +} + +int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id, + int *max_error, struct timeval *vblank_time, + unsigned flags) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id]; + struct drm_crtc *crtc = &vc4_crtc->base; + struct drm_crtc_state *state = crtc->state; + + /* Helper routine in DRM core does all the work: */ + return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc_id, max_error, + vblank_time, flags, + &state->adjusted_mode); +} + static void vc4_crtc_destroy(struct drm_crtc *crtc) { drm_crtc_cleanup(crtc); } +static void +vc4_crtc_lut_load(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + u32 i; + + /* The LUT memory is laid out with each HVS channel in order, + * each of which takes 256 writes for R, 256 for G, then 256 + * for B. + */ + HVS_WRITE(SCALER_GAMADDR, + SCALER_GAMADDR_AUTOINC | + (vc4_crtc->channel * 3 * crtc->gamma_size)); + + for (i = 0; i < crtc->gamma_size; i++) + HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_r[i]); + for (i = 0; i < crtc->gamma_size; i++) + HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_g[i]); + for (i = 0; i < crtc->gamma_size; i++) + HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_b[i]); +} + +static int +vc4_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, + uint32_t size) +{ + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + u32 i; + + for (i = 0; i < size; i++) { + vc4_crtc->lut_r[i] = r[i] >> 8; + vc4_crtc->lut_g[i] = g[i] >> 8; + vc4_crtc->lut_b[i] = b[i] >> 8; + } + + vc4_crtc_lut_load(crtc); + + return 0; +} + static u32 vc4_get_fifo_full_level(u32 format) { static const u32 fifo_len_bytes = 64; @@ -260,8 +449,14 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc) HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel), SCALER_DISPBKGND_AUTOHS | + SCALER_DISPBKGND_GAMMA | (interlace ? SCALER_DISPBKGND_INTERLACE : 0)); + /* Reload the LUT, since the SRAMs would have been disabled if + * all CRTCs had SCALER_DISPBKGND_GAMMA unset at once. + */ + vc4_crtc_lut_load(crtc); + if (debug_dump_regs) { DRM_INFO("CRTC %d regs after:\n", drm_crtc_index(crtc)); vc4_crtc_dump_regs(vc4_crtc); @@ -345,6 +540,7 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc, struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_plane *plane; unsigned long flags; + const struct drm_plane_state *plane_state; u32 dlist_count = 0; int ret; @@ -354,18 +550,8 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc, if (hweight32(state->connector_mask) > 1) return -EINVAL; - drm_atomic_crtc_state_for_each_plane(plane, state) { - struct drm_plane_state *plane_state = - state->state->plane_states[drm_plane_index(plane)]; - - /* plane might not have changed, in which case take - * current state: - */ - if (!plane_state) - plane_state = plane->state; - + drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, state) dlist_count += vc4_plane_dlist_size(plane_state); - } dlist_count++; /* Account for SCALER_CTL0_END. 
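[The gamma_set hook above keeps only the top byte of each channel, since DRM supplies 16-bit LUT entries while the HVS SRAM stores 8 bits per component. A one-line illustration of that truncation:]

    static inline u8 gamma_16_to_8(u16 v)
    {
        return v >> 8;   /* 0xffff -> 0xff, 0x8000 -> 0x80, 0x00ff -> 0x00 */
    }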
*/ @@ -406,14 +592,6 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size); - HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), - vc4_state->mm.start); - - if (debug_dump_regs) { - DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); - vc4_hvs_dump_state(dev); - } - if (crtc->state->event) { unsigned long flags; @@ -423,8 +601,20 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, spin_lock_irqsave(&dev->event_lock, flags); vc4_crtc->event = crtc->state->event; - spin_unlock_irqrestore(&dev->event_lock, flags); crtc->state->event = NULL; + + HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), + vc4_state->mm.start); + + spin_unlock_irqrestore(&dev->event_lock, flags); + } else { + HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), + vc4_state->mm.start); + } + + if (debug_dump_regs) { + DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); + vc4_hvs_dump_state(dev); } } @@ -450,12 +640,17 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc) { struct drm_crtc *crtc = &vc4_crtc->base; struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); + u32 chan = vc4_crtc->channel; unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); - if (vc4_crtc->event) { + if (vc4_crtc->event && + (vc4_state->mm.start == HVS_READ(SCALER_DISPLACTX(chan)))) { drm_crtc_send_vblank_event(crtc, vc4_crtc->event); vc4_crtc->event = NULL; + drm_crtc_vblank_put(crtc); } spin_unlock_irqrestore(&dev->event_lock, flags); } @@ -467,6 +662,7 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data) irqreturn_t ret = IRQ_NONE; if (stat & PV_INT_VFP_START) { + vc4_crtc->t_vblank = ktime_get(); CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START); drm_crtc_handle_vblank(&vc4_crtc->base); vc4_crtc_handle_page_flip(vc4_crtc); @@ -506,6 +702,7 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) spin_unlock_irqrestore(&dev->event_lock, flags); } + drm_crtc_vblank_put(crtc); drm_framebuffer_unreference(flip_state->fb); kfree(flip_state); @@ -548,6 +745,8 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, return ret; } + WARN_ON(drm_crtc_vblank_get(crtc) != 0); + /* Immediately update the plane's legacy fb pointer, so that later * modeset prep sees the state that will be present when the semaphore * is released. @@ -600,7 +799,7 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc, } - __drm_atomic_helper_crtc_destroy_state(crtc, state); + __drm_atomic_helper_crtc_destroy_state(state); } static const struct drm_crtc_funcs vc4_crtc_funcs = { @@ -613,6 +812,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = { .reset = drm_atomic_helper_crtc_reset, .atomic_duplicate_state = vc4_crtc_duplicate_state, .atomic_destroy_state = vc4_crtc_destroy_state, + .gamma_set = vc4_crtc_gamma_set, }; static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = { @@ -667,6 +867,22 @@ static void vc4_set_crtc_possible_masks(struct drm_device *drm, } } +static void +vc4_crtc_get_cob_allocation(struct vc4_crtc *vc4_crtc) +{ + struct drm_device *drm = vc4_crtc->base.dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); + u32 dispbase = HVS_READ(SCALER_DISPBASEX(vc4_crtc->channel)); + /* Top/base are supposed to be 4-pixel aligned, but the + * Raspberry Pi firmware fills the low bits (which are + * presumably ignored). 
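[The COB allocation read here turns the TOP/BASE fields into a fifo size in pixels. A hedged sketch of the arithmetic, with made-up register values:]

    static u32 cob_size_from_dispbase(u32 top, u32 base)
    {
        top &= ~3;    /* mask the low bits the firmware may have filled */
        base &= ~3;

        /* TOP names the last 4-pixel group, hence the +4:
         * e.g. top = 0x4c00, base = 0x3400 -> 0x1804 pixels.
         */
        return top - base + 4;
    }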
+ */ + u32 top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3; + u32 base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3; + + vc4_crtc->cob_size = top - base + 4; +} + static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); @@ -711,6 +927,7 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) primary_plane->crtc = crtc; vc4->crtc[drm_crtc_index(crtc)] = vc4_crtc; vc4_crtc->channel = vc4_crtc->data->hvs_channel; + drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r)); /* Set up some arbitrary number of planes. We're not limited * by a set number of physical registers, just the space in @@ -742,6 +959,8 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) crtc->cursor = cursor_plane; } + vc4_crtc_get_cob_allocation(vc4_crtc); + CRTC_WRITE(PV_INTEN, 0); CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START); ret = devm_request_irq(dev, platform_get_irq(pdev, 0), @@ -751,6 +970,12 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) vc4_set_crtc_possible_masks(drm, crtc); + for (i = 0; i < crtc->gamma_size; i++) { + vc4_crtc->lut_r[i] = i; + vc4_crtc->lut_g[i] = i; + vc4_crtc->lut_b[i] = i; + } + platform_set_drvdata(pdev, vc4_crtc); return 0; diff --git a/drivers/gpu/drm/vc4/vc4_debugfs.c b/drivers/gpu/drm/vc4/vc4_debugfs.c index d76ad10b07fd..245115d49c46 100644 --- a/drivers/gpu/drm/vc4/vc4_debugfs.c +++ b/drivers/gpu/drm/vc4/vc4_debugfs.c @@ -17,6 +17,7 @@ static const struct drm_info_list vc4_debugfs_list[] = { {"bo_stats", vc4_bo_stats_debugfs, 0}, + {"dpi_regs", vc4_dpi_debugfs_regs, 0}, {"hdmi_regs", vc4_hdmi_debugfs_regs, 0}, {"hvs_regs", vc4_hvs_debugfs_regs, 0}, {"crtc0_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)0}, diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c new file mode 100644 index 000000000000..275fedbdbd9e --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_dpi.c @@ -0,0 +1,503 @@ +/* + * Copyright (C) 2016 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * DOC: VC4 DPI module + * + * The VC4 DPI hardware supports MIPI DPI type 4 and Nokia ViSSI + * signals, which are routed out to GPIO0-27 with the ALT2 function. + */ + +#include "drm_atomic_helper.h" +#include "drm_crtc_helper.h" +#include "drm_edid.h" +#include "drm_panel.h" +#include "linux/clk.h" +#include "linux/component.h" +#include "linux/of_graph.h" +#include "linux/of_platform.h" +#include "vc4_drv.h" +#include "vc4_regs.h" + +#define DPI_C 0x00 +# define DPI_OUTPUT_ENABLE_MODE BIT(16) + +/* The order field takes the incoming 24 bit RGB from the pixel valve + * and shuffles the 3 channels. 
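[The ORDER field only shuffles channels; it does not touch the per-channel bits. A small illustration of what DPI_ORDER_BGR does to an incoming pixel, written as a hypothetical helper — the real shuffle happens in hardware:]

    static u32 dpi_shuffle_bgr(u32 rgb)   /* 0x00RRGGBB in, 0x00BBGGRR out */
    {
        u32 r = (rgb >> 16) & 0xff;
        u32 g = (rgb >> 8) & 0xff;
        u32 b = rgb & 0xff;

        return (b << 16) | (g << 8) | r;
    }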
+ */ +# define DPI_ORDER_MASK VC4_MASK(15, 14) +# define DPI_ORDER_SHIFT 14 +# define DPI_ORDER_RGB 0 +# define DPI_ORDER_BGR 1 +# define DPI_ORDER_GRB 2 +# define DPI_ORDER_BRG 3 + +/* The format field takes the ORDER-shuffled pixel valve data and + * formats it onto the output lines. + */ +# define DPI_FORMAT_MASK VC4_MASK(13, 11) +# define DPI_FORMAT_SHIFT 11 +/* This define is named in the hardware, but actually just outputs 0. */ +# define DPI_FORMAT_9BIT_666_RGB 0 +/* Outputs 00000000rrrrrggggggbbbbb */ +# define DPI_FORMAT_16BIT_565_RGB_1 1 +/* Outputs 000rrrrr00gggggg000bbbbb */ +# define DPI_FORMAT_16BIT_565_RGB_2 2 +/* Outputs 00rrrrr000gggggg00bbbbb0 */ +# define DPI_FORMAT_16BIT_565_RGB_3 3 +/* Outputs 000000rrrrrrggggggbbbbbb */ +# define DPI_FORMAT_18BIT_666_RGB_1 4 +/* Outputs 00rrrrrr00gggggg00bbbbbb */ +# define DPI_FORMAT_18BIT_666_RGB_2 5 +/* Outputs rrrrrrrrggggggggbbbbbbbb */ +# define DPI_FORMAT_24BIT_888_RGB 6 + +/* Reverses the polarity of the corresponding signal */ +# define DPI_PIXEL_CLK_INVERT BIT(10) +# define DPI_HSYNC_INVERT BIT(9) +# define DPI_VSYNC_INVERT BIT(8) +# define DPI_OUTPUT_ENABLE_INVERT BIT(7) + +/* Outputs the signal the falling clock edge instead of rising. */ +# define DPI_HSYNC_NEGATE BIT(6) +# define DPI_VSYNC_NEGATE BIT(5) +# define DPI_OUTPUT_ENABLE_NEGATE BIT(4) + +/* Disables the signal */ +# define DPI_HSYNC_DISABLE BIT(3) +# define DPI_VSYNC_DISABLE BIT(2) +# define DPI_OUTPUT_ENABLE_DISABLE BIT(1) + +/* Power gate to the device, full reset at 0 -> 1 transition */ +# define DPI_ENABLE BIT(0) + +/* All other registers besides DPI_C return the ID */ +#define DPI_ID 0x04 +# define DPI_ID_VALUE 0x00647069 + +/* General DPI hardware state. */ +struct vc4_dpi { + struct platform_device *pdev; + + struct drm_encoder *encoder; + struct drm_connector *connector; + struct drm_panel *panel; + + void __iomem *regs; + + struct clk *pixel_clock; + struct clk *core_clock; +}; + +#define DPI_READ(offset) readl(dpi->regs + (offset)) +#define DPI_WRITE(offset, val) writel(val, dpi->regs + (offset)) + +/* VC4 DPI encoder KMS struct */ +struct vc4_dpi_encoder { + struct vc4_encoder base; + struct vc4_dpi *dpi; +}; + +static inline struct vc4_dpi_encoder * +to_vc4_dpi_encoder(struct drm_encoder *encoder) +{ + return container_of(encoder, struct vc4_dpi_encoder, base.base); +} + +/* VC4 DPI connector KMS struct */ +struct vc4_dpi_connector { + struct drm_connector base; + struct vc4_dpi *dpi; + + /* Since the connector is attached to just the one encoder, + * this is the reference to it so we can do the best_encoder() + * hook. 
+ */ + struct drm_encoder *encoder; +}; + +static inline struct vc4_dpi_connector * +to_vc4_dpi_connector(struct drm_connector *connector) +{ + return container_of(connector, struct vc4_dpi_connector, base); +} + +#define DPI_REG(reg) { reg, #reg } +static const struct { + u32 reg; + const char *name; +} dpi_regs[] = { + DPI_REG(DPI_C), + DPI_REG(DPI_ID), +}; + +static void vc4_dpi_dump_regs(struct vc4_dpi *dpi) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dpi_regs); i++) { + DRM_INFO("0x%04x (%s): 0x%08x\n", + dpi_regs[i].reg, dpi_regs[i].name, + DPI_READ(dpi_regs[i].reg)); + } +} + +#ifdef CONFIG_DEBUG_FS +int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_dpi *dpi = vc4->dpi; + int i; + + if (!dpi) + return 0; + + for (i = 0; i < ARRAY_SIZE(dpi_regs); i++) { + seq_printf(m, "%s (0x%04x): 0x%08x\n", + dpi_regs[i].name, dpi_regs[i].reg, + DPI_READ(dpi_regs[i].reg)); + } + + return 0; +} +#endif + +static enum drm_connector_status +vc4_dpi_connector_detect(struct drm_connector *connector, bool force) +{ + struct vc4_dpi_connector *vc4_connector = + to_vc4_dpi_connector(connector); + struct vc4_dpi *dpi = vc4_connector->dpi; + + if (dpi->panel) + return connector_status_connected; + else + return connector_status_disconnected; +} + +static void vc4_dpi_connector_destroy(struct drm_connector *connector) +{ + drm_connector_unregister(connector); + drm_connector_cleanup(connector); +} + +static int vc4_dpi_connector_get_modes(struct drm_connector *connector) +{ + struct vc4_dpi_connector *vc4_connector = + to_vc4_dpi_connector(connector); + struct vc4_dpi *dpi = vc4_connector->dpi; + + if (dpi->panel) + return drm_panel_get_modes(dpi->panel); + + return 0; +} + +static const struct drm_connector_funcs vc4_dpi_connector_funcs = { + .dpms = drm_atomic_helper_connector_dpms, + .detect = vc4_dpi_connector_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = vc4_dpi_connector_destroy, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static const struct drm_connector_helper_funcs vc4_dpi_connector_helper_funcs = { + .get_modes = vc4_dpi_connector_get_modes, +}; + +static struct drm_connector *vc4_dpi_connector_init(struct drm_device *dev, + struct vc4_dpi *dpi) +{ + struct drm_connector *connector = NULL; + struct vc4_dpi_connector *dpi_connector; + + dpi_connector = devm_kzalloc(dev->dev, sizeof(*dpi_connector), + GFP_KERNEL); + if (!dpi_connector) + return ERR_PTR(-ENOMEM); + + connector = &dpi_connector->base; + + dpi_connector->encoder = dpi->encoder; + dpi_connector->dpi = dpi; + + drm_connector_init(dev, connector, &vc4_dpi_connector_funcs, + DRM_MODE_CONNECTOR_DPI); + drm_connector_helper_add(connector, &vc4_dpi_connector_helper_funcs); + + connector->polled = 0; + connector->interlace_allowed = 0; + connector->doublescan_allowed = 0; + + drm_mode_connector_attach_encoder(connector, dpi->encoder); + + return connector; +} + +static const struct drm_encoder_funcs vc4_dpi_encoder_funcs = { + .destroy = drm_encoder_cleanup, +}; + +static void vc4_dpi_encoder_disable(struct drm_encoder *encoder) +{ + struct vc4_dpi_encoder *vc4_encoder = to_vc4_dpi_encoder(encoder); + struct vc4_dpi *dpi = vc4_encoder->dpi; + + drm_panel_disable(dpi->panel); + + 
clk_disable_unprepare(dpi->pixel_clock); + + drm_panel_unprepare(dpi->panel); +} + +static void vc4_dpi_encoder_enable(struct drm_encoder *encoder) +{ + struct drm_display_mode *mode = &encoder->crtc->mode; + struct vc4_dpi_encoder *vc4_encoder = to_vc4_dpi_encoder(encoder); + struct vc4_dpi *dpi = vc4_encoder->dpi; + u32 dpi_c = DPI_ENABLE | DPI_OUTPUT_ENABLE_MODE; + int ret; + + ret = drm_panel_prepare(dpi->panel); + if (ret) { + DRM_ERROR("Panel failed to prepare\n"); + return; + } + + if (dpi->connector->display_info.num_bus_formats) { + u32 bus_format = dpi->connector->display_info.bus_formats[0]; + + switch (bus_format) { + case MEDIA_BUS_FMT_RGB888_1X24: + dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB, + DPI_FORMAT); + break; + case MEDIA_BUS_FMT_BGR888_1X24: + dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB, + DPI_FORMAT); + dpi_c |= VC4_SET_FIELD(DPI_ORDER_BGR, DPI_ORDER); + break; + case MEDIA_BUS_FMT_RGB666_1X24_CPADHI: + dpi_c |= VC4_SET_FIELD(DPI_FORMAT_18BIT_666_RGB_2, + DPI_FORMAT); + break; + case MEDIA_BUS_FMT_RGB666_1X18: + dpi_c |= VC4_SET_FIELD(DPI_FORMAT_18BIT_666_RGB_1, + DPI_FORMAT); + break; + case MEDIA_BUS_FMT_RGB565_1X16: + dpi_c |= VC4_SET_FIELD(DPI_FORMAT_16BIT_565_RGB_3, + DPI_FORMAT); + break; + default: + DRM_ERROR("Unknown media bus format %d\n", bus_format); + break; + } + } + + if (mode->flags & DRM_MODE_FLAG_NHSYNC) + dpi_c |= DPI_HSYNC_INVERT; + else if (!(mode->flags & DRM_MODE_FLAG_PHSYNC)) + dpi_c |= DPI_HSYNC_DISABLE; + + if (mode->flags & DRM_MODE_FLAG_NVSYNC) + dpi_c |= DPI_VSYNC_INVERT; + else if (!(mode->flags & DRM_MODE_FLAG_PVSYNC)) + dpi_c |= DPI_VSYNC_DISABLE; + + DPI_WRITE(DPI_C, dpi_c); + + ret = clk_set_rate(dpi->pixel_clock, mode->clock * 1000); + if (ret) + DRM_ERROR("Failed to set clock rate: %d\n", ret); + + ret = clk_prepare_enable(dpi->pixel_clock); + if (ret) + DRM_ERROR("Failed to set clock rate: %d\n", ret); + + ret = drm_panel_enable(dpi->panel); + if (ret) { + DRM_ERROR("Panel failed to enable\n"); + drm_panel_unprepare(dpi->panel); + return; + } +} + +static const struct drm_encoder_helper_funcs vc4_dpi_encoder_helper_funcs = { + .disable = vc4_dpi_encoder_disable, + .enable = vc4_dpi_encoder_enable, +}; + +static const struct of_device_id vc4_dpi_dt_match[] = { + { .compatible = "brcm,bcm2835-dpi", .data = NULL }, + {} +}; + +/* Walks the OF graph to find the panel node and then asks DRM to look + * up the panel. 
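[Building on the defines above, the enable path composes DPI_C with the usual field-packing helper. A condensed sketch, assuming the vc4_regs.h definition VC4_SET_FIELD(value, field) = ((value) << field##_SHIFT) & field##_MASK:]

    u32 dpi_c = DPI_ENABLE | DPI_OUTPUT_ENABLE_MODE;

    /* 24-bit RGB888 on the data lines, with R and B swapped. */
    dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB, DPI_FORMAT);
    dpi_c |= VC4_SET_FIELD(DPI_ORDER_BGR, DPI_ORDER);

    /* Active-low hsync, per DRM_MODE_FLAG_NHSYNC. */
    dpi_c |= DPI_HSYNC_INVERT;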
+ */ +static struct drm_panel *vc4_dpi_get_panel(struct device *dev) +{ + struct device_node *endpoint, *panel_node; + struct device_node *np = dev->of_node; + struct drm_panel *panel; + + endpoint = of_graph_get_next_endpoint(np, NULL); + if (!endpoint) { + dev_err(dev, "no endpoint to fetch DPI panel\n"); + return NULL; + } + + /* don't proceed if we have an endpoint but no panel_node tied to it */ + panel_node = of_graph_get_remote_port_parent(endpoint); + of_node_put(endpoint); + if (!panel_node) { + dev_err(dev, "no valid panel node\n"); + return NULL; + } + + panel = of_drm_find_panel(panel_node); + of_node_put(panel_node); + + return panel; +} + +static int vc4_dpi_bind(struct device *dev, struct device *master, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct drm_device *drm = dev_get_drvdata(master); + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_dpi *dpi; + struct vc4_dpi_encoder *vc4_dpi_encoder; + int ret; + + dpi = devm_kzalloc(dev, sizeof(*dpi), GFP_KERNEL); + if (!dpi) + return -ENOMEM; + + vc4_dpi_encoder = devm_kzalloc(dev, sizeof(*vc4_dpi_encoder), + GFP_KERNEL); + if (!vc4_dpi_encoder) + return -ENOMEM; + vc4_dpi_encoder->base.type = VC4_ENCODER_TYPE_DPI; + vc4_dpi_encoder->dpi = dpi; + dpi->encoder = &vc4_dpi_encoder->base.base; + + dpi->pdev = pdev; + dpi->regs = vc4_ioremap_regs(pdev, 0); + if (IS_ERR(dpi->regs)) + return PTR_ERR(dpi->regs); + + vc4_dpi_dump_regs(dpi); + + if (DPI_READ(DPI_ID) != DPI_ID_VALUE) { + dev_err(dev, "Port returned 0x%08x for ID instead of 0x%08x\n", + DPI_READ(DPI_ID), DPI_ID_VALUE); + return -ENODEV; + } + + dpi->core_clock = devm_clk_get(dev, "core"); + if (IS_ERR(dpi->core_clock)) { + ret = PTR_ERR(dpi->core_clock); + if (ret != -EPROBE_DEFER) + DRM_ERROR("Failed to get core clock: %d\n", ret); + return ret; + } + dpi->pixel_clock = devm_clk_get(dev, "pixel"); + if (IS_ERR(dpi->pixel_clock)) { + ret = PTR_ERR(dpi->pixel_clock); + if (ret != -EPROBE_DEFER) + DRM_ERROR("Failed to get pixel clock: %d\n", ret); + return ret; + } + + ret = clk_prepare_enable(dpi->core_clock); + if (ret) + DRM_ERROR("Failed to turn on core clock: %d\n", ret); + + dpi->panel = vc4_dpi_get_panel(dev); + + drm_encoder_init(drm, dpi->encoder, &vc4_dpi_encoder_funcs, + DRM_MODE_ENCODER_DPI, NULL); + drm_encoder_helper_add(dpi->encoder, &vc4_dpi_encoder_helper_funcs); + + dpi->connector = vc4_dpi_connector_init(drm, dpi); + if (IS_ERR(dpi->connector)) { + ret = PTR_ERR(dpi->connector); + goto err_destroy_encoder; + } + + if (dpi->panel) + drm_panel_attach(dpi->panel, dpi->connector); + + dev_set_drvdata(dev, dpi); + + vc4->dpi = dpi; + + return 0; + +err_destroy_encoder: + drm_encoder_cleanup(dpi->encoder); + clk_disable_unprepare(dpi->core_clock); + return ret; +} + +static void vc4_dpi_unbind(struct device *dev, struct device *master, + void *data) +{ + struct drm_device *drm = dev_get_drvdata(master); + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_dpi *dpi = dev_get_drvdata(dev); + + if (dpi->panel) + drm_panel_detach(dpi->panel); + + vc4_dpi_connector_destroy(dpi->connector); + drm_encoder_cleanup(dpi->encoder); + + clk_disable_unprepare(dpi->core_clock); + + vc4->dpi = NULL; +} + +static const struct component_ops vc4_dpi_ops = { + .bind = vc4_dpi_bind, + .unbind = vc4_dpi_unbind, +}; + +static int vc4_dpi_dev_probe(struct platform_device *pdev) +{ + return component_add(&pdev->dev, &vc4_dpi_ops); +} + +static int vc4_dpi_dev_remove(struct platform_device *pdev) +{ + component_del(&pdev->dev, &vc4_dpi_ops); + 
return 0; +} + +struct platform_driver vc4_dpi_driver = { + .probe = vc4_dpi_dev_probe, + .remove = vc4_dpi_dev_remove, + .driver = { + .name = "vc4_dpi", + .of_match_table = vc4_dpi_dt_match, + }, +}; diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index b7d2ff0e6e1f..9ecef9385491 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/of_platform.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include "drm_fb_cma_helper.h" #include "uapi/drm/vc4_drm.h" @@ -43,12 +44,54 @@ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index) return map; } +static int vc4_get_param_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_vc4_get_param *args = data; + int ret; + + if (args->pad != 0) + return -EINVAL; + + switch (args->param) { + case DRM_VC4_PARAM_V3D_IDENT0: + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); + if (ret < 0) + return ret; + args->value = V3D_READ(V3D_IDENT0); + pm_runtime_put(&vc4->v3d->pdev->dev); + break; + case DRM_VC4_PARAM_V3D_IDENT1: + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); + if (ret < 0) + return ret; + args->value = V3D_READ(V3D_IDENT1); + pm_runtime_put(&vc4->v3d->pdev->dev); + break; + case DRM_VC4_PARAM_V3D_IDENT2: + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); + if (ret < 0) + return ret; + args->value = V3D_READ(V3D_IDENT2); + pm_runtime_put(&vc4->v3d->pdev->dev); + break; + case DRM_VC4_PARAM_SUPPORTS_BRANCHES: + args->value = true; + break; + default: + DRM_DEBUG("Unknown parameter %d\n", args->param); + return -EINVAL; + } + + return 0; +} + static void vc4_lastclose(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - if (vc4->fbdev) - drm_fbdev_cma_restore_mode(vc4->fbdev); + drm_fbdev_cma_restore_mode(vc4->fbdev); } static const struct file_operations vc4_drm_fops = { @@ -66,14 +109,15 @@ static const struct file_operations vc4_drm_fops = { }; static const struct drm_ioctl_desc vc4_drm_ioctls[] = { - DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW), }; static struct drm_driver vc4_drm_driver = { @@ -81,6 +125,7 @@ static struct drm_driver vc4_drm_driver = { DRIVER_ATOMIC | DRIVER_GEM | DRIVER_HAVE_IRQ | + DRIVER_RENDER | DRIVER_PRIME), .lastclose = vc4_lastclose, .irq_handler = vc4_irq, @@ -90,7 +135,9 @@ static struct drm_driver vc4_drm_driver = { .enable_vblank = vc4_enable_vblank, .disable_vblank = vc4_disable_vblank, - .get_vblank_counter = drm_vblank_count, + .get_vblank_counter = 
drm_vblank_no_hw_counter, + .get_scanout_position = vc4_crtc_get_scanoutpos, + .get_vblank_timestamp = vc4_crtc_get_vblank_timestamp, #if defined(CONFIG_DEBUG_FS) .debugfs_init = vc4_debugfs_init, @@ -98,7 +145,7 @@ static struct drm_driver vc4_drm_driver = { #endif .gem_create_object = vc4_create_object, - .gem_free_object = vc4_free_object, + .gem_free_object_unlocked = vc4_free_object, .gem_vm_ops = &drm_gem_cma_vm_ops, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, @@ -153,11 +200,28 @@ static void vc4_match_add_drivers(struct device *dev, } } +static void vc4_kick_out_firmware_fb(void) +{ + struct apertures_struct *ap; + + ap = alloc_apertures(1); + if (!ap) + return; + + /* Since VC4 is a UMA device, the simplefb node may have been + * located anywhere in memory. + */ + ap->ranges[0].base = 0; + ap->ranges[0].size = ~0; + + remove_conflicting_framebuffers(ap, "vc4drmfb", false); + kfree(ap); +} + static int vc4_drm_bind(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct drm_device *drm; - struct drm_connector *connector; struct vc4_dev *vc4; int ret = 0; @@ -177,8 +241,6 @@ static int vc4_drm_bind(struct device *dev) vc4_bo_cache_init(drm); drm_mode_config_init(drm); - if (ret) - goto unref; vc4_gem_init(drm); @@ -186,31 +248,20 @@ static int vc4_drm_bind(struct device *dev) if (ret) goto gem_destroy; + vc4_kick_out_firmware_fb(); + ret = drm_dev_register(drm, 0); if (ret < 0) goto unbind_all; - /* Connector registration has to occur after DRM device - * registration, because it creates sysfs entries based on the - * DRM device. - */ - list_for_each_entry(connector, &drm->mode_config.connector_list, head) { - ret = drm_connector_register(connector); - if (ret) - goto unregister; - } - vc4_kms_load(drm); return 0; -unregister: - drm_dev_unregister(drm); unbind_all: component_unbind_all(dev, drm); gem_destroy: vc4_gem_destroy(drm); -unref: drm_dev_unref(drm); vc4_bo_cache_destroy(drm); return ret; @@ -237,8 +288,9 @@ static const struct component_master_ops vc4_drm_ops = { static struct platform_driver *const component_drivers[] = { &vc4_hdmi_driver, - &vc4_crtc_driver, + &vc4_dpi_driver, &vc4_hvs_driver, + &vc4_crtc_driver, &vc4_v3d_driver, }; diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index fa2ad15d4f62..428e24919ef1 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -16,6 +16,7 @@ struct vc4_dev { struct vc4_hvs *hvs; struct vc4_crtc *crtc[3]; struct vc4_v3d *v3d; + struct vc4_dpi *dpi; struct drm_fbdev_cma *fbdev; @@ -320,6 +321,15 @@ vc4_first_render_job(struct vc4_dev *vc4) struct vc4_exec_info, head); } +static inline struct vc4_exec_info * +vc4_last_render_job(struct vc4_dev *vc4) +{ + if (list_empty(&vc4->render_job_list)) + return NULL; + return list_last_entry(&vc4->render_job_list, + struct vc4_exec_info, head); +} + /** * struct vc4_texture_sample_info - saves the offsets into the UBO for texture * setup parameters. 
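[Each V3D_IDENT read in vc4_get_param_ioctl() above is bracketed by a runtime-PM get/put so the register access never hits a powered-down block. The pattern, condensed into a hypothetical helper:]

    static int vc4_read_v3d_ident(struct vc4_dev *vc4, u32 offset, u64 *out)
    {
        int ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);

        if (ret < 0)
            return ret;           /* power-up failed; don't touch regs */

        *out = V3D_READ(offset);  /* safe: the block is now awake */
        pm_runtime_put(&vc4->v3d->pdev->dev);

        return 0;
    }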
@@ -354,6 +364,9 @@ struct vc4_validated_shader_info { uint32_t uniforms_src_size; uint32_t num_texture_samples; struct vc4_texture_sample_info *texture_samples; + + uint32_t num_uniform_addr_offsets; + uint32_t *uniform_addr_offsets; }; /** @@ -414,6 +427,13 @@ extern struct platform_driver vc4_crtc_driver; int vc4_enable_vblank(struct drm_device *dev, unsigned int crtc_id); void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id); int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg); +int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, + unsigned int flags, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode); +int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id, + int *max_error, struct timeval *vblank_time, + unsigned flags); /* vc4_debugfs.c */ int vc4_debugfs_init(struct drm_minor *minor); @@ -422,6 +442,10 @@ void vc4_debugfs_cleanup(struct drm_minor *minor); /* vc4_drv.c */ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index); +/* vc4_dpi.c */ +extern struct platform_driver vc4_dpi_driver; +int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused); + /* vc4_gem.c */ void vc4_gem_init(struct drm_device *dev); void vc4_gem_destroy(struct drm_device *dev); @@ -464,7 +488,7 @@ int vc4_kms_load(struct drm_device *dev); struct drm_plane *vc4_plane_init(struct drm_device *dev, enum drm_plane_type type); u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist); -u32 vc4_plane_dlist_size(struct drm_plane_state *state); +u32 vc4_plane_dlist_size(const struct drm_plane_state *state); void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 8d4384f8b78d..b262c5c26f10 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -53,10 +53,8 @@ vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state) { unsigned int i; - mutex_lock(&dev->struct_mutex); for (i = 0; i < state->user_state.bo_count; i++) - drm_gem_object_unreference(state->bo[i]); - mutex_unlock(&dev->struct_mutex); + drm_gem_object_unreference_unlocked(state->bo[i]); kfree(state); } @@ -536,8 +534,8 @@ vc4_cl_lookup_bos(struct drm_device *dev, return -EINVAL; } - exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *), - GFP_KERNEL); + exec->bo = drm_calloc_large(exec->bo_count, + sizeof(struct drm_gem_cma_object *)); if (!exec->bo) { DRM_ERROR("Failed to allocate validated BO pointers\n"); return -ENOMEM; @@ -574,8 +572,8 @@ vc4_cl_lookup_bos(struct drm_device *dev, spin_unlock(&file_priv->table_lock); fail: - kfree(handles); - return 0; + drm_free_large(handles); + return ret; } static int @@ -610,7 +608,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) * read the contents back for validation, and I think the * bo->vaddr is uncached access. */ - temp = kmalloc(temp_size, GFP_KERNEL); + temp = drm_malloc_ab(temp_size, 1); if (!temp) { DRM_ERROR("Failed to allocate storage for copying " "in bin/render CLs.\n"); @@ -677,7 +675,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) ret = vc4_validate_shader_recs(dev, exec); fail: - kfree(temp); + drm_free_large(temp); return ret; } @@ -687,21 +685,18 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) struct vc4_dev *vc4 = to_vc4_dev(dev); unsigned i; - /* Need the struct lock for drm_gem_object_unreference(). 
*/ - mutex_lock(&dev->struct_mutex); if (exec->bo) { for (i = 0; i < exec->bo_count; i++) - drm_gem_object_unreference(&exec->bo[i]->base); - kfree(exec->bo); + drm_gem_object_unreference_unlocked(&exec->bo[i]->base); + drm_free_large(exec->bo); } while (!list_empty(&exec->unref_list)) { struct vc4_bo *bo = list_first_entry(&exec->unref_list, struct vc4_bo, unref_head); list_del(&bo->unref_head); - drm_gem_object_unreference(&bo->base.base); + drm_gem_object_unreference_unlocked(&bo->base.base); } - mutex_unlock(&dev->struct_mutex); mutex_lock(&vc4->power_lock); if (--vc4->power_refcount == 0) @@ -822,7 +817,7 @@ vc4_wait_bo_ioctl(struct drm_device *dev, void *data, if (args->pad != 0) return -EINVAL; - gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle); + gem_obj = drm_gem_object_lookup(file_priv, args->handle); if (!gem_obj) { DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); return -EINVAL; @@ -947,8 +942,8 @@ vc4_gem_destroy(struct drm_device *dev) vc4->overflow_mem = NULL; } - vc4_bo_cache_destroy(dev); - if (vc4->hang_state) vc4_free_hang_state(dev, vc4->hang_state); + + vc4_bo_cache_destroy(dev); } diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index d8b864925fd3..4452f3631cac 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -208,14 +208,6 @@ static int vc4_hdmi_connector_get_modes(struct drm_connector *connector) return ret; } -static struct drm_encoder * -vc4_hdmi_connector_best_encoder(struct drm_connector *connector) -{ - struct vc4_hdmi_connector *hdmi_connector = - to_vc4_hdmi_connector(connector); - return hdmi_connector->encoder; -} - static const struct drm_connector_funcs vc4_hdmi_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .detect = vc4_hdmi_connector_detect, @@ -228,7 +220,6 @@ static const struct drm_connector_funcs vc4_hdmi_connector_funcs = { static const struct drm_connector_helper_funcs vc4_hdmi_connector_helper_funcs = { .get_modes = vc4_hdmi_connector_get_modes, - .best_encoder = vc4_hdmi_connector_best_encoder, }; static struct drm_connector *vc4_hdmi_connector_init(struct drm_device *dev, @@ -465,12 +456,6 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) if (IS_ERR(hdmi->hd_regs)) return PTR_ERR(hdmi->hd_regs); - ddc_node = of_parse_phandle(dev->of_node, "ddc", 0); - if (!ddc_node) { - DRM_ERROR("Failed to find ddc node in device tree\n"); - return -ENODEV; - } - hdmi->pixel_clock = devm_clk_get(dev, "pixel"); if (IS_ERR(hdmi->pixel_clock)) { DRM_ERROR("Failed to get pixel clock\n"); @@ -482,7 +467,14 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) return PTR_ERR(hdmi->hsm_clock); } + ddc_node = of_parse_phandle(dev->of_node, "ddc", 0); + if (!ddc_node) { + DRM_ERROR("Failed to find ddc node in device tree\n"); + return -ENODEV; + } + hdmi->ddc = of_find_i2c_adapter_by_node(ddc_node); + of_node_put(ddc_node); if (!hdmi->ddc) { DRM_DEBUG("Failed to get ddc i2c adapter by node\n"); return -EPROBE_DEFER; @@ -573,7 +565,7 @@ err_unprepare_hsm: err_unprepare_pix: clk_disable_unprepare(hdmi->pixel_clock); err_put_i2c: - put_device(&vc4->hdmi->ddc->dev); + put_device(&hdmi->ddc->dev); return ret; } diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index b0104a346a74..094bc6a475c1 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -83,8 +83,10 @@ vc4_overflow_mem_work(struct work_struct *work) spin_lock_irqsave(&vc4->job_lock, irqflags); 
current_exec = vc4_first_bin_job(vc4); + if (!current_exec) + current_exec = vc4_last_render_job(vc4); if (current_exec) { - vc4->overflow_mem->seqno = vc4->finished_seqno + 1; + vc4->overflow_mem->seqno = current_exec->seqno; list_add_tail(&vc4->overflow_mem->unref_head, ¤t_exec->unref_list); vc4->overflow_mem = NULL; diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 4718ae5176cc..4ac894d993cd 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -26,8 +26,7 @@ static void vc4_output_poll_changed(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - if (vc4->fbdev) - drm_fbdev_cma_hotplug_event(vc4->fbdev); + drm_fbdev_cma_hotplug_event(vc4->fbdev); } struct vc4_commit { @@ -93,7 +92,7 @@ static struct vc4_commit *commit_init(struct drm_atomic_state *state) * vc4_atomic_commit - commit validated state object * @dev: DRM device * @state: the driver state object - * @async: asynchronous commit + * @nonblock: nonblocking commit * * This function commits a with drm_atomic_helper_check() pre-validated state * object. This can still fail when e.g. the framebuffer reservation fails. For @@ -104,23 +103,33 @@ static struct vc4_commit *commit_init(struct drm_atomic_state *state) */ static int vc4_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, - bool async) + bool nonblock) { struct vc4_dev *vc4 = to_vc4_dev(dev); int ret; int i; uint64_t wait_seqno = 0; struct vc4_commit *c; + struct drm_plane *plane; + struct drm_plane_state *new_state; c = commit_init(state); if (!c) return -ENOMEM; /* Make sure that any outstanding modesets have finished. */ - ret = down_interruptible(&vc4->async_modeset); - if (ret) { - kfree(c); - return ret; + if (nonblock) { + ret = down_trylock(&vc4->async_modeset); + if (ret) { + kfree(c); + return -EBUSY; + } + } else { + ret = down_interruptible(&vc4->async_modeset); + if (ret) { + kfree(c); + return ret; + } } ret = drm_atomic_helper_prepare_planes(dev, state); @@ -130,13 +139,7 @@ static int vc4_atomic_commit(struct drm_device *dev, return ret; } - for (i = 0; i < dev->mode_config.num_total_plane; i++) { - struct drm_plane *plane = state->planes[i]; - struct drm_plane_state *new_state = state->plane_states[i]; - - if (!plane) - continue; - + for_each_plane_in_state(state, plane, new_state, i) { if ((plane->state->fb != new_state->fb) && new_state->fb) { struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(new_state->fb, 0); @@ -152,7 +155,7 @@ static int vc4_atomic_commit(struct drm_device *dev, * the software side now. */ - drm_atomic_helper_swap_state(dev, state); + drm_atomic_helper_swap_state(state, true); /* * Everything below can be run asynchronously without the need to grab @@ -170,7 +173,7 @@ static int vc4_atomic_commit(struct drm_device *dev, * current layout. 
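[The nonblocking commit path above must not sleep waiting for a previous commit, so the semaphore acquisition splits on the new flag. The gate, reduced to its essentials (the driver also frees its commit tracking struct on these early returns, elided here):]

    if (nonblock) {
        if (down_trylock(&vc4->async_modeset))
            return -EBUSY;                  /* previous commit still live */
    } else {
        ret = down_interruptible(&vc4->async_modeset);
        if (ret)
            return ret;                     /* interrupted while waiting */
    }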
*/ - if (async) { + if (nonblock) { vc4_queue_seqno_cb(dev, &c->cb, wait_seqno, vc4_atomic_complete_commit_seqno_cb); } else { @@ -207,8 +210,6 @@ int vc4_kms_load(struct drm_device *dev) dev->mode_config.preferred_depth = 24; dev->mode_config.async_page_flip = true; - dev->vblank_disable_allowed = true; - drm_mode_config_reset(dev); vc4->fbdev = drm_fbdev_cma_init(dev, 32, diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 7b0c72ae02a0..29e4b400e25e 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -94,6 +94,14 @@ static const struct hvs_format { .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true, }, { + .drm = DRM_FORMAT_ABGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, + .pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = true, + }, + { + .drm = DRM_FORMAT_XBGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, + .pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = false, + }, + { .drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565, .pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false, }, @@ -208,7 +216,7 @@ static void vc4_plane_destroy_state(struct drm_plane *plane, } kfree(vc4_state->dlist); - __drm_atomic_helper_plane_destroy_state(plane, &vc4_state->base); + __drm_atomic_helper_plane_destroy_state(&vc4_state->base); kfree(state); } @@ -690,9 +698,10 @@ u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist) return vc4_state->dlist_count; } -u32 vc4_plane_dlist_size(struct drm_plane_state *state) +u32 vc4_plane_dlist_size(const struct drm_plane_state *state) { - struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + const struct vc4_plane_state *vc4_state = + container_of(state, typeof(*vc4_state), base); return vc4_state->dlist_count; } diff --git a/drivers/gpu/drm/vc4/vc4_qpu_defines.h b/drivers/gpu/drm/vc4/vc4_qpu_defines.h index d5c2f3c85ebb..f4e795a0d3f6 100644 --- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h +++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h @@ -70,7 +70,7 @@ enum qpu_raddr { QPU_R_ELEM_QPU = 38, QPU_R_NOP, QPU_R_XY_PIXEL_COORD = 41, - QPU_R_MS_REV_FLAGS = 41, + QPU_R_MS_REV_FLAGS = 42, QPU_R_VPM = 48, QPU_R_VPM_LD_BUSY, QPU_R_VPM_LD_WAIT, @@ -230,6 +230,15 @@ enum qpu_unpack_r4 { #define QPU_COND_MUL_SHIFT 46 #define QPU_COND_MUL_MASK QPU_MASK(48, 46) +#define QPU_BRANCH_COND_SHIFT 52 +#define QPU_BRANCH_COND_MASK QPU_MASK(55, 52) + +#define QPU_BRANCH_REL ((uint64_t)1 << 51) +#define QPU_BRANCH_REG ((uint64_t)1 << 50) + +#define QPU_BRANCH_RADDR_A_SHIFT 45 +#define QPU_BRANCH_RADDR_A_MASK QPU_MASK(49, 45) + #define QPU_SF ((uint64_t)1 << 45) #define QPU_WADDR_ADD_SHIFT 38 @@ -261,4 +270,10 @@ enum qpu_unpack_r4 { #define QPU_OP_ADD_SHIFT 24 #define QPU_OP_ADD_MASK QPU_MASK(28, 24) +#define QPU_LOAD_IMM_SHIFT 0 +#define QPU_LOAD_IMM_MASK QPU_MASK(31, 0) + +#define QPU_BRANCH_TARGET_SHIFT 0 +#define QPU_BRANCH_TARGET_MASK QPU_MASK(31, 0) + #endif /* VC4_QPU_DEFINES_H */ diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index bf42a8e87111..160942a9180e 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -341,6 +341,10 @@ #define SCALER_DISPLACT0 0x00000030 #define SCALER_DISPLACT1 0x00000034 #define SCALER_DISPLACT2 0x00000038 +#define SCALER_DISPLACTX(x) (SCALER_DISPLACT0 + \ + (x) * (SCALER_DISPLACT1 - \ + SCALER_DISPLACT0)) + #define SCALER_DISPCTRL0 0x00000040 # define SCALER_DISPCTRLX_ENABLE BIT(31) # define SCALER_DISPCTRLX_RESET BIT(30) @@ -362,7 +366,6 @@ # define SCALER_DISPBKGND_FILL BIT(24) #define SCALER_DISPSTAT0 
0x00000048 -#define SCALER_DISPBASE0 0x0000004c # define SCALER_DISPSTATX_MODE_MASK VC4_MASK(31, 30) # define SCALER_DISPSTATX_MODE_SHIFT 30 # define SCALER_DISPSTATX_MODE_DISABLED 0 @@ -371,6 +374,24 @@ # define SCALER_DISPSTATX_MODE_EOF 3 # define SCALER_DISPSTATX_FULL BIT(29) # define SCALER_DISPSTATX_EMPTY BIT(28) +# define SCALER_DISPSTATX_FRAME_COUNT_MASK VC4_MASK(17, 12) +# define SCALER_DISPSTATX_FRAME_COUNT_SHIFT 12 +# define SCALER_DISPSTATX_LINE_MASK VC4_MASK(11, 0) +# define SCALER_DISPSTATX_LINE_SHIFT 0 + +#define SCALER_DISPBASE0 0x0000004c +/* Last pixel in the COB (display FIFO memory) allocated to this HVS + * channel. Must be 4-pixel aligned (and thus 4 pixels less than the + * next COB base). + */ +# define SCALER_DISPBASEX_TOP_MASK VC4_MASK(31, 16) +# define SCALER_DISPBASEX_TOP_SHIFT 16 +/* First pixel in the COB (display FIFO memory) allocated to this HVS + * channel. Must be 4-pixel aligned. + */ +# define SCALER_DISPBASEX_BASE_MASK VC4_MASK(15, 0) +# define SCALER_DISPBASEX_BASE_SHIFT 0 + #define SCALER_DISPCTRL1 0x00000050 #define SCALER_DISPBKGND1 0x00000054 #define SCALER_DISPBKGNDX(x) (SCALER_DISPBKGND0 + \ @@ -381,6 +402,9 @@ (x) * (SCALER_DISPSTAT1 - \ SCALER_DISPSTAT0)) #define SCALER_DISPBASE1 0x0000005c +#define SCALER_DISPBASEX(x) (SCALER_DISPBASE0 + \ + (x) * (SCALER_DISPBASE1 - \ + SCALER_DISPBASE0)) #define SCALER_DISPCTRL2 0x00000060 #define SCALER_DISPCTRLX(x) (SCALER_DISPCTRL0 + \ (x) * (SCALER_DISPCTRL1 - \ @@ -390,6 +414,12 @@ #define SCALER_DISPBASE2 0x0000006c #define SCALER_DISPALPHA2 0x00000070 #define SCALER_GAMADDR 0x00000078 +# define SCALER_GAMADDR_AUTOINC BIT(31) +/* Enables all gamma ramp SRAMs, not just those of CRTCs with gamma + * enabled. + */ +# define SCALER_GAMADDR_SRAMENB BIT(30) + #define SCALER_GAMDATA 0x000000e0 #define SCALER_DLIST_START 0x00002000 #define SCALER_DLIST_SIZE 0x00004000 diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index 24c2c746e8f3..9ce1d0adf882 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -802,7 +802,7 @@ validate_gl_shader_rec(struct drm_device *dev, uint32_t src_offset = *(uint32_t *)(pkt_u + o); uint32_t *texture_handles_u; void *uniform_data_u; - uint32_t tex; + uint32_t tex, uni; *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset; @@ -840,6 +840,17 @@ validate_gl_shader_rec(struct drm_device *dev, } } + /* Fill in the uniform slots that need this shader's + * start-of-uniforms address (used for resetting the uniform + * stream in the presence of control flow). + */ + for (uni = 0; + uni < validated_shader->num_uniform_addr_offsets; + uni++) { + uint32_t o = validated_shader->uniform_addr_offsets[uni]; + ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p; + } + *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p; exec->uniforms_u += validated_shader->uniforms_src_size; diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index f67124b4c534..2543cf5b8b51 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -39,7 +39,17 @@ #include "vc4_drv.h" #include "vc4_qpu_defines.h" +#define LIVE_REG_COUNT (32 + 32 + 4) + struct vc4_shader_validation_state { + /* Current IP being validated. 
*/ + uint32_t ip; + + /* IP at the end of the BO, do not read shader[max_ip] */ + uint32_t max_ip; + + uint64_t *shader; + struct vc4_texture_sample_info tmu_setup[2]; int tmu_write_count[2]; @@ -49,8 +59,30 @@ struct vc4_shader_validation_state { * * This is used for the validation of direct address memory reads. */ - uint32_t live_min_clamp_offsets[32 + 32 + 4]; - bool live_max_clamp_regs[32 + 32 + 4]; + uint32_t live_min_clamp_offsets[LIVE_REG_COUNT]; + bool live_max_clamp_regs[LIVE_REG_COUNT]; + uint32_t live_immediates[LIVE_REG_COUNT]; + + /* Bitfield of which IPs are used as branch targets. + * + * Used for validation that the uniform stream is updated at the right + * points and clearing the texturing/clamping state. + */ + unsigned long *branch_targets; + + /* Set when entering a basic block, and cleared when the uniform + * address update is found. This is used to make sure that we don't + * read uniforms when the address is undefined. + */ + bool needs_uniform_address_update; + + /* Set when we find a backwards branch. If the branch is backwards, + * the taraget is probably doing an address reset to read uniforms, + * and so we need to be sure that a uniforms address is present in the + * stream, even if the shader didn't need to read uniforms in later + * basic blocks. + */ + bool needs_uniform_address_for_loop; }; static uint32_t @@ -129,11 +161,11 @@ record_texture_sample(struct vc4_validated_shader_info *validated_shader, } static bool -check_tmu_write(uint64_t inst, - struct vc4_validated_shader_info *validated_shader, +check_tmu_write(struct vc4_validated_shader_info *validated_shader, struct vc4_shader_validation_state *validation_state, bool is_mul) { + uint64_t inst = validation_state->shader[validation_state->ip]; uint32_t waddr = (is_mul ? QPU_GET_FIELD(inst, QPU_WADDR_MUL) : QPU_GET_FIELD(inst, QPU_WADDR_ADD)); @@ -162,7 +194,7 @@ check_tmu_write(uint64_t inst, return false; } - /* We assert that the the clamped address is the first + /* We assert that the clamped address is the first * argument, and the UBO base address is the second argument. * This is arbitrary, but simpler than supporting flipping the * two either way. 
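[The branch_targets field introduced above is a plain kernel bitmap with one bit per instruction slot. A hedged sketch of the bookkeeping around it — the sizing follows the struct's comments, and the helper name is illustrative, not from the patch:]

    /* Allocate one bit per 64-bit instruction in the shader. */
    validation_state->branch_targets =
        kcalloc(BITS_TO_LONGS(validation_state->max_ip),
                sizeof(unsigned long), GFP_KERNEL);

    /* While scanning branches, mark each destination IP... */
    set_bit(branch_target_ip, validation_state->branch_targets);

    /* ...then at validation time, reset per-block state on block entry. */
    if (test_bit(ip, validation_state->branch_targets))
        reset_block_state(validation_state);   /* hypothetical helper */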
@@ -212,8 +244,14 @@ check_tmu_write(uint64_t inst, /* Since direct uses a RADDR uniform reference, it will get counted in * check_instruction_reads() */ - if (!is_direct) + if (!is_direct) { + if (validation_state->needs_uniform_address_update) { + DRM_ERROR("Texturing with undefined uniform address\n"); + return false; + } + validated_shader->uniforms_size += 4; + } if (submit) { if (!record_texture_sample(validated_shader, @@ -227,23 +265,144 @@ check_tmu_write(uint64_t inst, return true; } +static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader) +{ + uint32_t o = validated_shader->num_uniform_addr_offsets; + uint32_t num_uniforms = validated_shader->uniforms_size / 4; + + validated_shader->uniform_addr_offsets = + krealloc(validated_shader->uniform_addr_offsets, + (o + 1) * + sizeof(*validated_shader->uniform_addr_offsets), + GFP_KERNEL); + if (!validated_shader->uniform_addr_offsets) + return false; + + validated_shader->uniform_addr_offsets[o] = num_uniforms; + validated_shader->num_uniform_addr_offsets++; + + return true; +} + static bool -check_reg_write(uint64_t inst, - struct vc4_validated_shader_info *validated_shader, +validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader, + struct vc4_shader_validation_state *validation_state, + bool is_mul) +{ + uint64_t inst = validation_state->shader[validation_state->ip]; + u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B); + u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); + u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); + u32 add_lri = raddr_add_a_to_live_reg_index(inst); + /* We want our reset to be pointing at whatever uniform follows the + * uniforms base address. + */ + u32 expected_offset = validated_shader->uniforms_size + 4; + + /* We only support absolute uniform address changes, and we + * require that they be in the current basic block before any + * of its uniform reads. + * + * One could potentially emit more efficient QPU code, by + * noticing that (say) an if statement does uniform control + * flow for all threads and that the if reads the same number + * of uniforms on each side. However, this scheme is easy to + * validate so it's all we allow for now. 
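[These checks all decode fields out of the raw 64-bit QPU instruction with the same ##_MASK / ##_SHIFT convention used for registers. A minimal sketch, assuming the vc4_qpu_defines.h style of definitions:]

    #define QPU_MASK(high, low) \
        ((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low))
    #define QPU_GET_FIELD(word, field) \
        ((uint32_t)(((word) & field##_MASK) >> field##_SHIFT))

    /* Per vc4_qpu_defines.h: the signal sits in the top nibble. */
    #define QPU_SIG_SHIFT        60
    #define QPU_SIG_MASK         QPU_MASK(63, 60)
    #define QPU_WADDR_ADD_SHIFT  38
    #define QPU_WADDR_ADD_MASK   QPU_MASK(43, 38)

    uint64_t inst = validation_state->shader[validation_state->ip];
    uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
    uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);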
 static bool
-check_reg_write(uint64_t inst,
-		struct vc4_validated_shader_info *validated_shader,
+validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
+			       struct vc4_shader_validation_state *validation_state,
+			       bool is_mul)
+{
+	uint64_t inst = validation_state->shader[validation_state->ip];
+	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
+	/* We want our reset to be pointing at whatever uniform follows the
+	 * uniforms base address.
+	 */
+	u32 expected_offset = validated_shader->uniforms_size + 4;
+
+	/* We only support absolute uniform address changes, and we
+	 * require that they be in the current basic block before any
+	 * of its uniform reads.
+	 *
+	 * One could potentially emit more efficient QPU code, by
+	 * noticing that (say) an if statement does uniform control
+	 * flow for all threads and that the if reads the same number
+	 * of uniforms on each side.  However, this scheme is easy to
+	 * validate so it's all we allow for now.
+	 */
+	switch (QPU_GET_FIELD(inst, QPU_SIG)) {
+	case QPU_SIG_NONE:
+	case QPU_SIG_SCOREBOARD_UNLOCK:
+	case QPU_SIG_COLOR_LOAD:
+	case QPU_SIG_LOAD_TMU0:
+	case QPU_SIG_LOAD_TMU1:
+		break;
+	default:
+		DRM_ERROR("uniforms address change must be "
+			  "normal math\n");
+		return false;
+	}
+
+	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
+		DRM_ERROR("Uniform address reset must be an ADD.\n");
+		return false;
+	}
+
+	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
+		DRM_ERROR("Uniform address reset must be unconditional.\n");
+		return false;
+	}
+
+	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
+	    !(inst & QPU_PM)) {
+		DRM_ERROR("No packing allowed on uniforms reset\n");
+		return false;
+	}
+
+	if (add_lri == -1) {
+		DRM_ERROR("First argument of uniform address write must be "
+			  "an immediate value.\n");
+		return false;
+	}
+
+	if (validation_state->live_immediates[add_lri] != expected_offset) {
+		DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
+			  validation_state->live_immediates[add_lri],
+			  expected_offset);
+		return false;
+	}
+
+	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
+		DRM_ERROR("Second argument of uniform address write must be "
+			  "a uniform.\n");
+		return false;
+	}
+
+	validation_state->needs_uniform_address_update = false;
+	validation_state->needs_uniform_address_for_loop = false;
+	return require_uniform_address_uniform(validated_shader);
+}
+
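In assembler-style pseudocode, the only idiom these checks accept is a load of a known immediate followed by an unconditional, unpacked ADD of that immediate and a uniform into the uniforms address register. The offset arithmetic is worth spelling out with a worked example; the helper below is illustrative, not the patch's code:

/* Worked example of the expected_offset rule above (illustrative only):
 * if the shader has already consumed 8 bytes of uniforms, the reset
 * value it must load is 12, because the reset uniform itself occupies
 * bytes 8-11 and the next uniform to fetch lives at offset 12.
 */
static inline uint32_t example_expected_reset_offset(uint32_t uniforms_size)
{
	return uniforms_size + 4;	/* e.g. 8 -> 12 */
}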
+static bool
+check_reg_write(struct vc4_validated_shader_info *validated_shader,
 		struct vc4_shader_validation_state *validation_state,
 		bool is_mul)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t waddr = (is_mul ?
 			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
 			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+	bool ws = inst & QPU_WS;
+	bool is_b = is_mul ^ ws;
+	u32 lri = waddr_to_live_reg_index(waddr, is_b);
+
+	if (lri != -1) {
+		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
+
+		if (sig == QPU_SIG_LOAD_IMM &&
+		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
+		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
+		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
+			validation_state->live_immediates[lri] =
+				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
+		} else {
+			validation_state->live_immediates[lri] = ~0;
+		}
+	}
 
 	switch (waddr) {
 	case QPU_W_UNIFORMS_ADDRESS:
-		/* XXX: We'll probably need to support this for reladdr, but
-		 * it's definitely a security-related one.
-		 */
-		DRM_ERROR("uniforms address load unsupported\n");
-		return false;
+		if (is_b) {
+			DRM_ERROR("relative uniforms address change "
+				  "unsupported\n");
+			return false;
+		}
+
+		return validate_uniform_address_write(validated_shader,
+						      validation_state,
+						      is_mul);
 
 	case QPU_W_TLB_COLOR_MS:
 	case QPU_W_TLB_COLOR_ALL:
@@ -261,7 +420,7 @@ check_reg_write(uint64_t inst,
 	case QPU_W_TMU1_T:
 	case QPU_W_TMU1_R:
 	case QPU_W_TMU1_B:
-		return check_tmu_write(inst, validated_shader, validation_state,
+		return check_tmu_write(validated_shader, validation_state,
 				       is_mul);
 
 	case QPU_W_HOST_INT:
@@ -294,10 +453,10 @@ check_reg_write(uint64_t inst,
 }
 
 static void
-track_live_clamps(uint64_t inst,
-		  struct vc4_validated_shader_info *validated_shader,
+track_live_clamps(struct vc4_validated_shader_info *validated_shader,
 		  struct vc4_shader_validation_state *validation_state)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
 	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
 	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
@@ -369,10 +528,10 @@ track_live_clamps(uint64_t inst,
 }
 
 static bool
-check_instruction_writes(uint64_t inst,
-			 struct vc4_validated_shader_info *validated_shader,
+check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
 			 struct vc4_shader_validation_state *validation_state)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
 	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
 	bool ok;
@@ -382,20 +541,44 @@ check_instruction_writes(uint64_t inst,
 		return false;
 	}
 
-	ok = (check_reg_write(inst, validated_shader, validation_state,
-			      false) &&
-	      check_reg_write(inst, validated_shader, validation_state,
-			      true));
+	ok = (check_reg_write(validated_shader, validation_state, false) &&
+	      check_reg_write(validated_shader, validation_state, true));
 
-	track_live_clamps(inst, validated_shader, validation_state);
+	track_live_clamps(validated_shader, validation_state);
 
 	return ok;
 }
 
 static bool
-check_instruction_reads(uint64_t inst,
-			struct vc4_validated_shader_info *validated_shader)
+check_branch(uint64_t inst,
+	     struct vc4_validated_shader_info *validated_shader,
+	     struct vc4_shader_validation_state *validation_state,
+	     int ip)
+{
+	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
+	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+
+	if ((int)branch_imm < 0)
+		validation_state->needs_uniform_address_for_loop = true;
+
+	/* We don't want to have to worry about validation of this, and
+	 * there's no need for it.
+	 */
+	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
+		DRM_ERROR("branch instruction at %d wrote a register.\n",
+			  validation_state->ip);
+		return false;
+	}
+
+	return true;
+}
+
+static bool
+check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
+			struct vc4_shader_validation_state *validation_state)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
 	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
 	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
@@ -407,40 +590,204 @@ check_instruction_reads(uint64_t inst,
 	 * already be OOM.
 	 */
 	validated_shader->uniforms_size += 4;
+
+	if (validation_state->needs_uniform_address_update) {
+		DRM_ERROR("Uniform read with undefined uniform "
+			  "address\n");
+		return false;
+	}
 	}
 
 	return true;
 }
 
+/* Make sure that all branches are PC-relative with an immediate offset and
+ * point within the shader, and note their targets for later.
+ */
+static bool
+vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
+{
+	uint32_t max_branch_target = 0;
+	bool found_shader_end = false;
+	int ip;
+	int shader_end_ip = 0;
+	int last_branch = -2;
+
+	for (ip = 0; ip < validation_state->max_ip; ip++) {
+		uint64_t inst = validation_state->shader[ip];
+		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
+		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+		uint32_t after_delay_ip = ip + 4;
+		uint32_t branch_target_ip;
+
+		if (sig == QPU_SIG_PROG_END) {
+			shader_end_ip = ip;
+			found_shader_end = true;
+			continue;
+		}
+
+		if (sig != QPU_SIG_BRANCH)
+			continue;
+
+		if (ip - last_branch < 4) {
+			DRM_ERROR("Branch at %d during delay slots\n", ip);
+			return false;
+		}
+		last_branch = ip;
+
+		if (inst & QPU_BRANCH_REG) {
+			DRM_ERROR("branching from register relative "
+				  "not supported\n");
+			return false;
+		}
+
+		if (!(inst & QPU_BRANCH_REL)) {
+			DRM_ERROR("relative branching required\n");
+			return false;
+		}
+
+		/* The actual branch target is the instruction after the delay
+		 * slots, plus whatever byte offset is in the low 32 bits of
+		 * the instruction.  Make sure we're not branching beyond the
+		 * end of the shader object.
+		 */
+		if (branch_imm % sizeof(inst) != 0) {
+			DRM_ERROR("branch target not aligned\n");
+			return false;
+		}
+
+		branch_target_ip = after_delay_ip + (branch_imm >> 3);
+		if (branch_target_ip >= validation_state->max_ip) {
+			DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
+				  ip, branch_target_ip,
+				  validation_state->max_ip);
+			return false;
+		}
+		set_bit(branch_target_ip, validation_state->branch_targets);
+
+		/* Make sure that the non-branching path is also not outside
+		 * the shader.
+		 */
+		if (after_delay_ip >= validation_state->max_ip) {
+			DRM_ERROR("Branch at %d continues past shader end "
+				  "(%d/%d)\n",
+				  ip, after_delay_ip, validation_state->max_ip);
+			return false;
+		}
+		set_bit(after_delay_ip, validation_state->branch_targets);
+		max_branch_target = max(max_branch_target, after_delay_ip);
+
+		/* There are two delay slots after program end is signaled
+		 * that are still executed, then we're finished.
+		 */
+		if (found_shader_end && ip == shader_end_ip + 2)
+			break;
+	}
+
+	if (max_branch_target > shader_end_ip) {
+		DRM_ERROR("Branch landed after QPU_SIG_PROG_END\n");
+		return false;
+	}
+
+	return true;
+}
+
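The target arithmetic above is compact, so here is a standalone restatement of it with the delay-slot assumption spelled out; this is illustrative, not the patch's code:

/* Illustrative sketch, not part of the patch: how a relative QPU branch
 * resolves, assuming 64-bit (8-byte) instructions and three delay slots
 * after the branch, so execution reaches ip + 4 before the byte offset
 * in the instruction's low bits is applied.
 */
static uint32_t example_branch_target(uint32_t ip, int32_t branch_imm)
{
	uint32_t after_delay_ip = ip + 4;	/* branch plus 3 delay slots */

	return after_delay_ip + (branch_imm >> 3);	/* bytes -> instructions */
}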
+/* Resets any known state for the shader, used when we may be branched to from
+ * multiple locations in the program (or at shader start).
+ */
+static void
+reset_validation_state(struct vc4_shader_validation_state *validation_state)
+{
+	int i;
+
+	for (i = 0; i < 8; i++)
+		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;
+
+	for (i = 0; i < LIVE_REG_COUNT; i++) {
+		validation_state->live_min_clamp_offsets[i] = ~0;
+		validation_state->live_max_clamp_regs[i] = false;
+		validation_state->live_immediates[i] = ~0;
+	}
+}
+
+static bool
+texturing_in_progress(struct vc4_shader_validation_state *validation_state)
+{
+	return (validation_state->tmu_write_count[0] != 0 ||
+		validation_state->tmu_write_count[1] != 0);
+}
+
+static bool
+vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
+{
+	uint32_t ip = validation_state->ip;
+
+	if (!test_bit(ip, validation_state->branch_targets))
+		return true;
+
+	if (texturing_in_progress(validation_state)) {
+		DRM_ERROR("Branch target landed during TMU setup\n");
+		return false;
+	}
+
+	/* Reset our live values tracking, since this instruction may have
+	 * multiple predecessors.
+	 *
+	 * One could potentially do analysis to determine that, for
+	 * example, all predecessors have a live max clamp in the same
+	 * register, but we don't bother with that.
+	 */
+	reset_validation_state(validation_state);
+
+	/* Since we've entered a basic block from potentially multiple
+	 * predecessors, we need the uniforms address to be updated before any
+	 * uniforms are read.  We require that after any branch point, the
+	 * next uniform to be loaded is a uniform address offset.  That
+	 * uniform's offset will be marked by the uniform address register
+	 * write validation, or by the one-off end-of-program check.
+	 */
+	validation_state->needs_uniform_address_update = true;
+
+	return true;
+}
+
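The basic-block rule in vc4_handle_branch_target() is easiest to see with a concrete uniform stream. A hypothetical layout that the rule accepts, for a shader with one branch target (offsets and contents are invented for illustration):

/* Illustrative only, not from the patch: the shape of a uniform stream
 * that satisfies the rule above, for a shader with one branch target.
 */
static const struct {
	uint32_t offset;
	const char *contents;
} example_uniform_stream[] = {
	{ 0,  "uniform read before the branch" },
	{ 4,  "another uniform read before the branch" },
	{ 8,  "uniforms-address reset value (must be 8 + 4 = 12)" },
	{ 12, "first uniform read in the post-branch basic block" },
};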
 struct vc4_validated_shader_info *
 vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 {
 	bool found_shader_end = false;
 	int shader_end_ip = 0;
-	uint32_t ip, max_ip;
-	uint64_t *shader;
-	struct vc4_validated_shader_info *validated_shader;
+	uint32_t ip;
+	struct vc4_validated_shader_info *validated_shader = NULL;
 	struct vc4_shader_validation_state validation_state;
-	int i;
 
 	memset(&validation_state, 0, sizeof(validation_state));
+	validation_state.shader = shader_obj->vaddr;
+	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
 
-	for (i = 0; i < 8; i++)
-		validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
-	for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
-		validation_state.live_min_clamp_offsets[i] = ~0;
+	reset_validation_state(&validation_state);
 
-	shader = shader_obj->vaddr;
-	max_ip = shader_obj->base.size / sizeof(uint64_t);
+	validation_state.branch_targets =
+		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
+			sizeof(unsigned long), GFP_KERNEL);
+	if (!validation_state.branch_targets)
+		goto fail;
 
 	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
 	if (!validated_shader)
-		return NULL;
+		goto fail;
+
+	if (!vc4_validate_branches(&validation_state))
+		goto fail;
 
-	for (ip = 0; ip < max_ip; ip++) {
-		uint64_t inst = shader[ip];
+	for (ip = 0; ip < validation_state.max_ip; ip++) {
+		uint64_t inst = validation_state.shader[ip];
 		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
 
+		validation_state.ip = ip;
+
+		if (!vc4_handle_branch_target(&validation_state))
+			goto fail;
+
 		switch (sig) {
 		case QPU_SIG_NONE:
 		case QPU_SIG_WAIT_FOR_SCOREBOARD:
@@ -450,13 +797,14 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		case QPU_SIG_LOAD_TMU1:
 		case QPU_SIG_PROG_END:
 		case QPU_SIG_SMALL_IMM:
-			if (!check_instruction_writes(inst, validated_shader,
+			if (!check_instruction_writes(validated_shader,
 						      &validation_state)) {
 				DRM_ERROR("Bad write at ip %d\n", ip);
 				goto fail;
 			}
 
-			if (!check_instruction_reads(inst, validated_shader))
+			if (!check_instruction_reads(validated_shader,
+						     &validation_state))
 				goto fail;
 
 			if (sig == QPU_SIG_PROG_END) {
@@ -467,13 +815,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 			break;
 
 		case QPU_SIG_LOAD_IMM:
-			if (!check_instruction_writes(inst, validated_shader,
+			if (!check_instruction_writes(validated_shader,
 						      &validation_state)) {
 				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
 				goto fail;
 			}
 			break;
 
+		case QPU_SIG_BRANCH:
+			if (!check_branch(inst, validated_shader,
+					  &validation_state, ip))
+				goto fail;
+			break;
 		default:
 			DRM_ERROR("Unsupported QPU signal %d at "
 				  "instruction %d\n", sig, ip);
@@ -487,13 +840,28 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		break;
 	}
 
-	if (ip == max_ip) {
+	if (ip == validation_state.max_ip) {
 		DRM_ERROR("shader failed to terminate before "
 			  "shader BO end at %zd\n",
 			  shader_obj->base.size);
 		goto fail;
 	}
 
+	/* If we did a backwards branch and we haven't emitted a uniforms
+	 * reset since then, we still need the uniforms stream to have the
+	 * uniforms address available so that the backwards branch can do its
+	 * uniforms reset.
+	 *
+	 * We could potentially prove that the backwards branch doesn't
+	 * contain any uses of uniforms until program exit, but that doesn't
+	 * seem to be worth the trouble.
+	 */
+	if (validation_state.needs_uniform_address_for_loop) {
+		if (!require_uniform_address_uniform(validated_shader))
+			goto fail;
+		validated_shader->uniforms_size += 4;
+	}
+
 	/* Again, no chance of integer overflow here because the worst case
 	 * scenario is 8 bytes of uniforms plus handles per 8-byte
 	 * instruction.
@@ -502,9 +870,12 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		(validated_shader->uniforms_size +
 		 4 * validated_shader->num_texture_samples);
 
+	kfree(validation_state.branch_targets);
+
 	return validated_shader;
 
 fail:
+	kfree(validation_state.branch_targets);
 	if (validated_shader) {
+		kfree(validated_shader->uniform_addr_offsets);
 		kfree(validated_shader->texture_samples);
 		kfree(validated_shader);
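Finally, a minimal caller sketch, not part of the patch, showing the ownership rules implied by the allocations above: on success the caller owns the returned info along with its texture_samples and uniform_addr_offsets arrays.

/* Illustrative caller sketch only.  Field names follow
 * struct vc4_validated_shader_info as extended by this patch.
 */
static int example_validate(struct drm_gem_cma_object *shader_obj)
{
	struct vc4_validated_shader_info *info;

	info = vc4_validate_shader(shader_obj);
	if (!info)
		return -EINVAL;

	/* ... consume info->uniforms_size, info->num_texture_samples,
	 * info->uniform_addr_offsets, etc. ...
	 */

	kfree(info->uniform_addr_offsets);
	kfree(info->texture_samples);
	kfree(info);
	return 0;
}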