19 files changed, 1481 insertions, 445 deletions
diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig
index 4361bdcfd28a..fdae18aeab4f 100644
--- a/drivers/gpu/drm/vc4/Kconfig
+++ b/drivers/gpu/drm/vc4/Kconfig
@@ -19,3 +19,11 @@ config DRM_VC4
 	  This driver requires that "avoid_warnings=2" be present in
 	  the config.txt for the firmware, to keep it from smashing
 	  our display setup.
+
+config DRM_VC4_HDMI_CEC
+       bool "Broadcom VC4 HDMI CEC Support"
+       depends on DRM_VC4
+       select CEC_CORE
+       help
+	  Choose this option if you have a Broadcom VC4 GPU
+	  and want to use CEC.
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
index 25bd5d30415d..f5500df51686 100644
--- a/drivers/gpu/drm/vc4/Makefile
+++ b/drivers/gpu/drm/vc4/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 # Please keep these build lists sorted!
 
 # core driver code
@@ -24,5 +25,3 @@ vc4-y := \
 vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o
 
 obj-$(CONFIG_DRM_VC4)  += vc4.o
-
-CFLAGS_vc4_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 487f96412d35..4ae45d7dac42 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -24,21 +24,46 @@
 #include "vc4_drv.h"
 #include "uapi/drm/vc4_drm.h"
 
+static const char * const bo_type_names[] = {
+	"kernel",
+	"V3D",
+	"V3D shader",
+	"dumb",
+	"binner",
+	"RCL",
+	"BCL",
+	"kernel BO cache",
+};
+
+static bool is_user_label(int label)
+{
+	return label >= VC4_BO_TYPE_COUNT;
+}
+
 static void vc4_bo_stats_dump(struct vc4_dev *vc4)
 {
-	DRM_INFO("num bos allocated: %d\n",
-		 vc4->bo_stats.num_allocated);
-	DRM_INFO("size bos allocated: %dkb\n",
-		 vc4->bo_stats.size_allocated / 1024);
-	DRM_INFO("num bos used: %d\n",
-		 vc4->bo_stats.num_allocated - vc4->bo_stats.num_cached);
-	DRM_INFO("size bos used: %dkb\n",
-		 (vc4->bo_stats.size_allocated -
-		  vc4->bo_stats.size_cached) / 1024);
-	DRM_INFO("num bos cached: %d\n",
-		 vc4->bo_stats.num_cached);
-	DRM_INFO("size bos cached: %dkb\n",
-		 vc4->bo_stats.size_cached / 1024);
+	int i;
+
+	for (i = 0; i < vc4->num_labels; i++) {
+		if (!vc4->bo_labels[i].num_allocated)
+			continue;
+
+		DRM_INFO("%30s: %6dkb BOs (%d)\n",
+			 vc4->bo_labels[i].name,
+			 vc4->bo_labels[i].size_allocated / 1024,
+			 vc4->bo_labels[i].num_allocated);
+	}
+
+	mutex_lock(&vc4->purgeable.lock);
+	if (vc4->purgeable.num)
+		DRM_INFO("%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
+			 vc4->purgeable.size / 1024, vc4->purgeable.num);
+
+	if (vc4->purgeable.purged_num)
+		DRM_INFO("%30s: %6zdkb BOs (%d)\n", "total purged BO",
+			 vc4->purgeable.purged_size / 1024,
+			 vc4->purgeable.purged_num);
+	mutex_unlock(&vc4->purgeable.lock);
 }
 
 #ifdef CONFIG_DEBUG_FS
@@ -47,64 +72,144 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	struct vc4_bo_stats stats;
+	int i;
 
-	/* Take a snapshot of the current stats with the lock held. */
 	mutex_lock(&vc4->bo_lock);
-	stats = vc4->bo_stats;
+	for (i = 0; i < vc4->num_labels; i++) {
+		if (!vc4->bo_labels[i].num_allocated)
+			continue;
+
+		seq_printf(m, "%30s: %6dkb BOs (%d)\n",
+			   vc4->bo_labels[i].name,
+			   vc4->bo_labels[i].size_allocated / 1024,
+			   vc4->bo_labels[i].num_allocated);
+	}
 	mutex_unlock(&vc4->bo_lock);
 
-	seq_printf(m, "num bos allocated: %d\n",
-		   stats.num_allocated);
-	seq_printf(m, "size bos allocated: %dkb\n",
-		   stats.size_allocated / 1024);
-	seq_printf(m, "num bos used: %d\n",
-		   stats.num_allocated - stats.num_cached);
-	seq_printf(m, "size bos used: %dkb\n",
-		   (stats.size_allocated - stats.size_cached) / 1024);
-	seq_printf(m, "num bos cached: %d\n",
-		   stats.num_cached);
-	seq_printf(m, "size bos cached: %dkb\n",
-		   stats.size_cached / 1024);
+	mutex_lock(&vc4->purgeable.lock);
+	if (vc4->purgeable.num)
+		seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
+			   vc4->purgeable.size / 1024, vc4->purgeable.num);
+
+	if (vc4->purgeable.purged_num)
+		seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "total purged BO",
+			   vc4->purgeable.purged_size / 1024,
+			   vc4->purgeable.purged_num);
+	mutex_unlock(&vc4->purgeable.lock);
 
 	return 0;
 }
 #endif
 
+/* Takes ownership of *name and returns the appropriate slot for it in
+ * the bo_labels[] array, extending it as necessary.
+ *
+ * This is inefficient and could use a hash table instead of walking
+ * an array and strcmp()ing.  However, the assumption is that user
+ * labeling will be infrequent (scanout buffers and other long-lived
+ * objects, or debug driver builds), so we can live with it for now.
+ */
+static int vc4_get_user_label(struct vc4_dev *vc4, const char *name)
+{
+	int i;
+	int free_slot = -1;
+
+	for (i = 0; i < vc4->num_labels; i++) {
+		if (!vc4->bo_labels[i].name) {
+			free_slot = i;
+		} else if (strcmp(vc4->bo_labels[i].name, name) == 0) {
+			kfree(name);
+			return i;
+		}
+	}
+
+	if (free_slot != -1) {
+		WARN_ON(vc4->bo_labels[free_slot].num_allocated != 0);
+		vc4->bo_labels[free_slot].name = name;
+		return free_slot;
+	} else {
+		u32 new_label_count = vc4->num_labels + 1;
+		struct vc4_label *new_labels =
+			krealloc(vc4->bo_labels,
+				 new_label_count * sizeof(*new_labels),
+				 GFP_KERNEL);
+
+		if (!new_labels) {
+			kfree(name);
+			return -1;
+		}
+
+		free_slot = vc4->num_labels;
+		vc4->bo_labels = new_labels;
+		vc4->num_labels = new_label_count;
+
+		vc4->bo_labels[free_slot].name = name;
+		vc4->bo_labels[free_slot].num_allocated = 0;
+		vc4->bo_labels[free_slot].size_allocated = 0;
+
+		return free_slot;
+	}
+}
+
+static void vc4_bo_set_label(struct drm_gem_object *gem_obj, int label)
+{
+	struct vc4_bo *bo = to_vc4_bo(gem_obj);
+	struct vc4_dev *vc4 = to_vc4_dev(gem_obj->dev);
+
+	lockdep_assert_held(&vc4->bo_lock);
+
+	if (label != -1) {
+		vc4->bo_labels[label].num_allocated++;
+		vc4->bo_labels[label].size_allocated += gem_obj->size;
+	}
+
+	vc4->bo_labels[bo->label].num_allocated--;
+	vc4->bo_labels[bo->label].size_allocated -= gem_obj->size;
+
+	if (vc4->bo_labels[bo->label].num_allocated == 0 &&
+	    is_user_label(bo->label)) {
+		/* Free user BO label slots on last unreference.
+		 * Slots are just where we track the stats for a given
+		 * name, and once a name is unused we can reuse that
+		 * slot.
+		 */
+		kfree(vc4->bo_labels[bo->label].name);
+		vc4->bo_labels[bo->label].name = NULL;
+	}
+
+	bo->label = label;
+}
+
 static uint32_t bo_page_index(size_t size)
 {
 	return (size / PAGE_SIZE) - 1;
 }
 
-/* Must be called with bo_lock held. */
 static void vc4_bo_destroy(struct vc4_bo *bo)
 {
 	struct drm_gem_object *obj = &bo->base.base;
 	struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
 
+	lockdep_assert_held(&vc4->bo_lock);
+
+	vc4_bo_set_label(obj, -1);
+
 	if (bo->validated_shader) {
 		kfree(bo->validated_shader->texture_samples);
 		kfree(bo->validated_shader);
 		bo->validated_shader = NULL;
 	}
 
-	vc4->bo_stats.num_allocated--;
-	vc4->bo_stats.size_allocated -= obj->size;
-
 	reservation_object_fini(&bo->_resv);
 
 	drm_gem_cma_free_object(obj);
 }
 
-/* Must be called with bo_lock held. */
 static void vc4_bo_remove_from_cache(struct vc4_bo *bo)
 {
-	struct drm_gem_object *obj = &bo->base.base;
-	struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
-
-	vc4->bo_stats.num_cached--;
-	vc4->bo_stats.size_cached -= obj->size;
+	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
 
+	lockdep_assert_held(&vc4->bo_lock);
 	list_del(&bo->unref_head);
 	list_del(&bo->size_head);
 }
@@ -164,8 +269,112 @@ static void vc4_bo_cache_purge(struct drm_device *dev)
 	mutex_unlock(&vc4->bo_lock);
 }
 
+void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+	mutex_lock(&vc4->purgeable.lock);
+	list_add_tail(&bo->size_head, &vc4->purgeable.list);
+	vc4->purgeable.num++;
+	vc4->purgeable.size += bo->base.base.size;
+	mutex_unlock(&vc4->purgeable.lock);
+}
+
+static void vc4_bo_remove_from_purgeable_pool_locked(struct vc4_bo *bo)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+	/* list_del_init() is used here because the caller might release
+	 * the purgeable lock in order to acquire the madv one and update the
+	 * madv status.
+	 * During this short period of time a user might decide to mark
+	 * the BO as unpurgeable, and if bo->madv is set to
+	 * VC4_MADV_DONTNEED it will try to remove the BO from the
+	 * purgeable list which will fail if the ->next/prev fields
+	 * are set to LIST_POISON1/LIST_POISON2 (which is what
+	 * list_del() does).
+	 * Re-initializing the list element guarantees that list_del()
+	 * will work correctly even if it's a NOP.
+	 */
+	list_del_init(&bo->size_head);
+	vc4->purgeable.num--;
+	vc4->purgeable.size -= bo->base.base.size;
+}
+
+void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+	mutex_lock(&vc4->purgeable.lock);
+	vc4_bo_remove_from_purgeable_pool_locked(bo);
+	mutex_unlock(&vc4->purgeable.lock);
+}
+
+static void vc4_bo_purge(struct drm_gem_object *obj)
+{
+	struct vc4_bo *bo = to_vc4_bo(obj);
+	struct drm_device *dev = obj->dev;
+
+	WARN_ON(!mutex_is_locked(&bo->madv_lock));
+	WARN_ON(bo->madv != VC4_MADV_DONTNEED);
+
+	drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping);
+
+	dma_free_wc(dev->dev, obj->size, bo->base.vaddr, bo->base.paddr);
+	bo->base.vaddr = NULL;
+	bo->madv = __VC4_MADV_PURGED;
+}
+
+static void vc4_bo_userspace_cache_purge(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	mutex_lock(&vc4->purgeable.lock);
+	while (!list_empty(&vc4->purgeable.list)) {
+		struct vc4_bo *bo = list_first_entry(&vc4->purgeable.list,
+						     struct vc4_bo, size_head);
+		struct drm_gem_object *obj = &bo->base.base;
+		size_t purged_size = 0;
+
+		vc4_bo_remove_from_purgeable_pool_locked(bo);
+
+		/* Release the purgeable lock while we're purging the BO so
+		 * that other people can continue inserting things in the
+		 * purgeable pool without having to wait for all BOs to be
+		 * purged.
+		 */
+		mutex_unlock(&vc4->purgeable.lock);
+		mutex_lock(&bo->madv_lock);
+
+		/* Since we released the purgeable pool lock before acquiring
+		 * the BO madv one, the user may have marked the BO as WILLNEED
+		 * and re-used it in the meantime.
+		 * Before purging the BO we need to make sure
+		 * - it is still marked as DONTNEED
+		 * - it has not been re-inserted in the purgeable list
+		 * - it is not used by HW blocks
+		 * If one of these conditions is not met, just skip the entry.
+		 */
+		if (bo->madv == VC4_MADV_DONTNEED &&
+		    list_empty(&bo->size_head) &&
+		    !refcount_read(&bo->usecnt)) {
+			purged_size = bo->base.base.size;
+			vc4_bo_purge(obj);
+		}
+		mutex_unlock(&bo->madv_lock);
+		mutex_lock(&vc4->purgeable.lock);
+
+		if (purged_size) {
+			vc4->purgeable.purged_size += purged_size;
+			vc4->purgeable.purged_num++;
+		}
+	}
+	mutex_unlock(&vc4->purgeable.lock);
+}
+
 static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev,
-					    uint32_t size)
+					    uint32_t size,
+					    enum vc4_kernel_bo_type type)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	uint32_t page_index = bo_page_index(size);
@@ -186,6 +395,8 @@ static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev,
 	kref_init(&bo->base.base.refcount);
 
 out:
+	if (bo)
+		vc4_bo_set_label(&bo->base.base, type);
 	mutex_unlock(&vc4->bo_lock);
 	return bo;
 }
@@ -207,9 +418,13 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
 	if (!bo)
 		return ERR_PTR(-ENOMEM);
 
+	bo->madv = VC4_MADV_WILLNEED;
+	refcount_set(&bo->usecnt, 0);
+	mutex_init(&bo->madv_lock);
 	mutex_lock(&vc4->bo_lock);
-	vc4->bo_stats.num_allocated++;
-	vc4->bo_stats.size_allocated += size;
+	bo->label = VC4_BO_TYPE_KERNEL;
+	vc4->bo_labels[VC4_BO_TYPE_KERNEL].num_allocated++;
+	vc4->bo_labels[VC4_BO_TYPE_KERNEL].size_allocated += size;
 	mutex_unlock(&vc4->bo_lock);
 	bo->resv = &bo->_resv;
 	reservation_object_init(bo->resv);
@@ -218,7 +433,7 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
 }
 
 struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
-			     bool allow_unzeroed)
+			     bool allow_unzeroed, enum vc4_kernel_bo_type type)
 {
 	size_t size = roundup(unaligned_size, PAGE_SIZE);
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
@@ -229,7 +444,7 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
 		return ERR_PTR(-EINVAL);
 
 	/* First, try to get a vc4_bo from the kernel BO cache. */
-	bo = vc4_bo_get_from_cache(dev, size);
+	bo = vc4_bo_get_from_cache(dev, size, type);
 	if (bo) {
 		if (!allow_unzeroed)
 			memset(bo->base.vaddr, 0, bo->base.base.size);
@@ -243,15 +458,43 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
 		 * CMA allocations we've got laying around and try again.
 		 */
 		vc4_bo_cache_purge(dev);
+		cma_obj = drm_gem_cma_create(dev, size);
+	}
 
+	if (IS_ERR(cma_obj)) {
+		/*
+		 * Still not enough CMA memory, purge the userspace BO
+		 * cache and retry.
+		 * This is sub-optimal since we purge the whole userspace
+		 * BO cache which forces user that want to re-use the BO to
+		 * restore its initial content.
+		 * Ideally, we should purge entries one by one and retry
+		 * after each to see if CMA allocation succeeds. Or even
+		 * better, try to find an entry with at least the same
+		 * size.
+		 */
+		vc4_bo_userspace_cache_purge(dev);
 		cma_obj = drm_gem_cma_create(dev, size);
-		if (IS_ERR(cma_obj)) {
-			DRM_ERROR("Failed to allocate from CMA:\n");
-			vc4_bo_stats_dump(vc4);
-			return ERR_PTR(-ENOMEM);
-		}
 	}
-	return to_vc4_bo(&cma_obj->base);
+
+	if (IS_ERR(cma_obj)) {
+		DRM_ERROR("Failed to allocate from CMA:\n");
+		vc4_bo_stats_dump(vc4);
+		return ERR_PTR(-ENOMEM);
+	}
+	bo = to_vc4_bo(&cma_obj->base);
+
+	/* By default, BOs do not support the MADV ioctl. This will be enabled
+	 * only on BOs that are exposed to userspace (V3D, V3D_SHADER and DUMB
+	 * BOs).
+	 */
+	bo->madv = __VC4_MADV_NOTSUPP;
+
+	mutex_lock(&vc4->bo_lock);
+	vc4_bo_set_label(&cma_obj->base, type);
+	mutex_unlock(&vc4->bo_lock);
+
+	return bo;
 }
 
 int vc4_dumb_create(struct drm_file *file_priv,
@@ -268,22 +511,25 @@ int vc4_dumb_create(struct drm_file *file_priv,
 	if (args->size < args->pitch * args->height)
 		args->size = args->pitch * args->height;
 
-	bo = vc4_bo_create(dev, args->size, false);
+	bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_DUMB);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
+	bo->madv = VC4_MADV_WILLNEED;
+
 	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
-	drm_gem_object_unreference_unlocked(&bo->base.base);
+	drm_gem_object_put_unlocked(&bo->base.base);
 
 	return ret;
 }
 
-/* Must be called with bo_lock held. */
 static void vc4_bo_cache_free_old(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	unsigned long expire_time = jiffies - msecs_to_jiffies(1000);
 
+	lockdep_assert_held(&vc4->bo_lock);
+
 	while (!list_empty(&vc4->bo_cache.time_list)) {
 		struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list,
 						    struct vc4_bo, unref_head);
@@ -309,6 +555,12 @@ void vc4_free_object(struct drm_gem_object *gem_bo)
 	struct vc4_bo *bo = to_vc4_bo(gem_bo);
 	struct list_head *cache_list;
 
+	/* Remove the BO from the purgeable list. */
+	mutex_lock(&bo->madv_lock);
+	if (bo->madv == VC4_MADV_DONTNEED && !refcount_read(&bo->usecnt))
+		vc4_bo_remove_from_purgeable_pool(bo);
+	mutex_unlock(&bo->madv_lock);
+
 	mutex_lock(&vc4->bo_lock);
 	/* If the object references someone else's memory, we can't cache it.
 	 */
@@ -324,7 +576,8 @@ void vc4_free_object(struct drm_gem_object *gem_bo)
 	}
 
 	/* If this object was partially constructed but CMA allocation
-	 * had failed, just free it.
+	 * had failed, just free it. Can also happen when the BO has been
+	 * purged.
 	 */
 	if (!bo->base.vaddr) {
 		vc4_bo_destroy(bo);
@@ -343,13 +596,16 @@ void vc4_free_object(struct drm_gem_object *gem_bo)
 		bo->validated_shader = NULL;
 	}
 
+	/* Reset madv and usecnt before adding the BO to the cache. */
+	bo->madv = __VC4_MADV_NOTSUPP;
+	refcount_set(&bo->usecnt, 0);
+
 	bo->t_format = false;
 	bo->free_time = jiffies;
 	list_add(&bo->size_head, cache_list);
 	list_add(&bo->unref_head, &vc4->bo_cache.time_list);
 
-	vc4->bo_stats.num_cached++;
-	vc4->bo_stats.size_cached += gem_bo->size;
+	vc4_bo_set_label(&bo->base.base, VC4_BO_TYPE_KERNEL_CACHE);
 
 	vc4_bo_cache_free_old(dev);
 
@@ -368,10 +624,59 @@ static void vc4_bo_cache_time_work(struct work_struct *work)
 	mutex_unlock(&vc4->bo_lock);
 }
 
-static void vc4_bo_cache_time_timer(unsigned long data)
+int vc4_bo_inc_usecnt(struct vc4_bo *bo)
 {
-	struct drm_device *dev = (struct drm_device *)data;
-	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	int ret;
+
+	/* Fast path: if the BO is already retained by someone, no need to
+	 * check the madv status.
+	 */
+	if (refcount_inc_not_zero(&bo->usecnt))
+		return 0;
+
+	mutex_lock(&bo->madv_lock);
+	switch (bo->madv) {
+	case VC4_MADV_WILLNEED:
+		refcount_inc(&bo->usecnt);
+		ret = 0;
+		break;
+	case VC4_MADV_DONTNEED:
+		/* We shouldn't use a BO marked as purgeable if at least
+		 * someone else retained its content by incrementing usecnt.
+		 * Luckily the BO hasn't been purged yet, but something wrong
+		 * is happening here. Just throw an error instead of
+		 * authorizing this use case.
+		 */
+	case __VC4_MADV_PURGED:
+		/* We can't use a purged BO. */
+	default:
+		/* Invalid madv value. */
+		ret = -EINVAL;
+		break;
+	}
+	mutex_unlock(&bo->madv_lock);
+
+	return ret;
+}
+
+void vc4_bo_dec_usecnt(struct vc4_bo *bo)
+{
+	/* Fast path: if the BO is still retained by someone, no need to test
+	 * the madv value.
+	 */
+	if (refcount_dec_not_one(&bo->usecnt))
+		return;
+
+	mutex_lock(&bo->madv_lock);
+	if (refcount_dec_and_test(&bo->usecnt) &&
+	    bo->madv == VC4_MADV_DONTNEED)
+		vc4_bo_add_to_purgeable_pool(bo);
+	mutex_unlock(&bo->madv_lock);
+}
+
+static void vc4_bo_cache_time_timer(struct timer_list *t)
+{
+	struct vc4_dev *vc4 = from_timer(vc4, t, bo_cache.time_timer);
 
 	schedule_work(&vc4->bo_cache.time_work);
 }
@@ -387,18 +692,52 @@ struct dma_buf *
 vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
 {
 	struct vc4_bo *bo = to_vc4_bo(obj);
+	struct dma_buf *dmabuf;
+	int ret;
 
 	if (bo->validated_shader) {
-		DRM_ERROR("Attempting to export shader BO\n");
+		DRM_DEBUG("Attempting to export shader BO\n");
 		return ERR_PTR(-EINVAL);
 	}
 
-	return drm_gem_prime_export(dev, obj, flags);
+	/* Note: as soon as the BO is exported it becomes unpurgeable, because
+	 * noone ever decrements the usecnt even if the reference held by the
+	 * exported BO is released. This shouldn't be a problem since we don't
+	 * expect exported BOs to be marked as purgeable.
+	 */
+	ret = vc4_bo_inc_usecnt(bo);
+	if (ret) {
+		DRM_ERROR("Failed to increment BO usecnt\n");
+		return ERR_PTR(ret);
+	}
+
+	dmabuf = drm_gem_prime_export(dev, obj, flags);
+	if (IS_ERR(dmabuf))
+		vc4_bo_dec_usecnt(bo);
+
+	return dmabuf;
+}
+
+int vc4_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct vc4_bo *bo = to_vc4_bo(obj);
+
+	/* The only reason we would end up here is when user-space accesses
+	 * BO's memory after it's been purged.
+	 */
+	mutex_lock(&bo->madv_lock);
+	WARN_ON(bo->madv != __VC4_MADV_PURGED);
+	mutex_unlock(&bo->madv_lock);
+
+	return VM_FAULT_SIGBUS;
 }
 
 int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct drm_gem_object *gem_obj;
+	unsigned long vm_pgoff;
 	struct vc4_bo *bo;
 	int ret;
 
@@ -410,7 +749,14 @@ int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
 	bo = to_vc4_bo(gem_obj);
 
 	if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
-		DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
+		DRM_DEBUG("mmaping of shader BOs for writing not allowed.\n");
+		return -EINVAL;
+	}
+
+	if (bo->madv != VC4_MADV_WILLNEED) {
+		DRM_DEBUG("mmaping of %s BO not allowed\n",
+			  bo->madv == VC4_MADV_DONTNEED ?
+			  "purgeable" : "purged");
 		return -EINVAL;
 	}
 
@@ -420,10 +766,23 @@ int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
 	 * the whole buffer.
 	 */
 	vma->vm_flags &= ~VM_PFNMAP;
-	vma->vm_pgoff = 0;
 
+	/* This ->vm_pgoff dance is needed to make all parties happy:
+	 * - dma_mmap_wc() uses ->vm_pgoff as an offset within the allocated
+	 *   mem-region, hence the need to set it to zero (the value set by
+	 *   the DRM core is a virtual offset encoding the GEM object-id)
+	 * - the mmap() core logic needs ->vm_pgoff to be restored to its
+	 *   initial value before returning from this function because it
+	 *   encodes the  offset of this GEM in the dev->anon_inode pseudo-file
+	 *   and this information will be used when we invalidate userspace
+	 *   mappings  with drm_vma_node_unmap() (called from vc4_gem_purge()).
+	 */
+	vm_pgoff = vma->vm_pgoff;
+	vma->vm_pgoff = 0;
 	ret = dma_mmap_wc(bo->base.base.dev->dev, vma, bo->base.vaddr,
 			  bo->base.paddr, vma->vm_end - vma->vm_start);
+	vma->vm_pgoff = vm_pgoff;
+
 	if (ret)
 		drm_gem_vm_close(vma);
 
@@ -435,7 +794,7 @@ int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 	struct vc4_bo *bo = to_vc4_bo(obj);
 
 	if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
-		DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
+		DRM_DEBUG("mmaping of shader BOs for writing not allowed.\n");
 		return -EINVAL;
 	}
 
@@ -447,7 +806,7 @@ void *vc4_prime_vmap(struct drm_gem_object *obj)
 	struct vc4_bo *bo = to_vc4_bo(obj);
 
 	if (bo->validated_shader) {
-		DRM_ERROR("mmaping of shader BOs not allowed.\n");
+		DRM_DEBUG("mmaping of shader BOs not allowed.\n");
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -483,12 +842,14 @@ int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
 	 * We can't allocate from the BO cache, because the BOs don't
 	 * get zeroed, and that might leak data between users.
 	 */
-	bo = vc4_bo_create(dev, args->size, false);
+	bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_V3D);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
+	bo->madv = VC4_MADV_WILLNEED;
+
 	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
-	drm_gem_object_unreference_unlocked(&bo->base.base);
+	drm_gem_object_put_unlocked(&bo->base.base);
 
 	return ret;
 }
@@ -501,14 +862,14 @@ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
 
 	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
 	if (!gem_obj) {
-		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
 		return -EINVAL;
 	}
 
 	/* The mmap offset was set up at BO allocation time. */
 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
 
-	drm_gem_object_unreference_unlocked(gem_obj);
+	drm_gem_object_put_unlocked(gem_obj);
 	return 0;
 }
 
@@ -536,10 +897,12 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	bo = vc4_bo_create(dev, args->size, true);
+	bo = vc4_bo_create(dev, args->size, true, VC4_BO_TYPE_V3D_SHADER);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
+	bo->madv = VC4_MADV_WILLNEED;
+
 	if (copy_from_user(bo->base.vaddr,
 			     (void __user *)(uintptr_t)args->data,
 			     args->size)) {
@@ -564,7 +927,7 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
 	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
 
  fail:
-	drm_gem_object_unreference_unlocked(&bo->base.base);
+	drm_gem_object_put_unlocked(&bo->base.base);
 
 	return ret;
 }
@@ -605,13 +968,13 @@ int vc4_set_tiling_ioctl(struct drm_device *dev, void *data,
 
 	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
 	if (!gem_obj) {
-		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
 		return -ENOENT;
 	}
 	bo = to_vc4_bo(gem_obj);
 	bo->t_format = t_format;
 
-	drm_gem_object_unreference_unlocked(gem_obj);
+	drm_gem_object_put_unlocked(gem_obj);
 
 	return 0;
 }
@@ -636,7 +999,7 @@ int vc4_get_tiling_ioctl(struct drm_device *dev, void *data,
 
 	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
 	if (!gem_obj) {
-		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
 		return -ENOENT;
 	}
 	bo = to_vc4_bo(gem_obj);
@@ -646,36 +1009,96 @@ int vc4_get_tiling_ioctl(struct drm_device *dev, void *data,
 	else
 		args->modifier = DRM_FORMAT_MOD_NONE;
 
-	drm_gem_object_unreference_unlocked(gem_obj);
+	drm_gem_object_put_unlocked(gem_obj);
 
 	return 0;
 }
 
-void vc4_bo_cache_init(struct drm_device *dev)
+int vc4_bo_cache_init(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	int i;
+
+	/* Create the initial set of BO labels that the kernel will
+	 * use.  This lets us avoid a bunch of string reallocation in
+	 * the kernel's draw and BO allocation paths.
+	 */
+	vc4->bo_labels = kcalloc(VC4_BO_TYPE_COUNT, sizeof(*vc4->bo_labels),
+				 GFP_KERNEL);
+	if (!vc4->bo_labels)
+		return -ENOMEM;
+	vc4->num_labels = VC4_BO_TYPE_COUNT;
+
+	BUILD_BUG_ON(ARRAY_SIZE(bo_type_names) != VC4_BO_TYPE_COUNT);
+	for (i = 0; i < VC4_BO_TYPE_COUNT; i++)
+		vc4->bo_labels[i].name = bo_type_names[i];
 
 	mutex_init(&vc4->bo_lock);
 
 	INIT_LIST_HEAD(&vc4->bo_cache.time_list);
 
 	INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work);
-	setup_timer(&vc4->bo_cache.time_timer,
-		    vc4_bo_cache_time_timer,
-		    (unsigned long)dev);
+	timer_setup(&vc4->bo_cache.time_timer, vc4_bo_cache_time_timer, 0);
+
+	return 0;
 }
 
 void vc4_bo_cache_destroy(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	int i;
 
 	del_timer(&vc4->bo_cache.time_timer);
 	cancel_work_sync(&vc4->bo_cache.time_work);
 
 	vc4_bo_cache_purge(dev);
 
-	if (vc4->bo_stats.num_allocated) {
-		DRM_ERROR("Destroying BO cache while BOs still allocated:\n");
-		vc4_bo_stats_dump(vc4);
+	for (i = 0; i < vc4->num_labels; i++) {
+		if (vc4->bo_labels[i].num_allocated) {
+			DRM_ERROR("Destroying BO cache with %d %s "
+				  "BOs still allocated\n",
+				  vc4->bo_labels[i].num_allocated,
+				  vc4->bo_labels[i].name);
+		}
+
+		if (is_user_label(i))
+			kfree(vc4->bo_labels[i].name);
 	}
+	kfree(vc4->bo_labels);
+}
+
+int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_vc4_label_bo *args = data;
+	char *name;
+	struct drm_gem_object *gem_obj;
+	int ret = 0, label;
+
+	if (!args->len)
+		return -EINVAL;
+
+	name = strndup_user(u64_to_user_ptr(args->name), args->len + 1);
+	if (IS_ERR(name))
+		return PTR_ERR(name);
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+		kfree(name);
+		return -ENOENT;
+	}
+
+	mutex_lock(&vc4->bo_lock);
+	label = vc4_get_user_label(vc4, name);
+	if (label != -1)
+		vc4_bo_set_label(gem_obj, label);
+	else
+		ret = -ENOMEM;
+	mutex_unlock(&vc4->bo_lock);
+
+	drm_gem_object_put_unlocked(gem_obj);
+
+	return ret;
 }
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index a12cc7ea99b6..ce1e3b9e14c9 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -479,7 +479,8 @@ static void require_hvs_enabled(struct drm_device *dev)
 		     SCALER_DISPCTRL_ENABLE);
 }
 
-static void vc4_crtc_disable(struct drm_crtc *crtc)
+static void vc4_crtc_atomic_disable(struct drm_crtc *crtc,
+				    struct drm_crtc_state *old_state)
 {
 	struct drm_device *dev = crtc->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
@@ -518,6 +519,19 @@ static void vc4_crtc_disable(struct drm_crtc *crtc)
 	WARN_ON_ONCE((HVS_READ(SCALER_DISPSTATX(chan)) &
 		      (SCALER_DISPSTATX_FULL | SCALER_DISPSTATX_EMPTY)) !=
 		     SCALER_DISPSTATX_EMPTY);
+
+	/*
+	 * Make sure we issue a vblank event after disabling the CRTC if
+	 * someone was waiting it.
+	 */
+	if (crtc->state->event) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&dev->event_lock, flags);
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
+		crtc->state->event = NULL;
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	}
 }
 
 static void vc4_crtc_update_dlist(struct drm_crtc *crtc)
@@ -548,7 +562,8 @@ static void vc4_crtc_update_dlist(struct drm_crtc *crtc)
 	}
 }
 
-static void vc4_crtc_enable(struct drm_crtc *crtc)
+static void vc4_crtc_atomic_enable(struct drm_crtc *crtc,
+				   struct drm_crtc_state *old_state)
 {
 	struct drm_device *dev = crtc->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
@@ -577,18 +592,17 @@ static void vc4_crtc_enable(struct drm_crtc *crtc)
 		   CRTC_READ(PV_V_CONTROL) | PV_VCONTROL_VIDEN);
 }
 
-static bool vc4_crtc_mode_fixup(struct drm_crtc *crtc,
-				const struct drm_display_mode *mode,
-				struct drm_display_mode *adjusted_mode)
+static enum drm_mode_status vc4_crtc_mode_valid(struct drm_crtc *crtc,
+						const struct drm_display_mode *mode)
 {
 	/* Do not allow doublescan modes from user space */
-	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) {
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN) {
 		DRM_DEBUG_KMS("[CRTC:%d] Doublescan mode rejected.\n",
 			      crtc->base.id);
-		return false;
+		return MODE_NO_DBLESCAN;
 	}
 
-	return true;
+	return MODE_OK;
 }
 
 static int vc4_crtc_atomic_check(struct drm_crtc *crtc,
@@ -682,14 +696,6 @@ static void vc4_disable_vblank(struct drm_crtc *crtc)
 	CRTC_WRITE(PV_INTEN, 0);
 }
 
-/* Must be called with the event lock held */
-bool vc4_event_pending(struct drm_crtc *crtc)
-{
-	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
-
-	return !!vc4_crtc->event;
-}
-
 static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc)
 {
 	struct drm_crtc *crtc = &vc4_crtc->base;
@@ -757,7 +763,7 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
 	}
 
 	drm_crtc_vblank_put(crtc);
-	drm_framebuffer_unreference(flip_state->fb);
+	drm_framebuffer_put(flip_state->fb);
 	kfree(flip_state);
 
 	up(&vc4->async_modeset);
@@ -786,7 +792,7 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
 	if (!flip_state)
 		return -ENOMEM;
 
-	drm_framebuffer_reference(fb);
+	drm_framebuffer_get(fb);
 	flip_state->fb = fb;
 	flip_state->crtc = crtc;
 	flip_state->event = event;
@@ -794,7 +800,7 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
 	/* Make sure all other async modesetes have landed. */
 	ret = down_interruptible(&vc4->async_modeset);
 	if (ret) {
-		drm_framebuffer_unreference(fb);
+		drm_framebuffer_put(fb);
 		kfree(flip_state);
 		return ret;
 	}
@@ -885,11 +891,11 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = {
 
 static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = {
 	.mode_set_nofb = vc4_crtc_mode_set_nofb,
-	.disable = vc4_crtc_disable,
-	.enable = vc4_crtc_enable,
-	.mode_fixup = vc4_crtc_mode_fixup,
+	.mode_valid = vc4_crtc_mode_valid,
 	.atomic_check = vc4_crtc_atomic_check,
 	.atomic_flush = vc4_crtc_atomic_flush,
+	.atomic_enable = vc4_crtc_atomic_enable,
+	.atomic_disable = vc4_crtc_atomic_disable,
 };
 
 static const struct vc4_crtc_data pv0_data = {
diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
index 2e0fe46aeb2e..72c9dbd81d7f 100644
--- a/drivers/gpu/drm/vc4/vc4_dpi.c
+++ b/drivers/gpu/drm/vc4/vc4_dpi.c
@@ -97,8 +97,6 @@ struct vc4_dpi {
 
 	struct drm_encoder *encoder;
 	struct drm_connector *connector;
-	struct drm_bridge *bridge;
-	bool is_panel_bridge;
 
 	void __iomem *regs;
 
@@ -224,20 +222,19 @@ static void vc4_dpi_encoder_enable(struct drm_encoder *encoder)
 		DRM_ERROR("Failed to set clock rate: %d\n", ret);
 }
 
-static bool vc4_dpi_encoder_mode_fixup(struct drm_encoder *encoder,
-				       const struct drm_display_mode *mode,
-				       struct drm_display_mode *adjusted_mode)
+static enum drm_mode_status vc4_dpi_encoder_mode_valid(struct drm_encoder *encoder,
+						       const struct drm_display_mode *mode)
 {
-	if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
-		return false;
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		return MODE_NO_INTERLACE;
 
-	return true;
+	return MODE_OK;
 }
 
 static const struct drm_encoder_helper_funcs vc4_dpi_encoder_helper_funcs = {
 	.disable = vc4_dpi_encoder_disable,
 	.enable = vc4_dpi_encoder_enable,
-	.mode_fixup = vc4_dpi_encoder_mode_fixup,
+	.mode_valid = vc4_dpi_encoder_mode_valid,
 };
 
 static const struct of_device_id vc4_dpi_dt_match[] = {
@@ -252,10 +249,11 @@ static int vc4_dpi_init_bridge(struct vc4_dpi *dpi)
 {
 	struct device *dev = &dpi->pdev->dev;
 	struct drm_panel *panel;
+	struct drm_bridge *bridge;
 	int ret;
 
 	ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0,
-					  &panel, &dpi->bridge);
+					  &panel, &bridge);
 	if (ret) {
 		/* If nothing was connected in the DT, that's not an
 		 * error.
@@ -266,13 +264,10 @@ static int vc4_dpi_init_bridge(struct vc4_dpi *dpi)
 			return ret;
 	}
 
-	if (panel) {
-		dpi->bridge = drm_panel_bridge_add(panel,
-						   DRM_MODE_CONNECTOR_DPI);
-		dpi->is_panel_bridge = true;
-	}
+	if (panel)
+		bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_DPI);
 
-	return drm_bridge_attach(dpi->encoder, dpi->bridge, NULL);
+	return drm_bridge_attach(dpi->encoder, bridge, NULL);
 }
 
 static int vc4_dpi_bind(struct device *dev, struct device *master, void *data)
@@ -353,8 +348,7 @@ static void vc4_dpi_unbind(struct device *dev, struct device *master,
 	struct vc4_dev *vc4 = to_vc4_dev(drm);
 	struct vc4_dpi *dpi = dev_get_drvdata(dev);
 
-	if (dpi->is_panel_bridge)
-		drm_panel_bridge_remove(dpi->bridge);
+	drm_of_panel_bridge_remove(dev->of_node, 0, 0);
 
 	drm_encoder_cleanup(dpi->encoder);
 
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index c6b487c3d2b7..e3c29729da2e 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -99,6 +99,8 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
 	case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
 	case DRM_VC4_PARAM_SUPPORTS_ETC1:
 	case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
+	case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
+	case DRM_VC4_PARAM_SUPPORTS_MADVISE:
 		args->value = true;
 		break;
 	default:
@@ -116,6 +118,12 @@ static void vc4_lastclose(struct drm_device *dev)
 	drm_fbdev_cma_restore_mode(vc4->fbdev);
 }
 
+static const struct vm_operations_struct vc4_vm_ops = {
+	.fault = vc4_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
 static const struct file_operations vc4_drm_fops = {
 	.owner = THIS_MODULE,
 	.open = drm_open,
@@ -140,6 +148,8 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver vc4_drm_driver = {
@@ -164,7 +174,7 @@ static struct drm_driver vc4_drm_driver = {
 
 	.gem_create_object = vc4_create_object,
 	.gem_free_object_unlocked = vc4_free_object,
-	.gem_vm_ops = &drm_gem_cma_vm_ops,
+	.gem_vm_ops = &vc4_vm_ops,
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
@@ -178,8 +188,6 @@ static struct drm_driver vc4_drm_driver = {
 	.gem_prime_mmap = vc4_prime_mmap,
 
 	.dumb_create = vc4_dumb_create,
-	.dumb_map_offset = drm_gem_cma_dumb_map_offset,
-	.dumb_destroy = drm_gem_dumb_destroy,
 
 	.ioctls = vc4_drm_ioctls,
 	.num_ioctls = ARRAY_SIZE(vc4_drm_ioctls),
@@ -257,7 +265,9 @@ static int vc4_drm_bind(struct device *dev)
 	vc4->dev = drm;
 	drm->dev_private = vc4;
 
-	vc4_bo_cache_init(drm);
+	ret = vc4_bo_cache_init(drm);
+	if (ret)
+		goto dev_unref;
 
 	drm_mode_config_init(drm);
 
@@ -281,8 +291,9 @@ unbind_all:
 	component_unbind_all(dev, drm);
 gem_destroy:
 	vc4_gem_destroy(drm);
-	drm_dev_unref(drm);
 	vc4_bo_cache_destroy(drm);
+dev_unref:
+	drm_dev_unref(drm);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index df22698d62ee..9c0d380c96f2 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -11,6 +11,24 @@
 #include <drm/drm_encoder.h>
 #include <drm/drm_gem_cma_helper.h>
 
+/* Don't forget to update vc4_bo.c: bo_type_names[] when adding to
+ * this.
+ */
+enum vc4_kernel_bo_type {
+	/* Any kernel allocation (gem_create_object hook) before it
+	 * gets another type set.
+	 */
+	VC4_BO_TYPE_KERNEL,
+	VC4_BO_TYPE_V3D,
+	VC4_BO_TYPE_V3D_SHADER,
+	VC4_BO_TYPE_DUMB,
+	VC4_BO_TYPE_BIN,
+	VC4_BO_TYPE_RCL,
+	VC4_BO_TYPE_BCL,
+	VC4_BO_TYPE_KERNEL_CACHE,
+	VC4_BO_TYPE_COUNT
+};
+
 struct vc4_dev {
 	struct drm_device *dev;
 
@@ -46,16 +64,29 @@ struct vc4_dev {
 		struct timer_list time_timer;
 	} bo_cache;
 
-	struct vc4_bo_stats {
+	u32 num_labels;
+	struct vc4_label {
+		const char *name;
 		u32 num_allocated;
 		u32 size_allocated;
-		u32 num_cached;
-		u32 size_cached;
-	} bo_stats;
+	} *bo_labels;
 
-	/* Protects bo_cache and the BO stats. */
+	/* Protects bo_cache and bo_labels. */
 	struct mutex bo_lock;
 
+	/* Purgeable BO pool. All BOs in this pool can have their memory
+	 * reclaimed if the driver is unable to allocate new BOs. We also
+	 * keep stats related to the purge mechanism here.
+	 */
+	struct {
+		struct list_head list;
+		unsigned int num;
+		size_t size;
+		unsigned int purged_num;
+		size_t purged_size;
+		struct mutex lock;
+	} purgeable;
+
 	uint64_t dma_fence_context;
 
 	/* Sequence number for the last job queued in bin_job_list.
@@ -169,6 +200,21 @@ struct vc4_bo {
 	/* normally (resv == &_resv) except for imported bo's */
 	struct reservation_object *resv;
 	struct reservation_object _resv;
+
+	/* One of enum vc4_kernel_bo_type, or VC4_BO_TYPE_COUNT + i
+	 * for user-allocated labels.
+	 */
+	int label;
+
+	/* Count the number of active users. This is needed to determine
+	 * whether we can move the BO to the purgeable list or not (when the BO
+	 * is used by the GPU or the display engine we can't purge it).
+	 */
+	refcount_t usecnt;
+
+	/* Store purgeable/purged state here */
+	u32 madv;
+	struct mutex madv_lock;
 };
 
 static inline struct vc4_bo *
@@ -460,7 +506,7 @@ struct vc4_validated_shader_info {
 struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size);
 void vc4_free_object(struct drm_gem_object *gem_obj);
 struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size,
-			     bool from_cache);
+			     bool from_cache, enum vc4_kernel_bo_type type);
 int vc4_dumb_create(struct drm_file *file_priv,
 		    struct drm_device *dev,
 		    struct drm_mode_create_dumb *args);
@@ -478,6 +524,9 @@ int vc4_get_tiling_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv);
+int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv);
+int vc4_fault(struct vm_fault *vmf);
 int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
 struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj);
 int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
@@ -485,13 +534,16 @@ struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev,
 						 struct dma_buf_attachment *attach,
 						 struct sg_table *sgt);
 void *vc4_prime_vmap(struct drm_gem_object *obj);
-void vc4_bo_cache_init(struct drm_device *dev);
+int vc4_bo_cache_init(struct drm_device *dev);
 void vc4_bo_cache_destroy(struct drm_device *dev);
 int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
+int vc4_bo_inc_usecnt(struct vc4_bo *bo);
+void vc4_bo_dec_usecnt(struct vc4_bo *bo);
+void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo);
+void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo);
 
 /* vc4_crtc.c */
 extern struct platform_driver vc4_crtc_driver;
-bool vc4_event_pending(struct drm_crtc *crtc);
 int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg);
 bool vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
 			     bool in_vblank_irq, int *vpos, int *hpos,
@@ -533,6 +585,8 @@ void vc4_job_handle_completed(struct vc4_dev *vc4);
 int vc4_queue_seqno_cb(struct drm_device *dev,
 		       struct vc4_seqno_cb *cb, uint64_t seqno,
 		       void (*func)(struct vc4_seqno_cb *cb));
+int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv);
 
 /* vc4_hdmi.c */
 extern struct platform_driver vc4_hdmi_driver;
diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index 5e8b81eaa168..94085f8bcd68 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -33,6 +33,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_mipi_dsi.h>
+#include <drm/drm_of.h>
 #include <drm/drm_panel.h>
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
@@ -504,7 +505,6 @@ struct vc4_dsi {
 	struct mipi_dsi_host dsi_host;
 	struct drm_encoder *encoder;
 	struct drm_bridge *bridge;
-	bool is_panel_bridge;
 
 	void __iomem *regs;
 
@@ -736,18 +736,18 @@ static void vc4_dsi_latch_ulps(struct vc4_dsi *dsi, bool latch)
 /* Enters or exits Ultra Low Power State. */
 static void vc4_dsi_ulps(struct vc4_dsi *dsi, bool ulps)
 {
-	bool continuous = dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS;
-	u32 phyc_ulps = ((continuous ? DSI_PORT_BIT(PHYC_CLANE_ULPS) : 0) |
+	bool non_continuous = dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS;
+	u32 phyc_ulps = ((non_continuous ? DSI_PORT_BIT(PHYC_CLANE_ULPS) : 0) |
 			 DSI_PHYC_DLANE0_ULPS |
 			 (dsi->lanes > 1 ? DSI_PHYC_DLANE1_ULPS : 0) |
 			 (dsi->lanes > 2 ? DSI_PHYC_DLANE2_ULPS : 0) |
 			 (dsi->lanes > 3 ? DSI_PHYC_DLANE3_ULPS : 0));
-	u32 stat_ulps = ((continuous ? DSI1_STAT_PHY_CLOCK_ULPS : 0) |
+	u32 stat_ulps = ((non_continuous ? DSI1_STAT_PHY_CLOCK_ULPS : 0) |
 			 DSI1_STAT_PHY_D0_ULPS |
 			 (dsi->lanes > 1 ? DSI1_STAT_PHY_D1_ULPS : 0) |
 			 (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_ULPS : 0) |
 			 (dsi->lanes > 3 ? DSI1_STAT_PHY_D3_ULPS : 0));
-	u32 stat_stop = ((continuous ? DSI1_STAT_PHY_CLOCK_STOP : 0) |
+	u32 stat_stop = ((non_continuous ? DSI1_STAT_PHY_CLOCK_STOP : 0) |
 			 DSI1_STAT_PHY_D0_STOP |
 			 (dsi->lanes > 1 ? DSI1_STAT_PHY_D1_STOP : 0) |
 			 (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_STOP : 0) |
@@ -859,14 +859,11 @@ static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder,
 	pll_clock = parent_rate / divider;
 	pixel_clock_hz = pll_clock / dsi->divider;
 
-	/* Round up the clk_set_rate() request slightly, since
-	 * PLLD_DSI1 is an integer divider and its rate selection will
-	 * never round up.
-	 */
-	adjusted_mode->clock = pixel_clock_hz / 1000 + 1;
+	adjusted_mode->clock = pixel_clock_hz / 1000;
 
 	/* Given the new pixel clock, adjust HFP to keep vrefresh the same. */
-	adjusted_mode->htotal = pixel_clock_hz / (mode->vrefresh * mode->vtotal);
+	adjusted_mode->htotal = adjusted_mode->clock * mode->htotal /
+				mode->clock;
 	adjusted_mode->hsync_end += adjusted_mode->htotal - mode->htotal;
 	adjusted_mode->hsync_start += adjusted_mode->htotal - mode->htotal;
 
@@ -900,7 +897,11 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
 		vc4_dsi_dump_regs(dsi);
 	}
 
-	phy_clock = pixel_clock_hz * dsi->divider;
+	/* Round up the clk_set_rate() request slightly, since
+	 * PLLD_DSI1 is an integer divider and its rate selection will
+	 * never round up.
+	 */
+	phy_clock = (pixel_clock_hz + 1000) * dsi->divider;
 	ret = clk_set_rate(dsi->pll_phy_clock, phy_clock);
 	if (ret) {
 		dev_err(&dsi->pdev->dev,
@@ -1035,7 +1036,17 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
 				     DSI_HS_DLT4_TRAIL) |
 		       VC4_SET_FIELD(0, DSI_HS_DLT4_ANLAT));
 
-	DSI_PORT_WRITE(HS_DLT5, VC4_SET_FIELD(dsi_hs_timing(ui_ns, 1000, 5000),
+	/* T_INIT is how long STOP is driven after power-up to
+	 * indicate to the slave (also coming out of power-up) that
+	 * master init is complete, and should be greater than the
+	 * maximum of two value: T_INIT,MASTER and T_INIT,SLAVE.  The
+	 * D-PHY spec gives a minimum 100us for T_INIT,MASTER and
+	 * T_INIT,SLAVE, while allowing protocols on top of it to give
+	 * greater minimums.  The vc4 firmware uses an extremely
+	 * conservative 5ms, and we maintain that here.
+	 */
+	DSI_PORT_WRITE(HS_DLT5, VC4_SET_FIELD(dsi_hs_timing(ui_ns,
+							    5 * 1000 * 1000, 0),
 					      DSI_HS_DLT5_INIT));
 
 	DSI_PORT_WRITE(HS_DLT6,
@@ -1278,7 +1289,6 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host,
 			       struct mipi_dsi_device *device)
 {
 	struct vc4_dsi *dsi = host_to_dsi(host);
-	int ret = 0;
 
 	dsi->lanes = device->lanes;
 	dsi->channel = device->channel;
@@ -1313,34 +1323,12 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host,
 		return 0;
 	}
 
-	dsi->bridge = of_drm_find_bridge(device->dev.of_node);
-	if (!dsi->bridge) {
-		struct drm_panel *panel =
-			of_drm_find_panel(device->dev.of_node);
-
-		dsi->bridge = drm_panel_bridge_add(panel,
-						   DRM_MODE_CONNECTOR_DSI);
-		if (IS_ERR(dsi->bridge)) {
-			ret = PTR_ERR(dsi->bridge);
-			dsi->bridge = NULL;
-			return ret;
-		}
-		dsi->is_panel_bridge = true;
-	}
-
-	return drm_bridge_attach(dsi->encoder, dsi->bridge, NULL);
+	return 0;
 }
 
 static int vc4_dsi_host_detach(struct mipi_dsi_host *host,
 			       struct mipi_dsi_device *device)
 {
-	struct vc4_dsi *dsi = host_to_dsi(host);
-
-	if (dsi->is_panel_bridge) {
-		drm_panel_bridge_remove(dsi->bridge);
-		dsi->bridge = NULL;
-	}
-
 	return 0;
 }
 
@@ -1372,6 +1360,27 @@ static void dsi_handle_error(struct vc4_dsi *dsi,
 	*ret = IRQ_HANDLED;
 }
 
+/*
+ * Initial handler for port 1 where we need the reg_dma workaround.
+ * The register DMA writes sleep, so we can't do it in the top half.
+ * Instead we use IRQF_ONESHOT so that the IRQ gets disabled in the
+ * parent interrupt contrller until our interrupt thread is done.
+ */
+static irqreturn_t vc4_dsi_irq_defer_to_thread_handler(int irq, void *data)
+{
+	struct vc4_dsi *dsi = data;
+	u32 stat = DSI_PORT_READ(INT_STAT);
+
+	if (!stat)
+		return IRQ_NONE;
+
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Normal IRQ handler for port 0, or the threaded IRQ handler for port
+ * 1 where we need the reg_dma workaround.
+ */
 static irqreturn_t vc4_dsi_irq_handler(int irq, void *data)
 {
 	struct vc4_dsi *dsi = data;
@@ -1482,16 +1491,13 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
 	struct platform_device *pdev = to_platform_device(dev);
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = to_vc4_dev(drm);
-	struct vc4_dsi *dsi;
+	struct vc4_dsi *dsi = dev_get_drvdata(dev);
 	struct vc4_dsi_encoder *vc4_dsi_encoder;
+	struct drm_panel *panel;
 	const struct of_device_id *match;
 	dma_cap_mask_t dma_mask;
 	int ret;
 
-	dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL);
-	if (!dsi)
-		return -ENOMEM;
-
 	match = of_match_device(vc4_dsi_dt_match, dev);
 	if (!match)
 		return -ENODEV;
@@ -1506,7 +1512,6 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
 	vc4_dsi_encoder->dsi = dsi;
 	dsi->encoder = &vc4_dsi_encoder->base.base;
 
-	dsi->pdev = pdev;
 	dsi->regs = vc4_ioremap_regs(pdev, 0);
 	if (IS_ERR(dsi->regs))
 		return PTR_ERR(dsi->regs);
@@ -1555,8 +1560,15 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
 	/* Clear any existing interrupt state. */
 	DSI_PORT_WRITE(INT_STAT, DSI_PORT_READ(INT_STAT));
 
-	ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
-			       vc4_dsi_irq_handler, 0, "vc4 dsi", dsi);
+	if (dsi->reg_dma_mem)
+		ret = devm_request_threaded_irq(dev, platform_get_irq(pdev, 0),
+						vc4_dsi_irq_defer_to_thread_handler,
+						vc4_dsi_irq_handler,
+						IRQF_ONESHOT,
+						"vc4 dsi", dsi);
+	else
+		ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
+				       vc4_dsi_irq_handler, 0, "vc4 dsi", dsi);
 	if (ret) {
 		if (ret != -EPROBE_DEFER)
 			dev_err(dev, "Failed to get interrupt: %d\n", ret);
@@ -1587,6 +1599,18 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
 		return ret;
 	}
 
+	ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0,
+					  &panel, &dsi->bridge);
+	if (ret)
+		return ret;
+
+	if (panel) {
+		dsi->bridge = devm_drm_panel_bridge_add(dev, panel,
+							DRM_MODE_CONNECTOR_DSI);
+		if (IS_ERR(dsi->bridge))
+			return PTR_ERR(dsi->bridge);
+	}
+
 	/* The esc clock rate is supposed to always be 100Mhz. */
 	ret = clk_set_rate(dsi->escape_clock, 100 * 1000000);
 	if (ret) {
@@ -1605,12 +1629,11 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
 			 DRM_MODE_ENCODER_DSI, NULL);
 	drm_encoder_helper_add(dsi->encoder, &vc4_dsi_encoder_helper_funcs);
 
-	dsi->dsi_host.ops = &vc4_dsi_host_ops;
-	dsi->dsi_host.dev = dev;
-
-	mipi_dsi_host_register(&dsi->dsi_host);
-
-	dev_set_drvdata(dev, dsi);
+	ret = drm_bridge_attach(dsi->encoder, dsi->bridge, NULL);
+	if (ret) {
+		dev_err(dev, "bridge attach failed: %d\n", ret);
+		return ret;
+	}
 
 	pm_runtime_enable(dev);
 
@@ -1626,14 +1649,8 @@ static void vc4_dsi_unbind(struct device *dev, struct device *master,
 
 	pm_runtime_disable(dev);
 
-	drm_bridge_remove(dsi->bridge);
 	vc4_dsi_encoder_destroy(dsi->encoder);
 
-	mipi_dsi_host_unregister(&dsi->dsi_host);
-
-	clk_disable_unprepare(dsi->pll_phy_clock);
-	clk_disable_unprepare(dsi->escape_clock);
-
 	if (dsi->port == 1)
 		vc4->dsi1 = NULL;
 }
@@ -1645,12 +1662,47 @@ static const struct component_ops vc4_dsi_ops = {
 
 static int vc4_dsi_dev_probe(struct platform_device *pdev)
 {
-	return component_add(&pdev->dev, &vc4_dsi_ops);
+	struct device *dev = &pdev->dev;
+	struct vc4_dsi *dsi;
+	int ret;
+
+	dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL);
+	if (!dsi)
+		return -ENOMEM;
+	dev_set_drvdata(dev, dsi);
+
+	dsi->pdev = pdev;
+
+	/* Note, the initialization sequence for DSI and panels is
+	 * tricky.  The component bind above won't get past its
+	 * -EPROBE_DEFER until the panel/bridge probes.  The
+	 * panel/bridge will return -EPROBE_DEFER until it has a
+	 * mipi_dsi_host to register its device to.  So, we register
+	 * the host during pdev probe time, so vc4 as a whole can then
+	 * -EPROBE_DEFER its component bind process until the panel
+	 * successfully attaches.
+	 */
+	dsi->dsi_host.ops = &vc4_dsi_host_ops;
+	dsi->dsi_host.dev = dev;
+	mipi_dsi_host_register(&dsi->dsi_host);
+
+	ret = component_add(&pdev->dev, &vc4_dsi_ops);
+	if (ret) {
+		mipi_dsi_host_unregister(&dsi->dsi_host);
+		return ret;
+	}
+
+	return 0;
 }
 
 static int vc4_dsi_dev_remove(struct platform_device *pdev)
 {
+	struct device *dev = &pdev->dev;
+	struct vc4_dsi *dsi = dev_get_drvdata(dev);
+
 	component_del(&pdev->dev, &vc4_dsi_ops);
+	mipi_dsi_host_unregister(&dsi->dsi_host);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index d5b821ad06af..6c32c89a83a9 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -55,7 +55,7 @@ vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
 	unsigned int i;
 
 	for (i = 0; i < state->user_state.bo_count; i++)
-		drm_gem_object_unreference_unlocked(state->bo[i]);
+		drm_gem_object_put_unlocked(state->bo[i]);
 
 	kfree(state);
 }
@@ -119,7 +119,7 @@ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
 		bo_state[i].size = vc4_bo->base.base.size;
 	}
 
-	if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
+	if (copy_to_user(u64_to_user_ptr(get_state->bo),
 			 bo_state,
 			 state->bo_count * sizeof(*bo_state)))
 		ret = -EFAULT;
@@ -188,12 +188,23 @@ vc4_save_hang_state(struct drm_device *dev)
 			continue;
 
 		for (j = 0; j < exec[i]->bo_count; j++) {
-			drm_gem_object_reference(&exec[i]->bo[j]->base);
+			bo = to_vc4_bo(&exec[i]->bo[j]->base);
+
+			/* Retain BOs just in case they were marked purgeable.
+			 * This prevents the BO from being purged before
+			 * someone had a chance to dump the hang state.
+			 */
+			WARN_ON(!refcount_read(&bo->usecnt));
+			refcount_inc(&bo->usecnt);
+			drm_gem_object_get(&exec[i]->bo[j]->base);
 			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
 		}
 
 		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
-			drm_gem_object_reference(&bo->base.base);
+			/* No need to retain BOs coming from the ->unref_list
+			 * because they are naturally unpurgeable.
+			 */
+			drm_gem_object_get(&bo->base.base);
 			kernel_state->bo[j + prev_idx] = &bo->base.base;
 			j++;
 		}
@@ -233,6 +244,26 @@ vc4_save_hang_state(struct drm_device *dev)
 	state->fdbgs = V3D_READ(V3D_FDBGS);
 	state->errstat = V3D_READ(V3D_ERRSTAT);
 
+	/* We need to turn purgeable BOs into unpurgeable ones so that
+	 * userspace has a chance to dump the hang state before the kernel
+	 * decides to purge those BOs.
+	 * Note that BO consistency at dump time cannot be guaranteed. For
+	 * example, if the owner of these BOs decides to re-use them or mark
+	 * them purgeable again there's nothing we can do to prevent it.
+	 */
+	for (i = 0; i < kernel_state->user_state.bo_count; i++) {
+		struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]);
+
+		if (bo->madv == __VC4_MADV_NOTSUPP)
+			continue;
+
+		mutex_lock(&bo->madv_lock);
+		if (!WARN_ON(bo->madv == __VC4_MADV_PURGED))
+			bo->madv = VC4_MADV_WILLNEED;
+		refcount_dec(&bo->usecnt);
+		mutex_unlock(&bo->madv_lock);
+	}
+
 	spin_lock_irqsave(&vc4->job_lock, irqflags);
 	if (vc4->hang_state) {
 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
@@ -281,10 +312,10 @@ vc4_reset_work(struct work_struct *work)
 }
 
 static void
-vc4_hangcheck_elapsed(unsigned long data)
+vc4_hangcheck_elapsed(struct timer_list *t)
 {
-	struct drm_device *dev = (struct drm_device *)data;
-	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_dev *vc4 = from_timer(vc4, t, hangcheck.timer);
+	struct drm_device *dev = vc4->dev;
 	uint32_t ct0ca, ct1ca;
 	unsigned long irqflags;
 	struct vc4_exec_info *bin_exec, *render_exec;
@@ -639,9 +670,6 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
  * The command validator needs to reference BOs by their index within
  * the submitted job's BO list.  This does the validation of the job's
  * BO list and reference counting for the lifetime of the job.
- *
- * Note that this function doesn't need to unreference the BOs on
- * failure, because that will happen at vc4_complete_exec() time.
  */
 static int
 vc4_cl_lookup_bos(struct drm_device *dev,
@@ -659,7 +687,7 @@ vc4_cl_lookup_bos(struct drm_device *dev,
 		/* See comment on bo_index for why we have to check
 		 * this.
 		 */
-		DRM_ERROR("Rendering requires BOs to validate\n");
+		DRM_DEBUG("Rendering requires BOs to validate\n");
 		return -EINVAL;
 	}
 
@@ -678,8 +706,7 @@ vc4_cl_lookup_bos(struct drm_device *dev,
 		goto fail;
 	}
 
-	if (copy_from_user(handles,
-			   (void __user *)(uintptr_t)args->bo_handles,
+	if (copy_from_user(handles, u64_to_user_ptr(args->bo_handles),
 			   exec->bo_count * sizeof(uint32_t))) {
 		ret = -EFAULT;
 		DRM_ERROR("Failed to copy in GEM handles\n");
@@ -691,19 +718,50 @@ vc4_cl_lookup_bos(struct drm_device *dev,
 		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
 						     handles[i]);
 		if (!bo) {
-			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
+			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
 				  i, handles[i]);
 			ret = -EINVAL;
-			spin_unlock(&file_priv->table_lock);
-			goto fail;
+			break;
 		}
-		drm_gem_object_reference(bo);
+
+		drm_gem_object_get(bo);
 		exec->bo[i] = (struct drm_gem_cma_object *)bo;
 	}
 	spin_unlock(&file_priv->table_lock);
 
+	if (ret)
+		goto fail_put_bo;
+
+	for (i = 0; i < exec->bo_count; i++) {
+		ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base));
+		if (ret)
+			goto fail_dec_usecnt;
+	}
+
+	kvfree(handles);
+	return 0;
+
+fail_dec_usecnt:
+	/* Decrease usecnt on acquired objects.
+	 * We cannot rely on  vc4_complete_exec() to release resources here,
+	 * because vc4_complete_exec() has no information about which BO has
+	 * had its ->usecnt incremented.
+	 * To make things easier we just free everything explicitly and set
+	 * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release'
+	 * step.
+	 */
+	for (i-- ; i >= 0; i--)
+		vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base));
+
+fail_put_bo:
+	/* Release any reference to acquired objects. */
+	for (i = 0; i < exec->bo_count && exec->bo[i]; i++)
+		drm_gem_object_put_unlocked(&exec->bo[i]->base);
+
 fail:
 	kvfree(handles);
+	kvfree(exec->bo);
+	exec->bo = NULL;
 	return ret;
 }
 
@@ -729,7 +787,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 	    args->shader_rec_count >= (UINT_MAX /
 					  sizeof(struct vc4_shader_state)) ||
 	    temp_size < exec_size) {
-		DRM_ERROR("overflow in exec arguments\n");
+		DRM_DEBUG("overflow in exec arguments\n");
 		ret = -EINVAL;
 		goto fail;
 	}
@@ -755,27 +813,27 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 	exec->shader_state_size = args->shader_rec_count;
 
 	if (copy_from_user(bin,
-			   (void __user *)(uintptr_t)args->bin_cl,
+			   u64_to_user_ptr(args->bin_cl),
 			   args->bin_cl_size)) {
 		ret = -EFAULT;
 		goto fail;
 	}
 
 	if (copy_from_user(exec->shader_rec_u,
-			   (void __user *)(uintptr_t)args->shader_rec,
+			   u64_to_user_ptr(args->shader_rec),
 			   args->shader_rec_size)) {
 		ret = -EFAULT;
 		goto fail;
 	}
 
 	if (copy_from_user(exec->uniforms_u,
-			   (void __user *)(uintptr_t)args->uniforms,
+			   u64_to_user_ptr(args->uniforms),
 			   args->uniforms_size)) {
 		ret = -EFAULT;
 		goto fail;
 	}
 
-	bo = vc4_bo_create(dev, exec_size, true);
+	bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL);
 	if (IS_ERR(bo)) {
 		DRM_ERROR("Couldn't allocate BO for binning\n");
 		ret = PTR_ERR(bo);
@@ -834,8 +892,12 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 		dma_fence_signal(exec->fence);
 
 	if (exec->bo) {
-		for (i = 0; i < exec->bo_count; i++)
-			drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
+		for (i = 0; i < exec->bo_count; i++) {
+			struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
+
+			vc4_bo_dec_usecnt(bo);
+			drm_gem_object_put_unlocked(&exec->bo[i]->base);
+		}
 		kvfree(exec->bo);
 	}
 
@@ -843,7 +905,7 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
 						     struct vc4_bo, unref_head);
 		list_del(&bo->unref_head);
-		drm_gem_object_unreference_unlocked(&bo->base.base);
+		drm_gem_object_put_unlocked(&bo->base.base);
 	}
 
 	/* Free up the allocation of any bin slots we used. */
@@ -974,7 +1036,7 @@ vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
 
 	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
 	if (!gem_obj) {
-		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
 		return -EINVAL;
 	}
 	bo = to_vc4_bo(gem_obj);
@@ -982,7 +1044,7 @@ vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
 	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
 					      &args->timeout_ns);
 
-	drm_gem_object_unreference_unlocked(gem_obj);
+	drm_gem_object_put_unlocked(gem_obj);
 	return ret;
 }
 
@@ -1008,8 +1070,11 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct ww_acquire_ctx acquire_ctx;
 	int ret = 0;
 
-	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
-		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
+	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
+			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
+			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
+			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
+		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
 		return -EINVAL;
 	}
 
@@ -1089,13 +1154,14 @@ vc4_gem_init(struct drm_device *dev)
 	spin_lock_init(&vc4->job_lock);
 
 	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
-	setup_timer(&vc4->hangcheck.timer,
-		    vc4_hangcheck_elapsed,
-		    (unsigned long)dev);
+	timer_setup(&vc4->hangcheck.timer, vc4_hangcheck_elapsed, 0);
 
 	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
 
 	mutex_init(&vc4->power_lock);
+
+	INIT_LIST_HEAD(&vc4->purgeable.list);
+	mutex_init(&vc4->purgeable.lock);
 }
 
 void
@@ -1118,6 +1184,82 @@ vc4_gem_destroy(struct drm_device *dev)
 
 	if (vc4->hang_state)
 		vc4_free_hang_state(dev, vc4->hang_state);
+}
+
+int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv)
+{
+	struct drm_vc4_gem_madvise *args = data;
+	struct drm_gem_object *gem_obj;
+	struct vc4_bo *bo;
+	int ret;
 
-	vc4_bo_cache_destroy(dev);
+	switch (args->madv) {
+	case VC4_MADV_DONTNEED:
+	case VC4_MADV_WILLNEED:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+		return -ENOENT;
+	}
+
+	bo = to_vc4_bo(gem_obj);
+
+	/* Only BOs exposed to userspace can be purged. */
+	if (bo->madv == __VC4_MADV_NOTSUPP) {
+		DRM_DEBUG("madvise not supported on this BO\n");
+		ret = -EINVAL;
+		goto out_put_gem;
+	}
+
+	/* Not sure it's safe to purge imported BOs. Let's just assume it's
+	 * not until proven otherwise.
+	 */
+	if (gem_obj->import_attach) {
+		DRM_DEBUG("madvise not supported on imported BOs\n");
+		ret = -EINVAL;
+		goto out_put_gem;
+	}
+
+	mutex_lock(&bo->madv_lock);
+
+	if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED &&
+	    !refcount_read(&bo->usecnt)) {
+		/* If the BO is about to be marked as purgeable, is not used
+		 * and is not already purgeable or purged, add it to the
+		 * purgeable list.
+		 */
+		vc4_bo_add_to_purgeable_pool(bo);
+	} else if (args->madv == VC4_MADV_WILLNEED &&
+		   bo->madv == VC4_MADV_DONTNEED &&
+		   !refcount_read(&bo->usecnt)) {
+		/* The BO has not been purged yet, just remove it from
+		 * the purgeable list.
+		 */
+		vc4_bo_remove_from_purgeable_pool(bo);
+	}
+
+	/* Save the purged state. */
+	args->retained = bo->madv != __VC4_MADV_PURGED;
+
+	/* Update internal madv state only if the bo was not purged. */
+	if (bo->madv != __VC4_MADV_PURGED)
+		bo->madv = args->madv;
+
+	mutex_unlock(&bo->madv_lock);
+
+	ret = 0;
+
+out_put_gem:
+	drm_gem_object_put_unlocked(gem_obj);
+
+	return ret;
 }
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index ed63d4e85762..fa37a1c07cf6 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -57,9 +57,14 @@
 #include <sound/pcm_drm_eld.h>
 #include <sound/pcm_params.h>
 #include <sound/soc.h>
+#include "media/cec.h"
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
+#define HSM_CLOCK_FREQ 163682864
+#define CEC_CLOCK_FREQ 40000
+#define CEC_CLOCK_DIV  (HSM_CLOCK_FREQ / CEC_CLOCK_FREQ)
+
 /* HDMI audio information */
 struct vc4_hdmi_audio {
 	struct snd_soc_card card;
@@ -85,6 +90,11 @@ struct vc4_hdmi {
 	int hpd_gpio;
 	bool hpd_active_low;
 
+	struct cec_adapter *cec_adap;
+	struct cec_msg cec_rx_msg;
+	bool cec_tx_ok;
+	bool cec_irq_was_rx;
+
 	struct clk *pixel_clock;
 	struct clk *hsm_clock;
 };
@@ -149,6 +159,23 @@ static const struct {
 	HDMI_REG(VC4_HDMI_VERTB1),
 	HDMI_REG(VC4_HDMI_TX_PHY_RESET_CTL),
 	HDMI_REG(VC4_HDMI_TX_PHY_CTL0),
+
+	HDMI_REG(VC4_HDMI_CEC_CNTRL_1),
+	HDMI_REG(VC4_HDMI_CEC_CNTRL_2),
+	HDMI_REG(VC4_HDMI_CEC_CNTRL_3),
+	HDMI_REG(VC4_HDMI_CEC_CNTRL_4),
+	HDMI_REG(VC4_HDMI_CEC_CNTRL_5),
+	HDMI_REG(VC4_HDMI_CPU_STATUS),
+	HDMI_REG(VC4_HDMI_CPU_MASK_STATUS),
+
+	HDMI_REG(VC4_HDMI_CEC_RX_DATA_1),
+	HDMI_REG(VC4_HDMI_CEC_RX_DATA_2),
+	HDMI_REG(VC4_HDMI_CEC_RX_DATA_3),
+	HDMI_REG(VC4_HDMI_CEC_RX_DATA_4),
+	HDMI_REG(VC4_HDMI_CEC_TX_DATA_1),
+	HDMI_REG(VC4_HDMI_CEC_TX_DATA_2),
+	HDMI_REG(VC4_HDMI_CEC_TX_DATA_3),
+	HDMI_REG(VC4_HDMI_CEC_TX_DATA_4),
 };
 
 static const struct {
@@ -216,8 +243,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
 		if (gpio_get_value_cansleep(vc4->hdmi->hpd_gpio) ^
 		    vc4->hdmi->hpd_active_low)
 			return connector_status_connected;
-		else
-			return connector_status_disconnected;
+		cec_phys_addr_invalidate(vc4->hdmi->cec_adap);
+		return connector_status_disconnected;
 	}
 
 	if (drm_probe_ddc(vc4->hdmi->ddc))
@@ -225,8 +252,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
 
 	if (HDMI_READ(VC4_HDMI_HOTPLUG) & VC4_HDMI_HOTPLUG_CONNECTED)
 		return connector_status_connected;
-	else
-		return connector_status_disconnected;
+	cec_phys_addr_invalidate(vc4->hdmi->cec_adap);
+	return connector_status_disconnected;
 }
 
 static void vc4_hdmi_connector_destroy(struct drm_connector *connector)
@@ -247,6 +274,7 @@ static int vc4_hdmi_connector_get_modes(struct drm_connector *connector)
 	struct edid *edid;
 
 	edid = drm_get_edid(connector, vc4->hdmi->ddc);
+	cec_s_phys_addr_from_edid(vc4->hdmi->cec_adap, edid);
 	if (!edid)
 		return -ENODEV;
 
@@ -260,12 +288,12 @@ static int vc4_hdmi_connector_get_modes(struct drm_connector *connector)
 	drm_mode_connector_update_edid_property(connector, edid);
 	ret = drm_add_edid_modes(connector, edid);
 	drm_edid_to_eld(connector, edid);
+	kfree(edid);
 
 	return ret;
 }
 
 static const struct drm_connector_funcs vc4_hdmi_connector_funcs = {
-	.dpms = drm_atomic_helper_connector_dpms,
 	.detect = vc4_hdmi_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = vc4_hdmi_connector_destroy,
@@ -281,16 +309,13 @@ static const struct drm_connector_helper_funcs vc4_hdmi_connector_helper_funcs =
 static struct drm_connector *vc4_hdmi_connector_init(struct drm_device *dev,
 						     struct drm_encoder *encoder)
 {
-	struct drm_connector *connector = NULL;
+	struct drm_connector *connector;
 	struct vc4_hdmi_connector *hdmi_connector;
-	int ret = 0;
 
 	hdmi_connector = devm_kzalloc(dev->dev, sizeof(*hdmi_connector),
 				      GFP_KERNEL);
-	if (!hdmi_connector) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	if (!hdmi_connector)
+		return ERR_PTR(-ENOMEM);
 	connector = &hdmi_connector->base;
 
 	hdmi_connector->encoder = encoder;
@@ -308,12 +333,6 @@ static struct drm_connector *vc4_hdmi_connector_init(struct drm_device *dev,
 	drm_mode_connector_attach_encoder(connector, encoder);
 
 	return connector;
-
- fail:
-	if (connector)
-		vc4_hdmi_connector_destroy(connector);
-
-	return ERR_PTR(ret);
 }
 
 static void vc4_hdmi_encoder_destroy(struct drm_encoder *encoder)
@@ -395,7 +414,7 @@ static void vc4_hdmi_set_avi_infoframe(struct drm_encoder *encoder)
 	union hdmi_infoframe frame;
 	int ret;
 
-	ret = drm_hdmi_avi_infoframe_from_display_mode(&frame.avi, mode);
+	ret = drm_hdmi_avi_infoframe_from_display_mode(&frame.avi, mode, false);
 	if (ret < 0) {
 		DRM_ERROR("couldn't fill AVI infoframe\n");
 		return;
@@ -463,11 +482,6 @@ static void vc4_hdmi_encoder_disable(struct drm_encoder *encoder)
 	HD_WRITE(VC4_HD_VID_CTL,
 		 HD_READ(VC4_HD_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE);
 
-	HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST);
-	udelay(1);
-	HD_WRITE(VC4_HD_M_CTL, 0);
-
-	clk_disable_unprepare(hdmi->hsm_clock);
 	clk_disable_unprepare(hdmi->pixel_clock);
 
 	ret = pm_runtime_put(&hdmi->pdev->dev);
@@ -509,16 +523,6 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder)
 		return;
 	}
 
-	/* This is the rate that is set by the firmware.  The number
-	 * needs to be a bit higher than the pixel clock rate
-	 * (generally 148.5Mhz).
-	 */
-	ret = clk_set_rate(hdmi->hsm_clock, 163682864);
-	if (ret) {
-		DRM_ERROR("Failed to set HSM clock rate: %d\n", ret);
-		return;
-	}
-
 	ret = clk_set_rate(hdmi->pixel_clock,
 			   mode->clock * 1000 *
 			   ((mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1));
@@ -533,20 +537,6 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder)
 		return;
 	}
 
-	ret = clk_prepare_enable(hdmi->hsm_clock);
-	if (ret) {
-		DRM_ERROR("Failed to turn on HDMI state machine clock: %d\n",
-			  ret);
-		clk_disable_unprepare(hdmi->pixel_clock);
-		return;
-	}
-
-	HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST);
-	udelay(1);
-	HD_WRITE(VC4_HD_M_CTL, 0);
-
-	HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_ENABLE);
-
 	HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL,
 		   VC4_HDMI_SW_RESET_HDMI |
 		   VC4_HDMI_SW_RESET_FORMAT_DETECT);
@@ -1150,6 +1140,159 @@ static void vc4_hdmi_audio_cleanup(struct vc4_hdmi *hdmi)
 		snd_soc_unregister_codec(dev);
 }
 
+#ifdef CONFIG_DRM_VC4_HDMI_CEC
+static irqreturn_t vc4_cec_irq_handler_thread(int irq, void *priv)
+{
+	struct vc4_dev *vc4 = priv;
+	struct vc4_hdmi *hdmi = vc4->hdmi;
+
+	if (hdmi->cec_irq_was_rx) {
+		if (hdmi->cec_rx_msg.len)
+			cec_received_msg(hdmi->cec_adap, &hdmi->cec_rx_msg);
+	} else if (hdmi->cec_tx_ok) {
+		cec_transmit_done(hdmi->cec_adap, CEC_TX_STATUS_OK,
+				  0, 0, 0, 0);
+	} else {
+		/*
+		 * This CEC implementation makes 1 retry, so if we
+		 * get a NACK, then that means it made 2 attempts.
+		 */
+		cec_transmit_done(hdmi->cec_adap, CEC_TX_STATUS_NACK,
+				  0, 2, 0, 0);
+	}
+	return IRQ_HANDLED;
+}
+
+static void vc4_cec_read_msg(struct vc4_dev *vc4, u32 cntrl1)
+{
+	struct cec_msg *msg = &vc4->hdmi->cec_rx_msg;
+	unsigned int i;
+
+	msg->len = 1 + ((cntrl1 & VC4_HDMI_CEC_REC_WRD_CNT_MASK) >>
+					VC4_HDMI_CEC_REC_WRD_CNT_SHIFT);
+	for (i = 0; i < msg->len; i += 4) {
+		u32 val = HDMI_READ(VC4_HDMI_CEC_RX_DATA_1 + i);
+
+		msg->msg[i] = val & 0xff;
+		msg->msg[i + 1] = (val >> 8) & 0xff;
+		msg->msg[i + 2] = (val >> 16) & 0xff;
+		msg->msg[i + 3] = (val >> 24) & 0xff;
+	}
+}
+
+static irqreturn_t vc4_cec_irq_handler(int irq, void *priv)
+{
+	struct vc4_dev *vc4 = priv;
+	struct vc4_hdmi *hdmi = vc4->hdmi;
+	u32 stat = HDMI_READ(VC4_HDMI_CPU_STATUS);
+	u32 cntrl1, cntrl5;
+
+	if (!(stat & VC4_HDMI_CPU_CEC))
+		return IRQ_NONE;
+	hdmi->cec_rx_msg.len = 0;
+	cntrl1 = HDMI_READ(VC4_HDMI_CEC_CNTRL_1);
+	cntrl5 = HDMI_READ(VC4_HDMI_CEC_CNTRL_5);
+	hdmi->cec_irq_was_rx = cntrl5 & VC4_HDMI_CEC_RX_CEC_INT;
+	if (hdmi->cec_irq_was_rx) {
+		vc4_cec_read_msg(vc4, cntrl1);
+		cntrl1 |= VC4_HDMI_CEC_CLEAR_RECEIVE_OFF;
+		HDMI_WRITE(VC4_HDMI_CEC_CNTRL_1, cntrl1);
+		cntrl1 &= ~VC4_HDMI_CEC_CLEAR_RECEIVE_OFF;
+	} else {
+		hdmi->cec_tx_ok = cntrl1 & VC4_HDMI_CEC_TX_STATUS_GOOD;
+		cntrl1 &= ~VC4_HDMI_CEC_START_XMIT_BEGIN;
+	}
+	HDMI_WRITE(VC4_HDMI_CEC_CNTRL_1, cntrl1);
+	HDMI_WRITE(VC4_HDMI_CPU_CLEAR, VC4_HDMI_CPU_CEC);
+
+	return IRQ_WAKE_THREAD;
+}
+
+static int vc4_hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable)
+{
+	struct vc4_dev *vc4 = cec_get_drvdata(adap);
+	/* clock period in microseconds */
+	const u32 usecs = 1000000 / CEC_CLOCK_FREQ;
+	u32 val = HDMI_READ(VC4_HDMI_CEC_CNTRL_5);
+
+	val &= ~(VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET |
+		 VC4_HDMI_CEC_CNT_TO_4700_US_MASK |
+		 VC4_HDMI_CEC_CNT_TO_4500_US_MASK);
+	val |= ((4700 / usecs) << VC4_HDMI_CEC_CNT_TO_4700_US_SHIFT) |
+	       ((4500 / usecs) << VC4_HDMI_CEC_CNT_TO_4500_US_SHIFT);
+
+	if (enable) {
+		HDMI_WRITE(VC4_HDMI_CEC_CNTRL_5, val |
+			   VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET);
+		HDMI_WRITE(VC4_HDMI_CEC_CNTRL_5, val);
+		HDMI_WRITE(VC4_HDMI_CEC_CNTRL_2,
+			 ((1500 / usecs) << VC4_HDMI_CEC_CNT_TO_1500_US_SHIFT) |
+			 ((1300 / usecs) << VC4_HDMI_CEC_CNT_TO_1300_US_SHIFT) |
+			 ((800 / usecs) << VC4_HDMI_CEC_CNT_TO_800_US_SHIFT) |
+			 ((600 / usecs) << VC4_HDMI_CEC_CNT_TO_600_US_SHIFT) |
+			 ((400 / usecs) << VC4_HDMI_CEC_CNT_TO_400_US_SHIFT));
+		HDMI_WRITE(VC4_HDMI_CEC_CNTRL_3,
+			 ((2750 / usecs) << VC4_HDMI_CEC_CNT_TO_2750_US_SHIFT) |
+			 ((2400 / usecs) << VC4_HDMI_CEC_CNT_TO_2400_US_SHIFT) |
+			 ((2050 / usecs) << VC4_HDMI_CEC_CNT_TO_2050_US_SHIFT) |
+			 ((1700 / usecs) << VC4_HDMI_CEC_CNT_TO_1700_US_SHIFT));
+		HDMI_WRITE(VC4_HDMI_CEC_CNTRL_4,
+			 ((4300 / usecs) << VC4_HDMI_CEC_CNT_TO_4300_US_SHIFT) |
+			 ((3900 / usecs) << VC4_HDMI_CEC_CNT_TO_3900_US_SHIFT) |
+			 ((3600 / usecs) << VC4_HDMI_CEC_CNT_TO_3600_US_SHIFT) |
+			 ((3500 / usecs) << VC4_HDMI_CEC_CNT_TO_3500_US_SHIFT));
+
+		HDMI_WRITE(VC4_HDMI_CPU_MASK_CLEAR, VC4_HDMI_CPU_CEC);
+	} else {
+		HDMI_WRITE(VC4_HDMI_CPU_MASK_SET, VC4_HDMI_CPU_CEC);
+		HDMI_WRITE(VC4_HDMI_CEC_CNTRL_5, val |
+			   VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET);
+	}
+	return 0;
+}
+
+static int vc4_hdmi_cec_adap_log_addr(struct cec_adapter *adap, u8 log_addr)
+{
+	struct vc4_dev *vc4 = cec_get_drvdata(adap);
+
+	HDMI_WRITE(VC4_HDMI_CEC_CNTRL_1,
+		   (HDMI_READ(VC4_HDMI_CEC_CNTRL_1) & ~VC4_HDMI_CEC_ADDR_MASK) |
+		   (log_addr & 0xf) << VC4_HDMI_CEC_ADDR_SHIFT);
+	return 0;
+}
+
+static int vc4_hdmi_cec_adap_transmit(struct cec_adapter *adap, u8 attempts,
+				      u32 signal_free_time, struct cec_msg *msg)
+{
+	struct vc4_dev *vc4 = cec_get_drvdata(adap);
+	u32 val;
+	unsigned int i;
+
+	for (i = 0; i < msg->len; i += 4)
+		HDMI_WRITE(VC4_HDMI_CEC_TX_DATA_1 + i,
+			   (msg->msg[i]) |
+			   (msg->msg[i + 1] << 8) |
+			   (msg->msg[i + 2] << 16) |
+			   (msg->msg[i + 3] << 24));
+
+	val = HDMI_READ(VC4_HDMI_CEC_CNTRL_1);
+	val &= ~VC4_HDMI_CEC_START_XMIT_BEGIN;
+	HDMI_WRITE(VC4_HDMI_CEC_CNTRL_1, val);
+	val &= ~VC4_HDMI_CEC_MESSAGE_LENGTH_MASK;
+	val |= (msg->len - 1) << VC4_HDMI_CEC_MESSAGE_LENGTH_SHIFT;
+	val |= VC4_HDMI_CEC_START_XMIT_BEGIN;
+
+	HDMI_WRITE(VC4_HDMI_CEC_CNTRL_1, val);
+	return 0;
+}
+
+static const struct cec_adap_ops vc4_hdmi_cec_adap_ops = {
+	.adap_enable = vc4_hdmi_cec_adap_enable,
+	.adap_log_addr = vc4_hdmi_cec_adap_log_addr,
+	.adap_transmit = vc4_hdmi_cec_adap_transmit,
+};
+#endif
+
 static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -1205,6 +1348,23 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 		return -EPROBE_DEFER;
 	}
 
+	/* This is the rate that is set by the firmware.  The number
+	 * needs to be a bit higher than the pixel clock rate
+	 * (generally 148.5Mhz).
+	 */
+	ret = clk_set_rate(hdmi->hsm_clock, HSM_CLOCK_FREQ);
+	if (ret) {
+		DRM_ERROR("Failed to set HSM clock rate: %d\n", ret);
+		goto err_put_i2c;
+	}
+
+	ret = clk_prepare_enable(hdmi->hsm_clock);
+	if (ret) {
+		DRM_ERROR("Failed to turn on HDMI state machine clock: %d\n",
+			  ret);
+		goto err_put_i2c;
+	}
+
 	/* Only use the GPIO HPD pin if present in the DT, otherwise
 	 * we'll use the HDMI core's register.
 	 */
@@ -1216,7 +1376,7 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 							 &hpd_gpio_flags);
 		if (hdmi->hpd_gpio < 0) {
 			ret = hdmi->hpd_gpio;
-			goto err_put_i2c;
+			goto err_unprepare_hsm;
 		}
 
 		hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW;
@@ -1224,6 +1384,14 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 
 	vc4->hdmi = hdmi;
 
+	/* HDMI core must be enabled. */
+	if (!(HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE)) {
+		HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST);
+		udelay(1);
+		HD_WRITE(VC4_HD_M_CTL, 0);
+
+		HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_ENABLE);
+	}
 	pm_runtime_enable(dev);
 
 	drm_encoder_init(drm, hdmi->encoder, &vc4_hdmi_encoder_funcs,
@@ -1235,6 +1403,37 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 		ret = PTR_ERR(hdmi->connector);
 		goto err_destroy_encoder;
 	}
+#ifdef CONFIG_DRM_VC4_HDMI_CEC
+	hdmi->cec_adap = cec_allocate_adapter(&vc4_hdmi_cec_adap_ops,
+					      vc4, "vc4",
+					      CEC_CAP_TRANSMIT |
+					      CEC_CAP_LOG_ADDRS |
+					      CEC_CAP_PASSTHROUGH |
+					      CEC_CAP_RC, 1);
+	ret = PTR_ERR_OR_ZERO(hdmi->cec_adap);
+	if (ret < 0)
+		goto err_destroy_conn;
+	HDMI_WRITE(VC4_HDMI_CPU_MASK_SET, 0xffffffff);
+	value = HDMI_READ(VC4_HDMI_CEC_CNTRL_1);
+	value &= ~VC4_HDMI_CEC_DIV_CLK_CNT_MASK;
+	/*
+	 * Set the logical address to Unregistered and set the clock
+	 * divider: the hsm_clock rate and this divider setting will
+	 * give a 40 kHz CEC clock.
+	 */
+	value |= VC4_HDMI_CEC_ADDR_MASK |
+		 (4091 << VC4_HDMI_CEC_DIV_CLK_CNT_SHIFT);
+	HDMI_WRITE(VC4_HDMI_CEC_CNTRL_1, value);
+	ret = devm_request_threaded_irq(dev, platform_get_irq(pdev, 0),
+					vc4_cec_irq_handler,
+					vc4_cec_irq_handler_thread, 0,
+					"vc4 hdmi cec", vc4);
+	if (ret)
+		goto err_delete_cec_adap;
+	ret = cec_register_adapter(hdmi->cec_adap, dev);
+	if (ret < 0)
+		goto err_delete_cec_adap;
+#endif
 
 	ret = vc4_hdmi_audio_init(hdmi);
 	if (ret)
@@ -1242,8 +1441,16 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 
 	return 0;
 
+#ifdef CONFIG_DRM_VC4_HDMI_CEC
+err_delete_cec_adap:
+	cec_delete_adapter(hdmi->cec_adap);
+err_destroy_conn:
+	vc4_hdmi_connector_destroy(hdmi->connector);
+#endif
 err_destroy_encoder:
 	vc4_hdmi_encoder_destroy(hdmi->encoder);
+err_unprepare_hsm:
+	clk_disable_unprepare(hdmi->hsm_clock);
 	pm_runtime_disable(dev);
 err_put_i2c:
 	put_device(&hdmi->ddc->dev);
@@ -1259,10 +1466,11 @@ static void vc4_hdmi_unbind(struct device *dev, struct device *master,
 	struct vc4_hdmi *hdmi = vc4->hdmi;
 
 	vc4_hdmi_audio_cleanup(hdmi);
-
+	cec_unregister_adapter(hdmi->cec_adap);
 	vc4_hdmi_connector_destroy(hdmi->connector);
 	vc4_hdmi_encoder_destroy(hdmi->encoder);
 
+	clk_disable_unprepare(hdmi->hsm_clock);
 	pm_runtime_disable(dev);
 
 	put_device(&hdmi->ddc->dev);
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index bc6ecdc6f104..50c4959b5bd3 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -20,6 +20,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include "vc4_drv.h"
 
 static void vc4_output_poll_changed(struct drm_device *dev)
@@ -29,16 +30,9 @@ static void vc4_output_poll_changed(struct drm_device *dev)
 	drm_fbdev_cma_hotplug_event(vc4->fbdev);
 }
 
-struct vc4_commit {
-	struct drm_device *dev;
-	struct drm_atomic_state *state;
-	struct vc4_seqno_cb cb;
-};
-
 static void
-vc4_atomic_complete_commit(struct vc4_commit *c)
+vc4_atomic_complete_commit(struct drm_atomic_state *state)
 {
-	struct drm_atomic_state *state = c->state;
 	struct drm_device *dev = state->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
@@ -72,28 +66,14 @@ vc4_atomic_complete_commit(struct vc4_commit *c)
 	drm_atomic_state_put(state);
 
 	up(&vc4->async_modeset);
-
-	kfree(c);
 }
 
-static void
-vc4_atomic_complete_commit_seqno_cb(struct vc4_seqno_cb *cb)
+static void commit_work(struct work_struct *work)
 {
-	struct vc4_commit *c = container_of(cb, struct vc4_commit, cb);
-
-	vc4_atomic_complete_commit(c);
-}
-
-static struct vc4_commit *commit_init(struct drm_atomic_state *state)
-{
-	struct vc4_commit *c = kzalloc(sizeof(*c), GFP_KERNEL);
-
-	if (!c)
-		return NULL;
-	c->dev = state->dev;
-	c->state = state;
-
-	return c;
+	struct drm_atomic_state *state = container_of(work,
+						      struct drm_atomic_state,
+						      commit_work);
+	vc4_atomic_complete_commit(state);
 }
 
 /**
@@ -115,40 +95,29 @@ static int vc4_atomic_commit(struct drm_device *dev,
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	int ret;
-	int i;
-	uint64_t wait_seqno = 0;
-	struct vc4_commit *c;
-	struct drm_plane *plane;
-	struct drm_plane_state *new_state;
-
-	c = commit_init(state);
-	if (!c)
-		return -ENOMEM;
 
 	ret = drm_atomic_helper_setup_commit(state, nonblock);
 	if (ret)
 		return ret;
 
+	INIT_WORK(&state->commit_work, commit_work);
+
 	ret = down_interruptible(&vc4->async_modeset);
-	if (ret) {
-		kfree(c);
+	if (ret)
 		return ret;
-	}
 
 	ret = drm_atomic_helper_prepare_planes(dev, state);
 	if (ret) {
-		kfree(c);
 		up(&vc4->async_modeset);
 		return ret;
 	}
 
-	for_each_plane_in_state(state, plane, new_state, i) {
-		if ((plane->state->fb != new_state->fb) && new_state->fb) {
-			struct drm_gem_cma_object *cma_bo =
-				drm_fb_cma_get_gem_obj(new_state->fb, 0);
-			struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
-
-			wait_seqno = max(bo->seqno, wait_seqno);
+	if (!nonblock) {
+		ret = drm_atomic_helper_wait_for_fences(dev, state, true);
+		if (ret) {
+			drm_atomic_helper_cleanup_planes(dev, state);
+			up(&vc4->async_modeset);
+			return ret;
 		}
 	}
 
@@ -158,7 +127,7 @@ static int vc4_atomic_commit(struct drm_device *dev,
 	 * the software side now.
 	 */
 
-	drm_atomic_helper_swap_state(state, true);
+	BUG_ON(drm_atomic_helper_swap_state(state, false) < 0);
 
 	/*
 	 * Everything below can be run asynchronously without the need to grab
@@ -177,13 +146,10 @@ static int vc4_atomic_commit(struct drm_device *dev,
 	 */
 
 	drm_atomic_state_get(state);
-	if (nonblock) {
-		vc4_queue_seqno_cb(dev, &c->cb, wait_seqno,
-				   vc4_atomic_complete_commit_seqno_cb);
-	} else {
-		vc4_wait_for_seqno(dev, wait_seqno, ~0ull, false);
-		vc4_atomic_complete_commit(c);
-	}
+	if (nonblock)
+		queue_work(system_unbound_wq, &state->commit_work);
+	else
+		vc4_atomic_complete_commit(state);
 
 	return 0;
 }
@@ -204,7 +170,7 @@ static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev,
 		gem_obj = drm_gem_object_lookup(file_priv,
 						mode_cmd->handles[0]);
 		if (!gem_obj) {
-			DRM_ERROR("Failed to look up GEM BO %d\n",
+			DRM_DEBUG("Failed to look up GEM BO %d\n",
 				  mode_cmd->handles[0]);
 			return ERR_PTR(-ENOENT);
 		}
@@ -219,12 +185,12 @@ static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev,
 			mode_cmd_local.modifier[0] = DRM_FORMAT_MOD_NONE;
 		}
 
-		drm_gem_object_unreference_unlocked(gem_obj);
+		drm_gem_object_put_unlocked(gem_obj);
 
 		mode_cmd = &mode_cmd_local;
 	}
 
-	return drm_fb_cma_create(dev, file_priv, mode_cmd);
+	return drm_gem_fb_create(dev, file_priv, mode_cmd);
 }
 
 static const struct drm_mode_config_funcs vc4_mode_funcs = {
@@ -241,6 +207,9 @@ int vc4_kms_load(struct drm_device *dev)
 
 	sema_init(&vc4->async_modeset, 1);
 
+	/* Set support for vblank irq fast disable, before drm_vblank_init() */
+	dev->vblank_disable_immediate = true;
+
 	ret = drm_vblank_init(dev, dev->mode_config.num_crtc);
 	if (ret < 0) {
 		dev_err(dev->dev, "failed to initialize vblank\n");
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index fa6809d8b0fe..423a23ed8fc2 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -23,6 +23,7 @@
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_plane_helper.h>
 
+#include "uapi/drm/vc4_drm.h"
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
@@ -547,14 +548,24 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 		tiling = SCALER_CTL0_TILING_LINEAR;
 		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
 		break;
-	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
+
+	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
+		/* For T-tiled, the FB pitch is "how many bytes from
+		 * one row to the next, such that pitch * tile_h ==
+		 * tile_size * tiles_per_row."
+		 */
+		u32 tile_size_shift = 12; /* T tiles are 4kb */
+		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
+		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
+
 		tiling = SCALER_CTL0_TILING_256B_OR_T;
 
-		pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET),
-			  VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L),
-			  VC4_SET_FIELD((vc4_state->src_w[0] + 31) >> 5,
-					SCALER_PITCH0_TILE_WIDTH_R));
+		pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET) |
+			  VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L) |
+			  VC4_SET_FIELD(tiles_w, SCALER_PITCH0_TILE_WIDTH_R));
 		break;
+	}
+
 	default:
 		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
 			      (long long)fb->modifier);
@@ -759,9 +770,45 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
 	vc4_state->dlist[vc4_state->ptr0_offset] = addr;
 }
 
+static int vc4_prepare_fb(struct drm_plane *plane,
+			  struct drm_plane_state *state)
+{
+	struct vc4_bo *bo;
+	struct dma_fence *fence;
+	int ret;
+
+	if ((plane->state->fb == state->fb) || !state->fb)
+		return 0;
+
+	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
+
+	ret = vc4_bo_inc_usecnt(bo);
+	if (ret)
+		return ret;
+
+	fence = reservation_object_get_excl_rcu(bo->resv);
+	drm_atomic_set_fence_for_plane(state, fence);
+
+	return 0;
+}
+
+static void vc4_cleanup_fb(struct drm_plane *plane,
+			   struct drm_plane_state *state)
+{
+	struct vc4_bo *bo;
+
+	if (plane->state->fb == state->fb || !state->fb)
+		return;
+
+	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
+	vc4_bo_dec_usecnt(bo);
+}
+
 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
 	.atomic_check = vc4_plane_atomic_check,
 	.atomic_update = vc4_plane_atomic_update,
+	.prepare_fb = vc4_prepare_fb,
+	.cleanup_fb = vc4_cleanup_fb,
 };
 
 static void vc4_plane_destroy(struct drm_plane *plane)
@@ -885,7 +932,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
 	ret = drm_universal_plane_init(dev, plane, 0,
 				       &vc4_plane_funcs,
 				       formats, num_formats,
-				       type, NULL);
+				       NULL, type, NULL);
 
 	drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
 
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index d382c34c1b9e..55677bd50f66 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -561,16 +561,129 @@
 # define VC4_HDMI_VERTB_VBP_MASK		VC4_MASK(8, 0)
 # define VC4_HDMI_VERTB_VBP_SHIFT		0
 
+#define VC4_HDMI_CEC_CNTRL_1			0x0e8
+/* Set when the transmission has ended. */
+# define VC4_HDMI_CEC_TX_EOM			BIT(31)
+/* If set, transmission was acked on the 1st or 2nd attempt (only one
+ * retry is attempted).  If in continuous mode, this means TX needs to
+ * be filled if !TX_EOM.
+ */
+# define VC4_HDMI_CEC_TX_STATUS_GOOD		BIT(30)
+# define VC4_HDMI_CEC_RX_EOM			BIT(29)
+# define VC4_HDMI_CEC_RX_STATUS_GOOD		BIT(28)
+/* Number of bytes received for the message. */
+# define VC4_HDMI_CEC_REC_WRD_CNT_MASK		VC4_MASK(27, 24)
+# define VC4_HDMI_CEC_REC_WRD_CNT_SHIFT		24
+/* Sets continuous receive mode.  Generates interrupt after each 8
+ * bytes to signal that RX_DATA should be consumed, and at RX_EOM.
+ *
+ * If disabled, maximum 16 bytes will be received (including header),
+ * and interrupt at RX_EOM.  Later bytes will be acked but not put
+ * into the RX_DATA.
+ */
+# define VC4_HDMI_CEC_RX_CONTINUE		BIT(23)
+# define VC4_HDMI_CEC_TX_CONTINUE		BIT(22)
+/* Set this after a CEC interrupt. */
+# define VC4_HDMI_CEC_CLEAR_RECEIVE_OFF		BIT(21)
+/* Starts a TX.  Will wait for appropriate idel time before CEC
+ * activity. Must be cleared in between transmits.
+ */
+# define VC4_HDMI_CEC_START_XMIT_BEGIN		BIT(20)
+# define VC4_HDMI_CEC_MESSAGE_LENGTH_MASK	VC4_MASK(19, 16)
+# define VC4_HDMI_CEC_MESSAGE_LENGTH_SHIFT	16
+/* Device's CEC address */
+# define VC4_HDMI_CEC_ADDR_MASK			VC4_MASK(15, 12)
+# define VC4_HDMI_CEC_ADDR_SHIFT		12
+/* Divides off of HSM clock to generate CEC bit clock. */
+/* With the current defaults the CEC bit clock is 40 kHz = 25 usec */
+# define VC4_HDMI_CEC_DIV_CLK_CNT_MASK		VC4_MASK(11, 0)
+# define VC4_HDMI_CEC_DIV_CLK_CNT_SHIFT		0
+
+/* Set these fields to how many bit clock cycles get to that many
+ * microseconds.
+ */
+#define VC4_HDMI_CEC_CNTRL_2			0x0ec
+# define VC4_HDMI_CEC_CNT_TO_1500_US_MASK	VC4_MASK(30, 24)
+# define VC4_HDMI_CEC_CNT_TO_1500_US_SHIFT	24
+# define VC4_HDMI_CEC_CNT_TO_1300_US_MASK	VC4_MASK(23, 17)
+# define VC4_HDMI_CEC_CNT_TO_1300_US_SHIFT	17
+# define VC4_HDMI_CEC_CNT_TO_800_US_MASK	VC4_MASK(16, 11)
+# define VC4_HDMI_CEC_CNT_TO_800_US_SHIFT	11
+# define VC4_HDMI_CEC_CNT_TO_600_US_MASK	VC4_MASK(10, 5)
+# define VC4_HDMI_CEC_CNT_TO_600_US_SHIFT	5
+# define VC4_HDMI_CEC_CNT_TO_400_US_MASK	VC4_MASK(4, 0)
+# define VC4_HDMI_CEC_CNT_TO_400_US_SHIFT	0
+
+#define VC4_HDMI_CEC_CNTRL_3			0x0f0
+# define VC4_HDMI_CEC_CNT_TO_2750_US_MASK	VC4_MASK(31, 24)
+# define VC4_HDMI_CEC_CNT_TO_2750_US_SHIFT	24
+# define VC4_HDMI_CEC_CNT_TO_2400_US_MASK	VC4_MASK(23, 16)
+# define VC4_HDMI_CEC_CNT_TO_2400_US_SHIFT	16
+# define VC4_HDMI_CEC_CNT_TO_2050_US_MASK	VC4_MASK(15, 8)
+# define VC4_HDMI_CEC_CNT_TO_2050_US_SHIFT	8
+# define VC4_HDMI_CEC_CNT_TO_1700_US_MASK	VC4_MASK(7, 0)
+# define VC4_HDMI_CEC_CNT_TO_1700_US_SHIFT	0
+
+#define VC4_HDMI_CEC_CNTRL_4			0x0f4
+# define VC4_HDMI_CEC_CNT_TO_4300_US_MASK	VC4_MASK(31, 24)
+# define VC4_HDMI_CEC_CNT_TO_4300_US_SHIFT	24
+# define VC4_HDMI_CEC_CNT_TO_3900_US_MASK	VC4_MASK(23, 16)
+# define VC4_HDMI_CEC_CNT_TO_3900_US_SHIFT	16
+# define VC4_HDMI_CEC_CNT_TO_3600_US_MASK	VC4_MASK(15, 8)
+# define VC4_HDMI_CEC_CNT_TO_3600_US_SHIFT	8
+# define VC4_HDMI_CEC_CNT_TO_3500_US_MASK	VC4_MASK(7, 0)
+# define VC4_HDMI_CEC_CNT_TO_3500_US_SHIFT	0
+
+#define VC4_HDMI_CEC_CNTRL_5			0x0f8
+# define VC4_HDMI_CEC_TX_SW_RESET		BIT(27)
+# define VC4_HDMI_CEC_RX_SW_RESET		BIT(26)
+# define VC4_HDMI_CEC_PAD_SW_RESET		BIT(25)
+# define VC4_HDMI_CEC_MUX_TP_OUT_CEC		BIT(24)
+# define VC4_HDMI_CEC_RX_CEC_INT		BIT(23)
+# define VC4_HDMI_CEC_CLK_PRELOAD_MASK		VC4_MASK(22, 16)
+# define VC4_HDMI_CEC_CLK_PRELOAD_SHIFT		16
+# define VC4_HDMI_CEC_CNT_TO_4700_US_MASK	VC4_MASK(15, 8)
+# define VC4_HDMI_CEC_CNT_TO_4700_US_SHIFT	8
+# define VC4_HDMI_CEC_CNT_TO_4500_US_MASK	VC4_MASK(7, 0)
+# define VC4_HDMI_CEC_CNT_TO_4500_US_SHIFT	0
+
+/* Transmit data, first byte is low byte of the 32-bit reg.  MSB of
+ * each byte transmitted first.
+ */
+#define VC4_HDMI_CEC_TX_DATA_1			0x0fc
+#define VC4_HDMI_CEC_TX_DATA_2			0x100
+#define VC4_HDMI_CEC_TX_DATA_3			0x104
+#define VC4_HDMI_CEC_TX_DATA_4			0x108
+#define VC4_HDMI_CEC_RX_DATA_1			0x10c
+#define VC4_HDMI_CEC_RX_DATA_2			0x110
+#define VC4_HDMI_CEC_RX_DATA_3			0x114
+#define VC4_HDMI_CEC_RX_DATA_4			0x118
+
 #define VC4_HDMI_TX_PHY_RESET_CTL		0x2c0
 
 #define VC4_HDMI_TX_PHY_CTL0			0x2c4
 # define VC4_HDMI_TX_PHY_RNG_PWRDN		BIT(25)
 
+/* Interrupt status bits */
+#define VC4_HDMI_CPU_STATUS			0x340
+#define VC4_HDMI_CPU_SET			0x344
+#define VC4_HDMI_CPU_CLEAR			0x348
+# define VC4_HDMI_CPU_CEC			BIT(6)
+# define VC4_HDMI_CPU_HOTPLUG			BIT(0)
+
+#define VC4_HDMI_CPU_MASK_STATUS		0x34c
+#define VC4_HDMI_CPU_MASK_SET			0x350
+#define VC4_HDMI_CPU_MASK_CLEAR			0x354
+
 #define VC4_HDMI_GCP(x)				(0x400 + ((x) * 0x4))
 #define VC4_HDMI_RAM_PACKET(x)			(0x400 + ((x) * 0x24))
 #define VC4_HDMI_PACKET_STRIDE			0x24
 
 #define VC4_HD_M_CTL				0x00c
+/* Debug: Current receive value on the CEC pad. */
+# define VC4_HD_CECRXD				BIT(9)
+/* Debug: Override CEC output to 0. */
+# define VC4_HD_CECOVR				BIT(8)
 # define VC4_HD_M_REGISTER_FILE_STANDBY		(3 << 6)
 # define VC4_HD_M_RAM_STANDBY			(3 << 4)
 # define VC4_HD_M_SW_RST			BIT(2)
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
index 5dc19429d4ae..273984f71ae2 100644
--- a/drivers/gpu/drm/vc4/vc4_render_cl.c
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -261,8 +261,17 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
 	uint8_t max_y_tile = args->max_y_tile;
 	uint8_t xtiles = max_x_tile - min_x_tile + 1;
 	uint8_t ytiles = max_y_tile - min_y_tile + 1;
-	uint8_t x, y;
+	uint8_t xi, yi;
 	uint32_t size, loop_body_size;
+	bool positive_x = true;
+	bool positive_y = true;
+
+	if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) {
+		if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X))
+			positive_x = false;
+		if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y))
+			positive_y = false;
+	}
 
 	size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
 	loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
@@ -320,7 +329,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
 
 	size += xtiles * ytiles * loop_body_size;
 
-	setup->rcl = &vc4_bo_create(dev, size, true)->base;
+	setup->rcl = &vc4_bo_create(dev, size, true, VC4_BO_TYPE_RCL)->base;
 	if (IS_ERR(setup->rcl))
 		return PTR_ERR(setup->rcl);
 	list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
@@ -354,10 +363,12 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
 	rcl_u16(setup, args->height);
 	rcl_u16(setup, args->color_write.bits);
 
-	for (y = min_y_tile; y <= max_y_tile; y++) {
-		for (x = min_x_tile; x <= max_x_tile; x++) {
-			bool first = (x == min_x_tile && y == min_y_tile);
-			bool last = (x == max_x_tile && y == max_y_tile);
+	for (yi = 0; yi < ytiles; yi++) {
+		int y = positive_y ? min_y_tile + yi : max_y_tile - yi;
+		for (xi = 0; xi < xtiles; xi++) {
+			int x = positive_x ? min_x_tile + xi : max_x_tile - xi;
+			bool first = (xi == 0 && yi == 0);
+			bool last = (xi == xtiles - 1 && yi == ytiles - 1);
 
 			emit_tile(exec, setup, x, y, first, last);
 		}
@@ -378,14 +389,14 @@ static int vc4_full_res_bounds_check(struct vc4_exec_info *exec,
 	u32 render_tiles_stride = DIV_ROUND_UP(exec->args->width, 32);
 
 	if (surf->offset > obj->base.size) {
-		DRM_ERROR("surface offset %d > BO size %zd\n",
+		DRM_DEBUG("surface offset %d > BO size %zd\n",
 			  surf->offset, obj->base.size);
 		return -EINVAL;
 	}
 
 	if ((obj->base.size - surf->offset) / VC4_TILE_BUFFER_SIZE <
 	    render_tiles_stride * args->max_y_tile + args->max_x_tile) {
-		DRM_ERROR("MSAA tile %d, %d out of bounds "
+		DRM_DEBUG("MSAA tile %d, %d out of bounds "
 			  "(bo size %zd, offset %d).\n",
 			  args->max_x_tile, args->max_y_tile,
 			  obj->base.size,
@@ -401,7 +412,7 @@ static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec,
 				      struct drm_vc4_submit_rcl_surface *surf)
 {
 	if (surf->flags != 0 || surf->bits != 0) {
-		DRM_ERROR("MSAA surface had nonzero flags/bits\n");
+		DRM_DEBUG("MSAA surface had nonzero flags/bits\n");
 		return -EINVAL;
 	}
 
@@ -415,7 +426,7 @@ static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec,
 	exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
 
 	if (surf->offset & 0xf) {
-		DRM_ERROR("MSAA write must be 16b aligned.\n");
+		DRM_DEBUG("MSAA write must be 16b aligned.\n");
 		return -EINVAL;
 	}
 
@@ -437,7 +448,7 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
 	int ret;
 
 	if (surf->flags & ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
-		DRM_ERROR("Extra flags set\n");
+		DRM_DEBUG("Extra flags set\n");
 		return -EINVAL;
 	}
 
@@ -453,12 +464,12 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
 
 	if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
 		if (surf == &exec->args->zs_write) {
-			DRM_ERROR("general zs write may not be a full-res.\n");
+			DRM_DEBUG("general zs write may not be a full-res.\n");
 			return -EINVAL;
 		}
 
 		if (surf->bits != 0) {
-			DRM_ERROR("load/store general bits set with "
+			DRM_DEBUG("load/store general bits set with "
 				  "full res load/store.\n");
 			return -EINVAL;
 		}
@@ -473,19 +484,19 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
 	if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
 			   VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK |
 			   VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) {
-		DRM_ERROR("Unknown bits in load/store: 0x%04x\n",
+		DRM_DEBUG("Unknown bits in load/store: 0x%04x\n",
 			  surf->bits);
 		return -EINVAL;
 	}
 
 	if (tiling > VC4_TILING_FORMAT_LT) {
-		DRM_ERROR("Bad tiling format\n");
+		DRM_DEBUG("Bad tiling format\n");
 		return -EINVAL;
 	}
 
 	if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) {
 		if (format != 0) {
-			DRM_ERROR("No color format should be set for ZS\n");
+			DRM_DEBUG("No color format should be set for ZS\n");
 			return -EINVAL;
 		}
 		cpp = 4;
@@ -499,16 +510,16 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
 			cpp = 4;
 			break;
 		default:
-			DRM_ERROR("Bad tile buffer format\n");
+			DRM_DEBUG("Bad tile buffer format\n");
 			return -EINVAL;
 		}
 	} else {
-		DRM_ERROR("Bad load/store buffer %d.\n", buffer);
+		DRM_DEBUG("Bad load/store buffer %d.\n", buffer);
 		return -EINVAL;
 	}
 
 	if (surf->offset & 0xf) {
-		DRM_ERROR("load/store buffer must be 16b aligned.\n");
+		DRM_DEBUG("load/store buffer must be 16b aligned.\n");
 		return -EINVAL;
 	}
 
@@ -533,7 +544,7 @@ vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec,
 	int cpp;
 
 	if (surf->flags != 0) {
-		DRM_ERROR("No flags supported on render config.\n");
+		DRM_DEBUG("No flags supported on render config.\n");
 		return -EINVAL;
 	}
 
@@ -541,7 +552,7 @@ vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec,
 			   VC4_RENDER_CONFIG_FORMAT_MASK |
 			   VC4_RENDER_CONFIG_MS_MODE_4X |
 			   VC4_RENDER_CONFIG_DECIMATE_MODE_4X)) {
-		DRM_ERROR("Unknown bits in render config: 0x%04x\n",
+		DRM_DEBUG("Unknown bits in render config: 0x%04x\n",
 			  surf->bits);
 		return -EINVAL;
 	}
@@ -556,7 +567,7 @@ vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec,
 	exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
 
 	if (tiling > VC4_TILING_FORMAT_LT) {
-		DRM_ERROR("Bad tiling format\n");
+		DRM_DEBUG("Bad tiling format\n");
 		return -EINVAL;
 	}
 
@@ -569,7 +580,7 @@ vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec,
 		cpp = 4;
 		break;
 	default:
-		DRM_ERROR("Bad tile buffer format\n");
+		DRM_DEBUG("Bad tile buffer format\n");
 		return -EINVAL;
 	}
 
@@ -590,7 +601,7 @@ int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
 
 	if (args->min_x_tile > args->max_x_tile ||
 	    args->min_y_tile > args->max_y_tile) {
-		DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n",
+		DRM_DEBUG("Bad render tile set (%d,%d)-(%d,%d)\n",
 			  args->min_x_tile, args->min_y_tile,
 			  args->max_x_tile, args->max_y_tile);
 		return -EINVAL;
@@ -599,7 +610,7 @@ int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
 	if (has_bin &&
 	    (args->max_x_tile > exec->bin_tiles_x ||
 	     args->max_y_tile > exec->bin_tiles_y)) {
-		DRM_ERROR("Render tiles (%d,%d) outside of bin config "
+		DRM_DEBUG("Render tiles (%d,%d) outside of bin config "
 			  "(%d,%d)\n",
 			  args->max_x_tile, args->max_y_tile,
 			  exec->bin_tiles_x, exec->bin_tiles_y);
@@ -642,7 +653,7 @@ int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
 	 */
 	if (!setup.color_write && !setup.zs_write &&
 	    !setup.msaa_color_write && !setup.msaa_zs_write) {
-		DRM_ERROR("RCL requires color or Z/S write\n");
+		DRM_DEBUG("RCL requires color or Z/S write\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/vc4/vc4_trace.h b/drivers/gpu/drm/vc4/vc4_trace.h
index ad7b1ea720c2..deafb32923e1 100644
--- a/drivers/gpu/drm/vc4/vc4_trace.h
+++ b/drivers/gpu/drm/vc4/vc4_trace.h
@@ -59,5 +59,5 @@ TRACE_EVENT(vc4_wait_for_seqno_end,
 
 /* This part must be outside protection */
 #undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/vc4
 #include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 8c723da71f66..622cd43840b8 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -236,7 +236,8 @@ vc4_allocate_bin_bo(struct drm_device *drm)
 	INIT_LIST_HEAD(&list);
 
 	while (true) {
-		struct vc4_bo *bo = vc4_bo_create(drm, size, true);
+		struct vc4_bo *bo = vc4_bo_create(drm, size, true,
+						  VC4_BO_TYPE_BIN);
 
 		if (IS_ERR(bo)) {
 			ret = PTR_ERR(bo);
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
index 814b512c6b9a..2db485abb186 100644
--- a/drivers/gpu/drm/vc4/vc4_validate.c
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -109,7 +109,7 @@ vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
 	struct vc4_bo *bo;
 
 	if (hindex >= exec->bo_count) {
-		DRM_ERROR("BO index %d greater than BO count %d\n",
+		DRM_DEBUG("BO index %d greater than BO count %d\n",
 			  hindex, exec->bo_count);
 		return NULL;
 	}
@@ -117,7 +117,7 @@ vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
 	bo = to_vc4_bo(&obj->base);
 
 	if (bo->validated_shader) {
-		DRM_ERROR("Trying to use shader BO as something other than "
+		DRM_DEBUG("Trying to use shader BO as something other than "
 			  "a shader\n");
 		return NULL;
 	}
@@ -172,7 +172,7 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
 	 * our math.
 	 */
 	if (width > 4096 || height > 4096) {
-		DRM_ERROR("Surface dimensions (%d,%d) too large",
+		DRM_DEBUG("Surface dimensions (%d,%d) too large",
 			  width, height);
 		return false;
 	}
@@ -191,7 +191,7 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
 		aligned_height = round_up(height, utile_h);
 		break;
 	default:
-		DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
+		DRM_DEBUG("buffer tiling %d unsupported\n", tiling_format);
 		return false;
 	}
 
@@ -200,7 +200,7 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
 
 	if (size + offset < size ||
 	    size + offset > fbo->base.size) {
-		DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
+		DRM_DEBUG("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
 			  width, height,
 			  aligned_width, aligned_height,
 			  size, offset, fbo->base.size);
@@ -214,7 +214,7 @@ static int
 validate_flush(VALIDATE_ARGS)
 {
 	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
-		DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
+		DRM_DEBUG("Bin CL must end with VC4_PACKET_FLUSH\n");
 		return -EINVAL;
 	}
 	exec->found_flush = true;
@@ -226,13 +226,13 @@ static int
 validate_start_tile_binning(VALIDATE_ARGS)
 {
 	if (exec->found_start_tile_binning_packet) {
-		DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");
+		DRM_DEBUG("Duplicate VC4_PACKET_START_TILE_BINNING\n");
 		return -EINVAL;
 	}
 	exec->found_start_tile_binning_packet = true;
 
 	if (!exec->found_tile_binning_mode_config_packet) {
-		DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
+		DRM_DEBUG("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
 		return -EINVAL;
 	}
 
@@ -243,7 +243,7 @@ static int
 validate_increment_semaphore(VALIDATE_ARGS)
 {
 	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
-		DRM_ERROR("Bin CL must end with "
+		DRM_DEBUG("Bin CL must end with "
 			  "VC4_PACKET_INCREMENT_SEMAPHORE\n");
 		return -EINVAL;
 	}
@@ -264,7 +264,7 @@ validate_indexed_prim_list(VALIDATE_ARGS)
 
 	/* Check overflow condition */
 	if (exec->shader_state_count == 0) {
-		DRM_ERROR("shader state must precede primitives\n");
+		DRM_DEBUG("shader state must precede primitives\n");
 		return -EINVAL;
 	}
 	shader_state = &exec->shader_state[exec->shader_state_count - 1];
@@ -281,7 +281,7 @@ validate_indexed_prim_list(VALIDATE_ARGS)
 
 	if (offset > ib->base.size ||
 	    (ib->base.size - offset) / index_size < length) {
-		DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
+		DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n",
 			  offset, length, index_size, ib->base.size);
 		return -EINVAL;
 	}
@@ -301,13 +301,13 @@ validate_gl_array_primitive(VALIDATE_ARGS)
 
 	/* Check overflow condition */
 	if (exec->shader_state_count == 0) {
-		DRM_ERROR("shader state must precede primitives\n");
+		DRM_DEBUG("shader state must precede primitives\n");
 		return -EINVAL;
 	}
 	shader_state = &exec->shader_state[exec->shader_state_count - 1];
 
 	if (length + base_index < length) {
-		DRM_ERROR("primitive vertex count overflow\n");
+		DRM_DEBUG("primitive vertex count overflow\n");
 		return -EINVAL;
 	}
 	max_index = length + base_index - 1;
@@ -324,7 +324,7 @@ validate_gl_shader_state(VALIDATE_ARGS)
 	uint32_t i = exec->shader_state_count++;
 
 	if (i >= exec->shader_state_size) {
-		DRM_ERROR("More requests for shader states than declared\n");
+		DRM_DEBUG("More requests for shader states than declared\n");
 		return -EINVAL;
 	}
 
@@ -332,7 +332,7 @@ validate_gl_shader_state(VALIDATE_ARGS)
 	exec->shader_state[i].max_index = 0;
 
 	if (exec->shader_state[i].addr & ~0xf) {
-		DRM_ERROR("high bits set in GL shader rec reference\n");
+		DRM_DEBUG("high bits set in GL shader rec reference\n");
 		return -EINVAL;
 	}
 
@@ -356,7 +356,7 @@ validate_tile_binning_config(VALIDATE_ARGS)
 	int bin_slot;
 
 	if (exec->found_tile_binning_mode_config_packet) {
-		DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
+		DRM_DEBUG("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
 		return -EINVAL;
 	}
 	exec->found_tile_binning_mode_config_packet = true;
@@ -368,14 +368,14 @@ validate_tile_binning_config(VALIDATE_ARGS)
 
 	if (exec->bin_tiles_x == 0 ||
 	    exec->bin_tiles_y == 0) {
-		DRM_ERROR("Tile binning config of %dx%d too small\n",
+		DRM_DEBUG("Tile binning config of %dx%d too small\n",
 			  exec->bin_tiles_x, exec->bin_tiles_y);
 		return -EINVAL;
 	}
 
 	if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
 		     VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
-		DRM_ERROR("unsupported binning config flags 0x%02x\n", flags);
+		DRM_DEBUG("unsupported binning config flags 0x%02x\n", flags);
 		return -EINVAL;
 	}
 
@@ -493,20 +493,20 @@ vc4_validate_bin_cl(struct drm_device *dev,
 		const struct cmd_info *info;
 
 		if (cmd >= ARRAY_SIZE(cmd_info)) {
-			DRM_ERROR("0x%08x: packet %d out of bounds\n",
+			DRM_DEBUG("0x%08x: packet %d out of bounds\n",
 				  src_offset, cmd);
 			return -EINVAL;
 		}
 
 		info = &cmd_info[cmd];
 		if (!info->name) {
-			DRM_ERROR("0x%08x: packet %d invalid\n",
+			DRM_DEBUG("0x%08x: packet %d invalid\n",
 				  src_offset, cmd);
 			return -EINVAL;
 		}
 
 		if (src_offset + info->len > len) {
-			DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
+			DRM_DEBUG("0x%08x: packet %d (%s) length 0x%08x "
 				  "exceeds bounds (0x%08x)\n",
 				  src_offset, cmd, info->name, info->len,
 				  src_offset + len);
@@ -519,7 +519,7 @@ vc4_validate_bin_cl(struct drm_device *dev,
 		if (info->func && info->func(exec,
 					     dst_pkt + 1,
 					     src_pkt + 1)) {
-			DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
+			DRM_DEBUG("0x%08x: packet %d (%s) failed to validate\n",
 				  src_offset, cmd, info->name);
 			return -EINVAL;
 		}
@@ -537,7 +537,7 @@ vc4_validate_bin_cl(struct drm_device *dev,
 	exec->ct0ea = exec->ct0ca + dst_offset;
 
 	if (!exec->found_start_tile_binning_packet) {
-		DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
+		DRM_DEBUG("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
 		return -EINVAL;
 	}
 
@@ -549,7 +549,7 @@ vc4_validate_bin_cl(struct drm_device *dev,
 	 * semaphore increment.
 	 */
 	if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
-		DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
+		DRM_DEBUG("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
 			  "VC4_PACKET_FLUSH\n");
 		return -EINVAL;
 	}
@@ -588,11 +588,11 @@ reloc_tex(struct vc4_exec_info *exec,
 		uint32_t remaining_size = tex->base.size - p0;
 
 		if (p0 > tex->base.size - 4) {
-			DRM_ERROR("UBO offset greater than UBO size\n");
+			DRM_DEBUG("UBO offset greater than UBO size\n");
 			goto fail;
 		}
 		if (p1 > remaining_size - 4) {
-			DRM_ERROR("UBO clamp would allow reads "
+			DRM_DEBUG("UBO clamp would allow reads "
 				  "outside of UBO\n");
 			goto fail;
 		}
@@ -612,14 +612,14 @@ reloc_tex(struct vc4_exec_info *exec,
 		if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
 		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
 			if (cube_map_stride) {
-				DRM_ERROR("Cube map stride set twice\n");
+				DRM_DEBUG("Cube map stride set twice\n");
 				goto fail;
 			}
 
 			cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
 		}
 		if (!cube_map_stride) {
-			DRM_ERROR("Cube map stride not set\n");
+			DRM_DEBUG("Cube map stride not set\n");
 			goto fail;
 		}
 	}
@@ -660,7 +660,7 @@ reloc_tex(struct vc4_exec_info *exec,
 	case VC4_TEXTURE_TYPE_RGBA64:
 	case VC4_TEXTURE_TYPE_YUV422R:
 	default:
-		DRM_ERROR("Texture format %d unsupported\n", type);
+		DRM_DEBUG("Texture format %d unsupported\n", type);
 		goto fail;
 	}
 	utile_w = utile_width(cpp);
@@ -713,7 +713,7 @@ reloc_tex(struct vc4_exec_info *exec,
 		level_size = aligned_width * cpp * aligned_height;
 
 		if (offset < level_size) {
-			DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
+			DRM_DEBUG("Level %d (%dx%d -> %dx%d) size %db "
 				  "overflowed buffer bounds (offset %d)\n",
 				  i, level_width, level_height,
 				  aligned_width, aligned_height,
@@ -764,7 +764,7 @@ validate_gl_shader_rec(struct drm_device *dev,
 
 	nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
 	if (nr_relocs * 4 > exec->shader_rec_size) {
-		DRM_ERROR("overflowed shader recs reading %d handles "
+		DRM_DEBUG("overflowed shader recs reading %d handles "
 			  "from %d bytes left\n",
 			  nr_relocs, exec->shader_rec_size);
 		return -EINVAL;
@@ -774,7 +774,7 @@ validate_gl_shader_rec(struct drm_device *dev,
 	exec->shader_rec_size -= nr_relocs * 4;
 
 	if (packet_size > exec->shader_rec_size) {
-		DRM_ERROR("overflowed shader recs copying %db packet "
+		DRM_DEBUG("overflowed shader recs copying %db packet "
 			  "from %d bytes left\n",
 			  packet_size, exec->shader_rec_size);
 		return -EINVAL;
@@ -794,7 +794,7 @@ validate_gl_shader_rec(struct drm_device *dev,
 
 	for (i = 0; i < shader_reloc_count; i++) {
 		if (src_handles[i] > exec->bo_count) {
-			DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
+			DRM_DEBUG("Shader handle %d too big\n", src_handles[i]);
 			return -EINVAL;
 		}
 
@@ -810,13 +810,13 @@ validate_gl_shader_rec(struct drm_device *dev,
 
 	if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
 	    to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
-		DRM_ERROR("Thread mode of CL and FS do not match\n");
+		DRM_DEBUG("Thread mode of CL and FS do not match\n");
 		return -EINVAL;
 	}
 
 	if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
 	    to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
-		DRM_ERROR("cs and vs cannot be threaded\n");
+		DRM_DEBUG("cs and vs cannot be threaded\n");
 		return -EINVAL;
 	}
 
@@ -831,7 +831,7 @@ validate_gl_shader_rec(struct drm_device *dev,
 		*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
 
 		if (src_offset != 0) {
-			DRM_ERROR("Shaders must be at offset 0 of "
+			DRM_DEBUG("Shaders must be at offset 0 of "
 				  "the BO.\n");
 			return -EINVAL;
 		}
@@ -842,7 +842,7 @@ validate_gl_shader_rec(struct drm_device *dev,
 
 		if (validated_shader->uniforms_src_size >
 		    exec->uniforms_size) {
-			DRM_ERROR("Uniforms src buffer overflow\n");
+			DRM_DEBUG("Uniforms src buffer overflow\n");
 			return -EINVAL;
 		}
 
@@ -900,7 +900,7 @@ validate_gl_shader_rec(struct drm_device *dev,
 
 		if (vbo->base.size < offset ||
 		    vbo->base.size - offset < attr_size) {
-			DRM_ERROR("BO offset overflow (%d + %d > %zu)\n",
+			DRM_DEBUG("BO offset overflow (%d + %d > %zu)\n",
 				  offset, attr_size, vbo->base.size);
 			return -EINVAL;
 		}
@@ -909,7 +909,7 @@ validate_gl_shader_rec(struct drm_device *dev,
 			max_index = ((vbo->base.size - offset - attr_size) /
 				     stride);
 			if (state->max_index > max_index) {
-				DRM_ERROR("primitives use index %d out of "
+				DRM_DEBUG("primitives use index %d out of "
 					  "supplied %d\n",
 					  state->max_index, max_index);
 				return -EINVAL;
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
index 0b2df5c6efb4..d3f15bf60900 100644
--- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -200,7 +200,7 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 		uint32_t clamp_reg, clamp_offset;
 
 		if (sig == QPU_SIG_SMALL_IMM) {
-			DRM_ERROR("direct TMU read used small immediate\n");
+			DRM_DEBUG("direct TMU read used small immediate\n");
 			return false;
 		}
 
@@ -209,7 +209,7 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 		 */
 		if (is_mul ||
 		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
-			DRM_ERROR("direct TMU load wasn't an add\n");
+			DRM_DEBUG("direct TMU load wasn't an add\n");
 			return false;
 		}
 
@@ -220,13 +220,13 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 		 */
 		clamp_reg = raddr_add_a_to_live_reg_index(inst);
 		if (clamp_reg == ~0) {
-			DRM_ERROR("direct TMU load wasn't clamped\n");
+			DRM_DEBUG("direct TMU load wasn't clamped\n");
 			return false;
 		}
 
 		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
 		if (clamp_offset == ~0) {
-			DRM_ERROR("direct TMU load wasn't clamped\n");
+			DRM_DEBUG("direct TMU load wasn't clamped\n");
 			return false;
 		}
 
@@ -238,7 +238,7 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 
 		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
 		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
-			DRM_ERROR("direct TMU load didn't add to a uniform\n");
+			DRM_DEBUG("direct TMU load didn't add to a uniform\n");
 			return false;
 		}
 
@@ -246,14 +246,14 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 	} else {
 		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
 					      raddr_b == QPU_R_UNIF)) {
-			DRM_ERROR("uniform read in the same instruction as "
+			DRM_DEBUG("uniform read in the same instruction as "
 				  "texture setup.\n");
 			return false;
 		}
 	}
 
 	if (validation_state->tmu_write_count[tmu] >= 4) {
-		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
+		DRM_DEBUG("TMU%d got too many parameters before dispatch\n",
 			  tmu);
 		return false;
 	}
@@ -265,7 +265,7 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 	 */
 	if (!is_direct) {
 		if (validation_state->needs_uniform_address_update) {
-			DRM_ERROR("Texturing with undefined uniform address\n");
+			DRM_DEBUG("Texturing with undefined uniform address\n");
 			return false;
 		}
 
@@ -336,35 +336,35 @@ validate_uniform_address_write(struct vc4_validated_shader_info *validated_shade
 	case QPU_SIG_LOAD_TMU1:
 		break;
 	default:
-		DRM_ERROR("uniforms address change must be "
+		DRM_DEBUG("uniforms address change must be "
 			  "normal math\n");
 		return false;
 	}
 
 	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
-		DRM_ERROR("Uniform address reset must be an ADD.\n");
+		DRM_DEBUG("Uniform address reset must be an ADD.\n");
 		return false;
 	}
 
 	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
-		DRM_ERROR("Uniform address reset must be unconditional.\n");
+		DRM_DEBUG("Uniform address reset must be unconditional.\n");
 		return false;
 	}
 
 	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
 	    !(inst & QPU_PM)) {
-		DRM_ERROR("No packing allowed on uniforms reset\n");
+		DRM_DEBUG("No packing allowed on uniforms reset\n");
 		return false;
 	}
 
 	if (add_lri == -1) {
-		DRM_ERROR("First argument of uniform address write must be "
+		DRM_DEBUG("First argument of uniform address write must be "
 			  "an immediate value.\n");
 		return false;
 	}
 
 	if (validation_state->live_immediates[add_lri] != expected_offset) {
-		DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
+		DRM_DEBUG("Resetting uniforms with offset %db instead of %db\n",
 			  validation_state->live_immediates[add_lri],
 			  expected_offset);
 		return false;
@@ -372,7 +372,7 @@ validate_uniform_address_write(struct vc4_validated_shader_info *validated_shade
 
 	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
 	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
-		DRM_ERROR("Second argument of uniform address write must be "
+		DRM_DEBUG("Second argument of uniform address write must be "
 			  "a uniform.\n");
 		return false;
 	}
@@ -417,7 +417,7 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader,
 	switch (waddr) {
 	case QPU_W_UNIFORMS_ADDRESS:
 		if (is_b) {
-			DRM_ERROR("relative uniforms address change "
+			DRM_DEBUG("relative uniforms address change "
 				  "unsupported\n");
 			return false;
 		}
@@ -452,11 +452,11 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader,
 		/* XXX: I haven't thought about these, so don't support them
 		 * for now.
 		 */
-		DRM_ERROR("Unsupported waddr %d\n", waddr);
+		DRM_DEBUG("Unsupported waddr %d\n", waddr);
 		return false;
 
 	case QPU_W_VPM_ADDR:
-		DRM_ERROR("General VPM DMA unsupported\n");
+		DRM_DEBUG("General VPM DMA unsupported\n");
 		return false;
 
 	case QPU_W_VPM:
@@ -559,7 +559,7 @@ check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
 	bool ok;
 
 	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
-		DRM_ERROR("ADD and MUL both set up textures\n");
+		DRM_DEBUG("ADD and MUL both set up textures\n");
 		return false;
 	}
 
@@ -588,7 +588,7 @@ check_branch(uint64_t inst,
 	 * there's no need for it.
 	 */
 	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
-		DRM_ERROR("branch instruction at %d wrote a register.\n",
+		DRM_DEBUG("branch instruction at %d wrote a register.\n",
 			  validation_state->ip);
 		return false;
 	}
@@ -614,7 +614,7 @@ check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
 		validated_shader->uniforms_size += 4;
 
 		if (validation_state->needs_uniform_address_update) {
-			DRM_ERROR("Uniform read with undefined uniform "
+			DRM_DEBUG("Uniform read with undefined uniform "
 				  "address\n");
 			return false;
 		}
@@ -660,19 +660,19 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
 			continue;
 
 		if (ip - last_branch < 4) {
-			DRM_ERROR("Branch at %d during delay slots\n", ip);
+			DRM_DEBUG("Branch at %d during delay slots\n", ip);
 			return false;
 		}
 		last_branch = ip;
 
 		if (inst & QPU_BRANCH_REG) {
-			DRM_ERROR("branching from register relative "
+			DRM_DEBUG("branching from register relative "
 				  "not supported\n");
 			return false;
 		}
 
 		if (!(inst & QPU_BRANCH_REL)) {
-			DRM_ERROR("relative branching required\n");
+			DRM_DEBUG("relative branching required\n");
 			return false;
 		}
 
@@ -682,13 +682,13 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
 		 * end of the shader object.
 		 */
 		if (branch_imm % sizeof(inst) != 0) {
-			DRM_ERROR("branch target not aligned\n");
+			DRM_DEBUG("branch target not aligned\n");
 			return false;
 		}
 
 		branch_target_ip = after_delay_ip + (branch_imm >> 3);
 		if (branch_target_ip >= validation_state->max_ip) {
-			DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
+			DRM_DEBUG("Branch at %d outside of shader (ip %d/%d)\n",
 				  ip, branch_target_ip,
 				  validation_state->max_ip);
 			return false;
@@ -699,7 +699,7 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
 		 * the shader.
 		 */
 		if (after_delay_ip >= validation_state->max_ip) {
-			DRM_ERROR("Branch at %d continues past shader end "
+			DRM_DEBUG("Branch at %d continues past shader end "
 				  "(%d/%d)\n",
 				  ip, after_delay_ip, validation_state->max_ip);
 			return false;
@@ -709,7 +709,7 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
 	}
 
 	if (max_branch_target > validation_state->max_ip - 3) {
-		DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
+		DRM_DEBUG("Branch landed after QPU_SIG_PROG_END");
 		return false;
 	}
 
@@ -750,7 +750,7 @@ vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
 		return true;
 
 	if (texturing_in_progress(validation_state)) {
-		DRM_ERROR("Branch target landed during TMU setup\n");
+		DRM_DEBUG("Branch target landed during TMU setup\n");
 		return false;
 	}
 
@@ -837,7 +837,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		case QPU_SIG_LAST_THREAD_SWITCH:
 			if (!check_instruction_writes(validated_shader,
 						      &validation_state)) {
-				DRM_ERROR("Bad write at ip %d\n", ip);
+				DRM_DEBUG("Bad write at ip %d\n", ip);
 				goto fail;
 			}
 
@@ -855,7 +855,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 				validated_shader->is_threaded = true;
 
 				if (ip < last_thread_switch_ip + 3) {
-					DRM_ERROR("Thread switch too soon after "
+					DRM_DEBUG("Thread switch too soon after "
 						  "last switch at ip %d\n", ip);
 					goto fail;
 				}
@@ -867,7 +867,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		case QPU_SIG_LOAD_IMM:
 			if (!check_instruction_writes(validated_shader,
 						      &validation_state)) {
-				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
+				DRM_DEBUG("Bad LOAD_IMM write at ip %d\n", ip);
 				goto fail;
 			}
 			break;
@@ -878,14 +878,14 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 				goto fail;
 
 			if (ip < last_thread_switch_ip + 3) {
-				DRM_ERROR("Branch in thread switch at ip %d",
+				DRM_DEBUG("Branch in thread switch at ip %d",
 					  ip);
 				goto fail;
 			}
 
 			break;
 		default:
-			DRM_ERROR("Unsupported QPU signal %d at "
+			DRM_DEBUG("Unsupported QPU signal %d at "
 				  "instruction %d\n", sig, ip);
 			goto fail;
 		}
@@ -898,7 +898,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 	}
 
 	if (ip == validation_state.max_ip) {
-		DRM_ERROR("shader failed to terminate before "
+		DRM_DEBUG("shader failed to terminate before "
 			  "shader BO end at %zd\n",
 			  shader_obj->base.size);
 		goto fail;
@@ -907,7 +907,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 	/* Might corrupt other thread */
 	if (validated_shader->is_threaded &&
 	    validation_state.all_registers_used) {
-		DRM_ERROR("Shader uses threading, but uses the upper "
+		DRM_DEBUG("Shader uses threading, but uses the upper "
 			  "half of the registers, too\n");
 		goto fail;
 	}
diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c
index 09c1e05765fa..3a9a302247a2 100644
--- a/drivers/gpu/drm/vc4/vc4_vec.c
+++ b/drivers/gpu/drm/vc4/vc4_vec.c
@@ -366,10 +366,8 @@ static int vc4_vec_connector_get_modes(struct drm_connector *connector)
 }
 
 static const struct drm_connector_funcs vc4_vec_connector_funcs = {
-	.dpms = drm_atomic_helper_connector_dpms,
 	.detect = vc4_vec_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
-	.set_property = drm_atomic_helper_connector_set_property,
 	.destroy = vc4_vec_connector_destroy,
 	.reset = drm_atomic_helper_connector_reset,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,