38 files changed, 1481 insertions, 563 deletions
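The headline user-visible change in this series is the new DRM_IOCTL_I915_QUERY ioctl (i915_query.c below) and its first item, DRM_I915_QUERY_TOPOLOGY_INFO. What follows is a minimal userspace sketch of the intended two-pass calling convention; it assumes the uapi structs (drm_i915_query, drm_i915_query_item, drm_i915_query_topology_info with a trailing data[] blob) land in <drm/i915_drm.h> with the layout the kernel code below writes, and it trims error handling. print_topology() is just an illustrative helper, not part of the series.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int print_topology(int drm_fd)
{
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};
	struct drm_i915_query_topology_info *topo;
	int s, ss;

	/* Pass 1: length == 0 makes the kernel report the blob size. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) < 0 || item.length <= 0)
		return -1;

	topo = calloc(1, item.length);
	item.data_ptr = (uintptr_t)topo;

	/* Pass 2: the kernel validates the length and fills the blob. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) < 0 || item.length < 0) {
		free(topo);
		return -1;
	}

	for (s = 0; s < topo->max_slices; s++) {
		/* The slice mask is stored at offset 0 of data[]. */
		if (!(topo->data[s / 8] & (1u << (s % 8))))
			continue; /* slice fused off */

		for (ss = 0; ss < topo->max_subslices; ss++) {
			const uint8_t *ss_mask = topo->data +
				topo->subslice_offset +
				s * topo->subslice_stride;
			const uint8_t *eu_mask = topo->data +
				topo->eu_offset +
				(s * topo->max_subslices + ss) *
				topo->eu_stride;
			int eu, eus = 0;

			if (!(ss_mask[ss / 8] & (1u << (ss % 8))))
				continue; /* subslice fused off */

			for (eu = 0; eu < topo->max_eus_per_subslice; eu++)
				eus += !!(eu_mask[eu / 8] & (1u << (eu % 8)));

			printf("slice%d subslice%d: %d EUs\n", s, ss, eus);
		}
	}

	free(topo);
	return 0;
}

Note the per-item error convention visible in i915_query_ioctl() below: the result for each item (the blob size, or a negative errno) is written back into that item's length field, so one malformed item does not fail the whole query array.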
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 1bd9bc5b8c5c..4eee91a3a236 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -69,6 +69,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ + i915_query.o \ i915_request.o \ i915_trace_points.o \ i915_vma.o \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e838c765b251..89f7ff2c652e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3201,6 +3201,16 @@ static int i915_engine_info(struct seq_file *m, void *unused) return 0; } +static int i915_rcs_topology(struct seq_file *m, void *unused) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + intel_device_info_dump_topology(&INTEL_INFO(dev_priv)->sseu, &p); + + return 0; +} + static int i915_shrinker_info(struct seq_file *m, void *unused) { struct drm_i915_private *i915 = node_to_i915(m->private); @@ -4323,7 +4333,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, continue; sseu->slice_mask = BIT(0); - sseu->subslice_mask |= BIT(ss); + sseu->subslice_mask[0] |= BIT(ss); eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) + @@ -4338,11 +4348,11 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { const struct intel_device_info *info = INTEL_INFO(dev_priv); - int s_max = 6, ss_max = 4; int s, ss; - u32 s_reg[s_max], eu_reg[2 * s_max], eu_mask[2]; + u32 s_reg[info->sseu.max_slices]; + u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2]; - for (s = 0; s < s_max; s++) { + for (s = 0; s < info->sseu.max_slices; s++) { /* * FIXME: Valid SS Mask respects the spec and read * only valid bits for those registers, excluding reserverd @@ -4364,15 +4374,15 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, GEN9_PGCTL_SSB_EU210_ACK | GEN9_PGCTL_SSB_EU311_ACK; - for (s = 0; s < s_max; s++) { + for (s = 0; s < info->sseu.max_slices; s++) { if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) /* skip disabled slice */ continue; sseu->slice_mask |= BIT(s); - sseu->subslice_mask = info->sseu.subslice_mask; + sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; - for (ss = 0; ss < ss_max; ss++) { + for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) @@ -4392,17 +4402,12 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { - int s_max = 3, ss_max = 4; + const struct intel_device_info *info = INTEL_INFO(dev_priv); int s, ss; - u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2]; - - /* BXT has a single slice and at most 3 subslices. 
*/ - if (IS_GEN9_LP(dev_priv)) { - s_max = 1; - ss_max = 3; - } + u32 s_reg[info->sseu.max_slices]; + u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2]; - for (s = 0; s < s_max; s++) { + for (s = 0; s < info->sseu.max_slices; s++) { s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s)); eu_reg[2*s] = I915_READ(GEN9_SS01_EU_PGCTL_ACK(s)); eu_reg[2*s + 1] = I915_READ(GEN9_SS23_EU_PGCTL_ACK(s)); @@ -4417,7 +4422,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, GEN9_PGCTL_SSB_EU210_ACK | GEN9_PGCTL_SSB_EU311_ACK; - for (s = 0; s < s_max; s++) { + for (s = 0; s < info->sseu.max_slices; s++) { if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) /* skip disabled slice */ continue; @@ -4425,10 +4430,10 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); if (IS_GEN9_BC(dev_priv)) - sseu->subslice_mask = - INTEL_INFO(dev_priv)->sseu.subslice_mask; + sseu->subslice_mask[s] = + INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; - for (ss = 0; ss < ss_max; ss++) { + for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; if (IS_GEN9_LP(dev_priv)) { @@ -4436,7 +4441,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, /* skip disabled subslice */ continue; - sseu->subslice_mask |= BIT(ss); + sseu->subslice_mask[s] |= BIT(ss); } eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & @@ -4458,9 +4463,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; if (sseu->slice_mask) { - sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; sseu->eu_per_subslice = INTEL_INFO(dev_priv)->sseu.eu_per_subslice; + for (s = 0; s < fls(sseu->slice_mask); s++) { + sseu->subslice_mask[s] = + INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; + } sseu->eu_total = sseu->eu_per_subslice * sseu_subslice_total(sseu); @@ -4479,6 +4487,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, { struct drm_i915_private *dev_priv = node_to_i915(m->private); const char *type = is_available_info ? 
"Available" : "Enabled"; + int s; seq_printf(m, " %s Slice Mask: %04x\n", type, sseu->slice_mask); @@ -4486,10 +4495,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, hweight8(sseu->slice_mask)); seq_printf(m, " %s Subslice Total: %u\n", type, sseu_subslice_total(sseu)); - seq_printf(m, " %s Subslice Mask: %04x\n", type, - sseu->subslice_mask); - seq_printf(m, " %s Subslice Per Slice: %u\n", type, - hweight8(sseu->subslice_mask)); + for (s = 0; s < fls(sseu->slice_mask); s++) { + seq_printf(m, " %s Slice%i subslices: %u\n", type, + s, hweight8(sseu->subslice_mask[s])); + } seq_printf(m, " %s EU Total: %u\n", type, sseu->eu_total); seq_printf(m, " %s EU Per Subslice: %u\n", type, @@ -4523,6 +4532,10 @@ static int i915_sseu_status(struct seq_file *m, void *unused) seq_puts(m, "SSEU Device Status\n"); memset(&sseu, 0, sizeof(sseu)); + sseu.max_slices = INTEL_INFO(dev_priv)->sseu.max_slices; + sseu.max_subslices = INTEL_INFO(dev_priv)->sseu.max_subslices; + sseu.max_eus_per_subslice = + INTEL_INFO(dev_priv)->sseu.max_eus_per_subslice; intel_runtime_pm_get(dev_priv); @@ -4730,6 +4743,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_dmc_info", i915_dmc_info, 0}, {"i915_display_info", i915_display_info, 0}, {"i915_engine_info", i915_engine_info, 0}, + {"i915_rcs_topology", i915_rcs_topology, 0}, {"i915_shrinker_info", i915_shrinker_info, 0}, {"i915_shared_dplls_info", i915_shared_dplls_info, 0}, {"i915_dp_mst_info", i915_dp_mst_info, 0}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d61b51c0bf0b..d7c4de45644d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -49,6 +49,7 @@ #include "i915_drv.h" #include "i915_trace.h" #include "i915_pmu.h" +#include "i915_query.h" #include "i915_vgpu.h" #include "intel_drv.h" #include "intel_uc.h" @@ -428,7 +429,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_SUBSLICE_MASK: - value = INTEL_INFO(dev_priv)->sseu.subslice_mask; + value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0]; if (!value) return -ENODEV; break; @@ -2832,6 +2833,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), }; static struct drm_driver driver = { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7eec99d7fad4..604389d0b6a3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2103,6 +2103,7 @@ struct drm_i915_private { */ struct ida hw_ida; #define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ +#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ } contexts; u32 fdi_rx_config; @@ -2771,6 +2772,8 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ ((dev_priv)->info.has_logical_ring_contexts) +#define HAS_LOGICAL_RING_ELSQ(dev_priv) \ + ((dev_priv)->info.has_logical_ring_elsq) #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \ ((dev_priv)->info.has_logical_ring_preemption) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a73340ae9419..f2cbea7cf940 100644 --- 
a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -211,9 +211,15 @@ static void context_close(struct i915_gem_context *ctx) static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) { int ret; + unsigned int max; + + if (INTEL_GEN(dev_priv) >= 11) + max = GEN11_MAX_CONTEXT_HW_ID; + else + max = MAX_CONTEXT_HW_ID; ret = ida_simple_get(&dev_priv->contexts.hw_ida, - 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + 0, max, GFP_KERNEL); if (ret < 0) { /* Contexts are only released when no longer active. * Flush any pending retires to hopefully release some @@ -221,7 +227,7 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) */ i915_retire_requests(dev_priv); ret = ida_simple_get(&dev_priv->contexts.hw_ida, - 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + 0, max, GFP_KERNEL); if (ret < 0) return ret; } @@ -463,6 +469,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) /* Using the simple ida interface, the max is limited by sizeof(int) */ BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->contexts.hw_ida); /* lowest priority; idle task */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a7933c9b5562..f89ac7a8f95f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -586,6 +586,7 @@ static void err_print_capabilities(struct drm_i915_error_state_buf *m, intel_device_info_dump_flags(info, &p); intel_driver_caps_print(caps, &p); + intel_device_info_dump_topology(&info->sseu, &p); } static void err_print_params(struct drm_i915_error_state_buf *m, @@ -1084,9 +1085,9 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, return error_code; } -static void i915_gem_record_fences(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void gem_record_fences(struct i915_gpu_state *error) { + struct drm_i915_private *dev_priv = error->i915; int i; if (INTEL_GEN(dev_priv) >= 6) { @@ -1102,27 +1103,6 @@ static void i915_gem_record_fences(struct drm_i915_private *dev_priv, error->nfence = i; } -static inline u32 -gen8_engine_sync_index(struct intel_engine_cs *engine, - struct intel_engine_cs *other) -{ - int idx; - - /* - * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2; - * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs; - * bcs -> 0 = vecs, 1 = vcs2. 2 = rcs, 3 = vcs; - * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs; - * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs; - */ - - idx = (other - engine) - 1; - if (idx < 0) - idx += I915_NUM_ENGINES; - - return idx; -} - static void gen6_record_semaphore_state(struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1445,14 +1425,14 @@ capture_object(struct drm_i915_private *dev_priv, } } -static void i915_gem_record_rings(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void gem_record_rings(struct i915_gpu_state *error) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct drm_i915_private *i915 = error->i915; + struct i915_ggtt *ggtt = &i915->ggtt; int i; for (i = 0; i < I915_NUM_ENGINES; i++) { - struct intel_engine_cs *engine = dev_priv->engine[i]; + struct intel_engine_cs *engine = i915->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; struct i915_request *request; @@ -1481,17 +1461,16 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, * by userspace. 
*/ ee->batchbuffer = - i915_error_object_create(dev_priv, - request->batch); + i915_error_object_create(i915, request->batch); - if (HAS_BROKEN_CS_TLB(dev_priv)) + if (HAS_BROKEN_CS_TLB(i915)) ee->wa_batchbuffer = - i915_error_object_create(dev_priv, + i915_error_object_create(i915, engine->scratch); request_record_user_bo(request, ee); ee->ctx = - i915_error_object_create(dev_priv, + i915_error_object_create(i915, request->ctx->engine[i].state); error->simulated |= @@ -1505,27 +1484,24 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->cpu_ring_head = ring->head; ee->cpu_ring_tail = ring->tail; ee->ringbuffer = - i915_error_object_create(dev_priv, ring->vma); + i915_error_object_create(i915, ring->vma); engine_record_requests(engine, request, ee); } ee->hws_page = - i915_error_object_create(dev_priv, + i915_error_object_create(i915, engine->status_page.vma); - ee->wa_ctx = - i915_error_object_create(dev_priv, engine->wa_ctx.vma); + ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma); - ee->default_state = - capture_object(dev_priv, engine->default_state); + ee->default_state = capture_object(i915, engine->default_state); } } -static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error, - struct i915_address_space *vm, - int idx) +static void gem_capture_vm(struct i915_gpu_state *error, + struct i915_address_space *vm, + int idx) { struct drm_i915_error_buffer *active_bo; struct i915_vma *vma; @@ -1548,8 +1524,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, error->active_bo_count[idx] = count; } -static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_active_buffers(struct i915_gpu_state *error) { int cnt = 0, i, j; @@ -1569,14 +1544,13 @@ static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, for (j = 0; j < i && !found; j++) found = error->engine[j].vm == ee->vm; if (!found) - i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++); + gem_capture_vm(error, ee->vm, cnt++); } } -static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_pinned_buffers(struct i915_gpu_state *error) { - struct i915_address_space *vm = &dev_priv->ggtt.base; + struct i915_address_space *vm = &error->i915->ggtt.base; struct drm_i915_error_buffer *bo; struct i915_vma *vma; int count_inactive, count_active; @@ -1626,9 +1600,9 @@ static void capture_uc_state(struct i915_gpu_state *error) } /* Capture all registers which don't fit into another category. */ -static void i915_capture_reg_state(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_reg_state(struct i915_gpu_state *error) { + struct drm_i915_private *dev_priv = error->i915; int i; /* General organization @@ -1725,24 +1699,25 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, engine_mask ? 
"reset" : "continue"); } -static void i915_capture_gen_state(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_gen_state(struct i915_gpu_state *error) { - error->awake = dev_priv->gt.awake; - error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count); - error->suspended = dev_priv->runtime_pm.suspended; + struct drm_i915_private *i915 = error->i915; + + error->awake = i915->gt.awake; + error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count); + error->suspended = i915->runtime_pm.suspended; error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU error->iommu = intel_iommu_gfx_mapped; #endif - error->reset_count = i915_reset_count(&dev_priv->gpu_error); - error->suspend_count = dev_priv->suspend_count; + error->reset_count = i915_reset_count(&i915->gpu_error); + error->suspend_count = i915->suspend_count; memcpy(&error->device_info, - INTEL_INFO(dev_priv), + INTEL_INFO(i915), sizeof(error->device_info)); - error->driver_caps = dev_priv->caps; + error->driver_caps = i915->caps; } static __always_inline void dup_param(const char *type, void *x) @@ -1769,14 +1744,13 @@ static int capture(void *data) error->i915->gt.last_init_time); capture_params(error); + capture_gen_state(error); capture_uc_state(error); - - i915_capture_gen_state(error->i915, error); - i915_capture_reg_state(error->i915, error); - i915_gem_record_fences(error->i915, error); - i915_gem_record_rings(error->i915, error); - i915_capture_active_buffers(error->i915, error); - i915_capture_pinned_buffers(error->i915, error); + capture_reg_state(error); + gem_record_fences(error); + gem_record_rings(error); + capture_active_buffers(error); + capture_pinned_buffers(error); error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ce16003ef048..633c18785c1e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1123,6 +1123,7 @@ static void notify_ring(struct intel_engine_cs *engine) if (rq) { dma_fence_signal(&rq->fence); + GEM_BUG_ON(!i915_request_completed(rq)); i915_request_put(rq); } diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 26e8f5c13231..062e91b39085 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -594,7 +594,8 @@ static const struct intel_device_info intel_cannonlake_info = { GEN10_FEATURES, \ GEN(11), \ .ddb_size = 2048, \ - .has_csr = 0 + .has_csr = 0, \ + .has_logical_ring_elsq = 1 static const struct intel_device_info intel_icelake_11_info = { GEN11_FEATURES, diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c new file mode 100644 index 000000000000..3ace929dd90f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_query.c @@ -0,0 +1,125 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_query.h" +#include <uapi/drm/i915_drm.h> + +static int query_topology_info(struct drm_i915_private *dev_priv, + struct drm_i915_query_item *query_item) +{ + const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu; + struct drm_i915_query_topology_info topo; + u32 slice_length, subslice_length, eu_length, total_length; + + if (query_item->flags != 0) + return -EINVAL; + + if (sseu->max_slices == 0) + return -ENODEV; + + BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask)); + + slice_length = sizeof(sseu->slice_mask); + 
subslice_length = sseu->max_slices * + DIV_ROUND_UP(sseu->max_subslices, + sizeof(sseu->subslice_mask[0]) * BITS_PER_BYTE); + eu_length = sseu->max_slices * sseu->max_subslices * + DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); + + total_length = sizeof(topo) + slice_length + subslice_length + eu_length; + + if (query_item->length == 0) + return total_length; + + if (query_item->length < total_length) + return -EINVAL; + + if (copy_from_user(&topo, u64_to_user_ptr(query_item->data_ptr), + sizeof(topo))) + return -EFAULT; + + if (topo.flags != 0) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(query_item->data_ptr), + total_length)) + return -EFAULT; + + memset(&topo, 0, sizeof(topo)); + topo.max_slices = sseu->max_slices; + topo.max_subslices = sseu->max_subslices; + topo.max_eus_per_subslice = sseu->max_eus_per_subslice; + + topo.subslice_offset = slice_length; + topo.subslice_stride = DIV_ROUND_UP(sseu->max_subslices, BITS_PER_BYTE); + topo.eu_offset = slice_length + subslice_length; + topo.eu_stride = + DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); + + if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr), + &topo, sizeof(topo))) + return -EFAULT; + + if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)), + &sseu->slice_mask, slice_length)) + return -EFAULT; + + if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + + sizeof(topo) + slice_length), + sseu->subslice_mask, subslice_length)) + return -EFAULT; + + if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + + sizeof(topo) + + slice_length + subslice_length), + sseu->eu_mask, eu_length)) + return -EFAULT; + + return total_length; +} + +static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, + struct drm_i915_query_item *query_item) = { + query_topology_info, +}; + +int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_query *args = data; + struct drm_i915_query_item __user *user_item_ptr = + u64_to_user_ptr(args->items_ptr); + u32 i; + + if (args->flags != 0) + return -EINVAL; + + for (i = 0; i < args->num_items; i++, user_item_ptr++) { + struct drm_i915_query_item item; + u64 func_idx; + int ret; + + if (copy_from_user(&item, user_item_ptr, sizeof(item))) + return -EFAULT; + + if (item.query_id == 0) + return -EINVAL; + + func_idx = item.query_id - 1; + + if (func_idx < ARRAY_SIZE(i915_query_funcs)) + ret = i915_query_funcs[func_idx](dev_priv, &item); + else + ret = -EINVAL; + + /* Only write the length back to userspace if they differ. 
*/ + if (ret != item.length && put_user(ret, &user_item_ptr->length)) + return -EFAULT; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_query.h b/drivers/gpu/drm/i915/i915_query.h new file mode 100644 index 000000000000..31dcef181f63 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_query.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef _I915_QUERY_H_ +#define _I915_QUERY_H_ + +struct drm_device; +struct drm_file; + +int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 95a2e51ecbb0..9e765462ca44 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2345,7 +2345,13 @@ enum i915_power_well_id { #define BSD_RING_BASE 0x04000 #define GEN6_BSD_RING_BASE 0x12000 #define GEN8_BSD2_RING_BASE 0x1c000 +#define GEN11_BSD_RING_BASE 0x1c0000 +#define GEN11_BSD2_RING_BASE 0x1c4000 +#define GEN11_BSD3_RING_BASE 0x1d0000 +#define GEN11_BSD4_RING_BASE 0x1d4000 #define VEBOX_RING_BASE 0x1a000 +#define GEN11_VEBOX_RING_BASE 0x1c8000 +#define GEN11_VEBOX2_RING_BASE 0x1d8000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base) _MMIO((base)+0x30) #define RING_HEAD(base) _MMIO((base)+0x34) @@ -3906,6 +3912,12 @@ enum { #define GEN8_CTX_ID_SHIFT 32 #define GEN8_CTX_ID_WIDTH 21 +#define GEN11_SW_CTX_ID_SHIFT 37 +#define GEN11_SW_CTX_ID_WIDTH 11 +#define GEN11_ENGINE_CLASS_SHIFT 61 +#define GEN11_ENGINE_CLASS_WIDTH 3 +#define GEN11_ENGINE_INSTANCE_SHIFT 48 +#define GEN11_ENGINE_INSTANCE_WIDTH 6 #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) @@ -3953,6 +3965,9 @@ enum { #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) +#define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) +#define GWUNIT_CLKGATE_DIS (1 << 16) + #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define VFUNIT_CLKGATE_DIS (1 << 20) @@ -7986,9 +8001,13 @@ enum { #define VLV_GTLC_PW_RENDER_STATUS_MASK (1 << 7) #define FORCEWAKE_MT _MMIO(0xa188) /* multi-threaded */ #define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270) +#define FORCEWAKE_MEDIA_VDBOX_GEN11(n) _MMIO(0xa540 + (n) * 4) +#define FORCEWAKE_MEDIA_VEBOX_GEN11(n) _MMIO(0xa560 + (n) * 4) #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278) #define FORCEWAKE_BLITTER_GEN9 _MMIO(0xa188) #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88) +#define FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(n) _MMIO(0x0D50 + (n) * 4) +#define FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(n) _MMIO(0x0D70 + (n) * 4) #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84) #define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044) #define FORCEWAKE_KERNEL BIT(0) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2265bb8ff4fa..d437beac3969 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -217,9 +217,9 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) struct intel_timeline *tl = engine->timeline; if (!i915_seqno_passed(seqno, tl->seqno)) { - /* spin until threads are complete */ - while (intel_breadcrumbs_busy(engine)) - cond_resched(); + /* Flush any waiters before we reuse the seqno */ + intel_engine_disarm_breadcrumbs(engine); + GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); } /* Check we are idle before we fiddle with hw state! 
*/ diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 74311fc53e2f..7d6eb82eeb91 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -44,8 +44,8 @@ struct intel_wait { }; struct intel_signal_node { - struct rb_node node; struct intel_wait wait; + struct list_head link; }; struct i915_dependency { diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index a83690642aab..1f79e7a47433 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -168,17 +168,21 @@ static void irq_enable(struct intel_engine_cs *engine) set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_enable(engine); - spin_unlock(&engine->i915->irq_lock); + if (engine->irq_enable) { + spin_lock(&engine->i915->irq_lock); + engine->irq_enable(engine); + spin_unlock(&engine->i915->irq_lock); + } } static void irq_disable(struct intel_engine_cs *engine) { /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_disable(engine); - spin_unlock(&engine->i915->irq_lock); + if (engine->irq_disable) { + spin_lock(&engine->i915->irq_lock); + engine->irq_disable(engine); + spin_unlock(&engine->i915->irq_lock); + } } void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) @@ -243,6 +247,8 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) spin_unlock(&b->irq_lock); rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { + GEM_BUG_ON(!i915_seqno_passed(intel_engine_get_seqno(engine), + wait->seqno)); RB_CLEAR_NODE(&wait->node); wake_up_process(wait->tsk); } @@ -336,7 +342,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, lockdep_assert_held(&b->rb_lock); GEM_BUG_ON(b->irq_wait == wait); - /* This request is completed, so remove it from the tree, mark it as + /* + * This request is completed, so remove it from the tree, mark it as * complete, and *then* wake up the associated task. N.B. when the * task wakes up, it will find the empty rb_node, discern that it * has already been removed from the tree and skip the serialisation @@ -347,7 +354,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, rb_erase(&wait->node, &b->waiters); RB_CLEAR_NODE(&wait->node); - wake_up_process(wait->tsk); /* implicit smp_wmb() */ + if (wait->tsk->state != TASK_RUNNING) + wake_up_process(wait->tsk); /* implicit smp_wmb() */ } static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, @@ -588,23 +596,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, spin_unlock_irq(&b->rb_lock); } -static bool signal_complete(const struct i915_request *request) -{ - if (!request) - return false; - - /* - * Carefully check if the request is complete, giving time for the - * seqno to be visible or if the GPU hung. 
- */ - return __i915_request_irq_complete(request); -} - -static struct i915_request *to_signaler(struct rb_node *rb) -{ - return rb_entry(rb, struct i915_request, signaling.node); -} - static void signaler_set_rtpriority(void) { struct sched_param param = { .sched_priority = 1 }; @@ -612,78 +603,26 @@ static void signaler_set_rtpriority(void) sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); } -static void __intel_engine_remove_signal(struct intel_engine_cs *engine, - struct i915_request *request) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - lockdep_assert_held(&b->rb_lock); - - /* - * Wake up all other completed waiters and select the - * next bottom-half for the next user interrupt. - */ - __intel_engine_remove_wait(engine, &request->signaling.wait); - - /* - * Find the next oldest signal. Note that as we have - * not been holding the lock, another client may - * have installed an even older signal than the one - * we just completed - so double check we are still - * the oldest before picking the next one. - */ - if (request->signaling.wait.seqno) { - if (request == rcu_access_pointer(b->first_signal)) { - struct rb_node *rb = rb_next(&request->signaling.node); - rcu_assign_pointer(b->first_signal, - rb ? to_signaler(rb) : NULL); - } - - rb_erase(&request->signaling.node, &b->signals); - request->signaling.wait.seqno = 0; - } -} - -static struct i915_request * -get_first_signal_rcu(struct intel_breadcrumbs *b) -{ - /* - * See the big warnings for i915_gem_active_get_rcu() and similarly - * for dma_fence_get_rcu_safe() that explain the intricacies involved - * here with defeating CPU/compiler speculation and enforcing - * the required memory barriers. - */ - do { - struct i915_request *request; - - request = rcu_dereference(b->first_signal); - if (request) - request = i915_request_get_rcu(request); - - barrier(); - - if (!request || request == rcu_access_pointer(b->first_signal)) - return rcu_pointer_handoff(request); - - i915_request_put(request); - } while (1); -} - static int intel_breadcrumbs_signaler(void *arg) { struct intel_engine_cs *engine = arg; struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct i915_request *request; + struct i915_request *rq, *n; /* Install ourselves with high priority to reduce signalling latency */ signaler_set_rtpriority(); do { bool do_schedule = true; + LIST_HEAD(list); + u32 seqno; set_current_state(TASK_INTERRUPTIBLE); + if (list_empty(&b->signals)) + goto sleep; - /* We are either woken up by the interrupt bottom-half, + /* + * We are either woken up by the interrupt bottom-half, * or by a client adding a new signaller. In both cases, * the GPU seqno may have advanced beyond our oldest signal. * If it has, propagate the signal, remove the waiter and @@ -691,25 +630,45 @@ static int intel_breadcrumbs_signaler(void *arg) * need to wait for a new interrupt from the GPU or for * a new client. 
*/ - rcu_read_lock(); - request = get_first_signal_rcu(b); - rcu_read_unlock(); - if (signal_complete(request)) { - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &request->fence.flags)) { - local_bh_disable(); - dma_fence_signal(&request->fence); - GEM_BUG_ON(!i915_request_completed(request)); - local_bh_enable(); /* kick start the tasklets */ - } + seqno = intel_engine_get_seqno(engine); + + spin_lock_irq(&b->rb_lock); + list_for_each_entry_safe(rq, n, &b->signals, signaling.link) { + u32 this = rq->signaling.wait.seqno; + + GEM_BUG_ON(!rq->signaling.wait.seqno); + + if (!i915_seqno_passed(seqno, this)) + break; - if (READ_ONCE(request->signaling.wait.seqno)) { - spin_lock_irq(&b->rb_lock); - __intel_engine_remove_signal(engine, request); - spin_unlock_irq(&b->rb_lock); + if (likely(this == i915_request_global_seqno(rq))) { + __intel_engine_remove_wait(engine, + &rq->signaling.wait); + + rq->signaling.wait.seqno = 0; + __list_del_entry(&rq->signaling.link); + + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags)) { + list_add_tail(&rq->signaling.link, + &list); + i915_request_get(rq); + } + } + } + spin_unlock_irq(&b->rb_lock); + + if (!list_empty(&list)) { + local_bh_disable(); + list_for_each_entry_safe(rq, n, &list, signaling.link) { + dma_fence_signal(&rq->fence); + GEM_BUG_ON(!i915_request_completed(rq)); + i915_request_put(rq); } + local_bh_enable(); /* kick start the tasklets */ - /* If the engine is saturated we may be continually + /* + * If the engine is saturated we may be continually * processing completed requests. This angers the * NMI watchdog if we never let anything else * have access to the CPU. Let's pretend to be nice @@ -718,9 +677,19 @@ static int intel_breadcrumbs_signaler(void *arg) */ do_schedule = need_resched(); } - i915_request_put(request); if (unlikely(do_schedule)) { + /* Before we sleep, check for a missed seqno */ + if (current->state & TASK_NORMAL && + !list_empty(&b->signals) && + engine->irq_seqno_barrier && + test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted)) { + engine->irq_seqno_barrier(engine); + intel_engine_wakeup(engine); + } + +sleep: if (kthread_should_park()) kthread_parkme(); @@ -735,13 +704,40 @@ static int intel_breadcrumbs_signaler(void *arg) return 0; } +static void insert_signal(struct intel_breadcrumbs *b, + struct i915_request *request, + const u32 seqno) +{ + struct i915_request *iter; + + lockdep_assert_held(&b->rb_lock); + + /* + * A reasonable assumption is that we are called to add signals + * in sequence, as the requests are submitted for execution and + * assigned a global_seqno. This will be the case for the majority + * of internally generated signals (inter-engine signaling). + * + * Out of order waiters triggering random signaling enabling will + * be more problematic, but hopefully rare enough and the list + * small enough that the O(N) insertion sort is not an issue. + */ + + list_for_each_entry_reverse(iter, &b->signals, signaling.link) + if (i915_seqno_passed(seqno, iter->signaling.wait.seqno)) + break; + + list_add(&request->signaling.link, &iter->signaling.link); +} + void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; u32 seqno; - /* Note that we may be called from an interrupt handler on another + /* + * Note that we may be called from an interrupt handler on another * device (e.g. 
nouveau signaling a fence completion causing us * to submit a request, and so enable signaling). As such, * we need to make sure that all other users of b->rb_lock protect @@ -753,17 +749,16 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) lockdep_assert_held(&request->lock); seqno = i915_request_global_seqno(request); - if (!seqno) + if (!seqno) /* will be enabled later upon execution */ return; - spin_lock(&b->rb_lock); - GEM_BUG_ON(request->signaling.wait.seqno); request->signaling.wait.tsk = b->signaler; request->signaling.wait.request = request; request->signaling.wait.seqno = seqno; - /* First add ourselves into the list of waiters, but register our + /* + * Add ourselves into the list of waiters, but registering our * bottom-half as the signaller thread. As per usual, only the oldest * waiter (not just signaller) is tasked as the bottom-half waking * up all completed waiters after the user interrupt. @@ -771,39 +766,9 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) * If we are the oldest waiter, enable the irq (after which we * must double check that the seqno did not complete). */ + spin_lock(&b->rb_lock); + insert_signal(b, request, seqno); wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); - - if (!__i915_request_completed(request, seqno)) { - struct rb_node *parent, **p; - bool first; - - /* Now insert ourselves into the retirement ordered list of - * signals on this engine. We track the oldest seqno as that - * will be the first signal to complete. - */ - parent = NULL; - first = true; - p = &b->signals.rb_node; - while (*p) { - parent = *p; - if (i915_seqno_passed(seqno, - to_signaler(parent)->signaling.wait.seqno)) { - p = &parent->rb_right; - first = false; - } else { - p = &parent->rb_left; - } - } - rb_link_node(&request->signaling.node, parent, p); - rb_insert_color(&request->signaling.node, &b->signals); - if (first) - rcu_assign_pointer(b->first_signal, request); - } else { - __intel_engine_remove_wait(engine, &request->signaling.wait); - request->signaling.wait.seqno = 0; - wakeup = false; - } - spin_unlock(&b->rb_lock); if (wakeup) @@ -812,17 +777,20 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) void intel_engine_cancel_signaling(struct i915_request *request) { + struct intel_engine_cs *engine = request->engine; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&request->lock); - if (READ_ONCE(request->signaling.wait.seqno)) { - struct intel_engine_cs *engine = request->engine; - struct intel_breadcrumbs *b = &engine->breadcrumbs; + if (!READ_ONCE(request->signaling.wait.seqno)) + return; - spin_lock(&b->rb_lock); - __intel_engine_remove_signal(engine, request); - spin_unlock(&b->rb_lock); - } + spin_lock(&b->rb_lock); + __intel_engine_remove_wait(engine, &request->signaling.wait); + if (fetch_and_zero(&request->signaling.wait.seqno)) + __list_del_entry(&request->signaling.link); + spin_unlock(&b->rb_lock); } int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) @@ -836,6 +804,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); + INIT_LIST_HEAD(&b->signals); + /* Spawn a thread to provide a common bottom-half for all signals. 
* As this is an asynchronous interface we cannot steal the current * task for handling the bottom-half to the user interrupt, therefore @@ -895,8 +865,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) /* The engines should be idle and all requests accounted for! */ WARN_ON(READ_ONCE(b->irq_wait)); WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); - WARN_ON(rcu_access_pointer(b->first_signal)); - WARN_ON(!RB_EMPTY_ROOT(&b->signals)); + WARN_ON(!list_empty(&b->signals)); if (!IS_ERR_OR_NULL(b->signaler)) kthread_stop(b->signaler); @@ -904,28 +873,6 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) cancel_fake_irq(engine); } -bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - bool busy = false; - - spin_lock_irq(&b->rb_lock); - - if (b->irq_wait) { - wake_up_process(b->irq_wait->tsk); - busy = true; - } - - if (rcu_access_pointer(b->first_signal)) { - wake_up_process(b->signaler); - busy = true; - } - - spin_unlock_irq(&b->rb_lock); - - return busy; -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/intel_breadcrumbs.c" #endif diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 391dd69ae0a4..c0a8805b277f 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -956,8 +956,10 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.power_domain = POWER_DOMAIN_PORT_CRT; if (I915_HAS_HOTPLUG(dev_priv) && - !dmi_check_system(intel_spurious_crt_detect)) + !dmi_check_system(intel_spurious_crt_detect)) { crt->base.hpd_pin = HPD_CRT; + crt->base.hotplug = intel_encoder_hotplug; + } if (HAS_DDI(dev_priv)) { crt->base.port = PORT_E; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 8ca376aca8bd..ac8fc2a44ac6 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -25,6 +25,7 @@ * */ +#include <drm/drm_scdc_helper.h> #include "i915_drv.h" #include "intel_drv.h" @@ -2507,6 +2508,8 @@ static void intel_disable_ddi_dp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp->link_trained = false; + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); @@ -2798,6 +2801,150 @@ intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port) return connector; } +static int modeset_pipe(struct drm_crtc *crtc, + struct drm_modeset_acquire_ctx *ctx) +{ + struct drm_atomic_state *state; + struct drm_crtc_state *crtc_state; + int ret; + + state = drm_atomic_state_alloc(crtc->dev); + if (!state) + return -ENOMEM; + + state->acquire_ctx = ctx; + + crtc_state = drm_atomic_get_crtc_state(state, crtc); + if (IS_ERR(crtc_state)) { + ret = PTR_ERR(crtc_state); + goto out; + } + + crtc_state->mode_changed = true; + + ret = drm_atomic_add_affected_connectors(state, crtc); + if (ret) + goto out; + + ret = drm_atomic_add_affected_planes(state, crtc); + if (ret) + goto out; + + ret = drm_atomic_commit(state); + if (ret) + goto out; + + return 0; + + out: + drm_atomic_state_put(state); + + return ret; +} + +static int intel_hdmi_reset_link(struct intel_encoder *encoder, + struct drm_modeset_acquire_ctx *ctx) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_hdmi *hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_connector *connector = hdmi->attached_connector; + struct i2c_adapter *adapter = + intel_gmbus_get_adapter(dev_priv, 
hdmi->ddc_bus); + struct drm_connector_state *conn_state; + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + u8 config; + int ret; + + if (!connector || connector->base.status != connector_status_connected) + return 0; + + ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, + ctx); + if (ret) + return ret; + + conn_state = connector->base.state; + + crtc = to_intel_crtc(conn_state->crtc); + if (!crtc) + return 0; + + ret = drm_modeset_lock(&crtc->base.mutex, ctx); + if (ret) + return ret; + + crtc_state = to_intel_crtc_state(crtc->base.state); + + WARN_ON(!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)); + + if (!crtc_state->base.active) + return 0; + + if (!crtc_state->hdmi_high_tmds_clock_ratio && + !crtc_state->hdmi_scrambling) + return 0; + + if (conn_state->commit && + !try_wait_for_completion(&conn_state->commit->hw_done)) + return 0; + + ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config); + if (ret < 0) { + DRM_ERROR("Failed to read TMDS config: %d\n", ret); + return 0; + } + + if (!!(config & SCDC_TMDS_BIT_CLOCK_RATIO_BY_40) == + crtc_state->hdmi_high_tmds_clock_ratio && + !!(config & SCDC_SCRAMBLING_ENABLE) == + crtc_state->hdmi_scrambling) + return 0; + + /* + * HDMI 2.0 says that one should not send scrambled data + * prior to configuring the sink scrambling, and that + * TMDS clock/data transmission should be suspended when + * changing the TMDS clock rate in the sink. So let's + * just do a full modeset here, even though some sinks + * would be perfectly happy if were to just reconfigure + * the SCDC settings on the fly. + */ + return modeset_pipe(&crtc->base, ctx); +} + +static bool intel_ddi_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector) +{ + struct drm_modeset_acquire_ctx ctx; + bool changed; + int ret; + + changed = intel_encoder_hotplug(encoder, connector); + + drm_modeset_acquire_init(&ctx, 0); + + for (;;) { + if (connector->base.connector_type == DRM_MODE_CONNECTOR_HDMIA) + ret = intel_hdmi_reset_link(encoder, &ctx); + else + ret = intel_dp_retrain_link(encoder, &ctx); + + if (ret == -EDEADLK) { + drm_modeset_backoff(&ctx); + continue; + } + + break; + } + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + WARN(ret, "Acquiring modeset locks failed with %i\n", ret); + + return changed; +} + static struct intel_connector * intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) { @@ -2842,39 +2989,45 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dport) return false; } +static int +intel_ddi_max_lanes(struct intel_digital_port *intel_dport) +{ + struct drm_i915_private *dev_priv = to_i915(intel_dport->base.base.dev); + enum port port = intel_dport->base.port; + int max_lanes = 4; + + if (INTEL_GEN(dev_priv) >= 11) + return max_lanes; + + if (port == PORT_A || port == PORT_E) { + if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) + max_lanes = port == PORT_A ? 4 : 0; + else + /* Both A and E share 2 lanes */ + max_lanes = 2; + } + + /* + * Some BIOS might fail to set this bit on port A if eDP + * wasn't lit up at boot. Force this bit set when needed + * so we use the proper lane count for our calculations. 
+ */ + if (intel_ddi_a_force_4_lanes(intel_dport)) { + DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n"); + intel_dport->saved_port_bits |= DDI_A_4_LANES; + max_lanes = 4; + } + + return max_lanes; +} + void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) { struct intel_digital_port *intel_dig_port; struct intel_encoder *intel_encoder; struct drm_encoder *encoder; bool init_hdmi, init_dp, init_lspcon = false; - int max_lanes; - if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) { - switch (port) { - case PORT_A: - max_lanes = 4; - break; - case PORT_E: - max_lanes = 0; - break; - default: - max_lanes = 4; - break; - } - } else { - switch (port) { - case PORT_A: - max_lanes = 2; - break; - case PORT_E: - max_lanes = 2; - break; - default: - max_lanes = 4; - break; - } - } init_hdmi = (dev_priv->vbt.ddi_port_info[port].supports_dvi || dev_priv->vbt.ddi_port_info[port].supports_hdmi); @@ -2908,6 +3061,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs, DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port)); + intel_encoder->hotplug = intel_ddi_hotplug; intel_encoder->compute_output_type = intel_ddi_compute_output_type; intel_encoder->compute_config = intel_ddi_compute_config; intel_encoder->enable = intel_enable_ddi; @@ -2920,10 +3074,17 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->get_config = intel_ddi_get_config; intel_encoder->suspend = intel_dp_encoder_suspend; intel_encoder->get_power_domains = intel_ddi_get_power_domains; + intel_encoder->type = INTEL_OUTPUT_DDI; + intel_encoder->power_domain = intel_port_to_power_domain(port); + intel_encoder->port = port; + intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_encoder->cloneable = 0; intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); + intel_dig_port->dp.output_reg = INVALID_MMIO_REG; + intel_dig_port->max_lanes = intel_ddi_max_lanes(intel_dig_port); switch (port) { case PORT_A: @@ -2954,26 +3115,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) MISSING_CASE(port); } - /* - * Some BIOS might fail to set this bit on port A if eDP - * wasn't lit up at boot. Force this bit set when needed - * so we use the proper lane count for our calculations. 
- */ - if (intel_ddi_a_force_4_lanes(intel_dig_port)) { - DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n"); - intel_dig_port->saved_port_bits |= DDI_A_4_LANES; - max_lanes = 4; - } - - intel_dig_port->dp.output_reg = INVALID_MMIO_REG; - intel_dig_port->max_lanes = max_lanes; - - intel_encoder->type = INTEL_OUTPUT_DDI; - intel_encoder->power_domain = intel_port_to_power_domain(port); - intel_encoder->port = port; - intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); - intel_encoder->cloneable = 0; - intel_infoframe_init(intel_dig_port); if (init_dp) { diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index cadc5f81ed72..3dd350f7b8e6 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -81,12 +81,16 @@ void intel_device_info_dump_flags(const struct intel_device_info *info, static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) { + int s; + drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); - drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask); - drm_printf(p, "subslice per slice: %u\n", - hweight8(sseu->subslice_mask)); + for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) { + drm_printf(p, "slice%d %u subslices mask=%04x\n", + s, hweight8(sseu->subslice_mask[s]), + sseu->subslice_mask[s]); + } drm_printf(p, "EU total: %u\n", sseu->eu_total); drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); drm_printf(p, "has slice power gating: %s\n", @@ -120,22 +124,100 @@ void intel_device_info_dump(const struct intel_device_info *info, intel_device_info_dump_flags(info, p); } +void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, + struct drm_printer *p) +{ + int s, ss; + + if (sseu->max_slices == 0) { + drm_printf(p, "Unavailable\n"); + return; + } + + for (s = 0; s < sseu->max_slices; s++) { + drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n", + s, hweight8(sseu->subslice_mask[s]), + sseu->subslice_mask[s]); + + for (ss = 0; ss < sseu->max_subslices; ss++) { + u16 enabled_eus = sseu_get_eus(sseu, s, ss); + + drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n", + ss, hweight16(enabled_eus), enabled_eus); + } + } +} + +static u16 compute_eu_total(const struct sseu_dev_info *sseu) +{ + u16 i, total = 0; + + for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++) + total += hweight8(sseu->eu_mask[i]); + + return total; +} + static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; const u32 fuse2 = I915_READ(GEN8_FUSE2); + int s, ss; + const int eu_mask = 0xff; + u32 subslice_mask, eu_en; sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> GEN10_F2_S_ENA_SHIFT; - sseu->subslice_mask = (1 << 4) - 1; - sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> - GEN10_F2_SS_DIS_SHIFT); + sseu->max_slices = 6; + sseu->max_subslices = 4; + sseu->max_eus_per_subslice = 8; + + subslice_mask = (1 << 4) - 1; + subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> + GEN10_F2_SS_DIS_SHIFT); + + /* + * Slice0 can have up to 3 subslices, but there are only 2 in + * slice1/2. 
+ */ + sseu->subslice_mask[0] = subslice_mask; + for (s = 1; s < sseu->max_slices; s++) + sseu->subslice_mask[s] = subslice_mask & 0x3; + + /* Slice0 */ + eu_en = ~I915_READ(GEN8_EU_DISABLE0); + for (ss = 0; ss < sseu->max_subslices; ss++) + sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); + /* Slice1 */ + sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); + eu_en = ~I915_READ(GEN8_EU_DISABLE1); + sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); + /* Slice2 */ + sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); + sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); + /* Slice3 */ + sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); + eu_en = ~I915_READ(GEN8_EU_DISABLE2); + sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); + /* Slice4 */ + sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); + sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); + /* Slice5 */ + sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); + eu_en = ~I915_READ(GEN10_EU_DISABLE3); + sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); + + /* Do a second pass where we mark the subslices disabled if all their + * eus are off. + */ + for (s = 0; s < sseu->max_slices; s++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { + if (sseu_get_eus(sseu, s, ss) == 0) + sseu->subslice_mask[s] &= ~BIT(ss); + } + } - sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0)); - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1)); - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2)); - sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) & - GEN10_EU_DIS_SS_MASK)); + sseu->eu_total = compute_eu_total(sseu); /* * CNL is expected to always have a uniform distribution @@ -156,26 +238,39 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; - u32 fuse, eu_dis; + u32 fuse; fuse = I915_READ(CHV_FUSE_GT); sseu->slice_mask = BIT(0); + sseu->max_slices = 1; + sseu->max_subslices = 2; + sseu->max_eus_per_subslice = 8; if (!(fuse & CHV_FGT_DISABLE_SS0)) { - sseu->subslice_mask |= BIT(0); - eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK | - CHV_FGT_EU_DIS_SS0_R1_MASK); - sseu->eu_total += 8 - hweight32(eu_dis); + u8 disabled_mask = + ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> + CHV_FGT_EU_DIS_SS0_R0_SHIFT) | + (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> + CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); + + sseu->subslice_mask[0] |= BIT(0); + sseu_set_eus(sseu, 0, 0, ~disabled_mask); } if (!(fuse & CHV_FGT_DISABLE_SS1)) { - sseu->subslice_mask |= BIT(1); - eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK | - CHV_FGT_EU_DIS_SS1_R1_MASK); - sseu->eu_total += 8 - hweight32(eu_dis); + u8 disabled_mask = + ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> + CHV_FGT_EU_DIS_SS1_R0_SHIFT) | + (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> + CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); + + sseu->subslice_mask[0] |= BIT(1); + sseu_set_eus(sseu, 0, 1, ~disabled_mask); } + sseu->eu_total = compute_eu_total(sseu); + /* * CHV expected to always have a uniform distribution of EU * across subslices. 
@@ -197,41 +292,52 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) { struct intel_device_info *info = mkwrite_device_info(dev_priv); struct sseu_dev_info *sseu = &info->sseu; - int s_max = 3, ss_max = 4, eu_max = 8; int s, ss; - u32 fuse2, eu_disable; - u8 eu_mask = 0xff; + u32 fuse2, eu_disable, subslice_mask; + const u8 eu_mask = 0xff; fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + /* BXT has a single slice and at most 3 subslices. */ + sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3; + sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4; + sseu->max_eus_per_subslice = 8; + /* * The subslice disable field is global, i.e. it applies * to each of the enabled slices. */ - sseu->subslice_mask = (1 << ss_max) - 1; - sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> - GEN9_F2_SS_DIS_SHIFT); + subslice_mask = (1 << sseu->max_subslices) - 1; + subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> + GEN9_F2_SS_DIS_SHIFT); /* * Iterate through enabled slices and subslices to * count the total enabled EU. */ - for (s = 0; s < s_max; s++) { + for (s = 0; s < sseu->max_slices; s++) { if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue; + sseu->subslice_mask[s] = subslice_mask; + eu_disable = I915_READ(GEN9_EU_DISABLE(s)); - for (ss = 0; ss < ss_max; ss++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { int eu_per_ss; + u8 eu_disabled_mask; - if (!(sseu->subslice_mask & BIT(ss))) + if (!(sseu->subslice_mask[s] & BIT(ss))) /* skip disabled subslice */ continue; - eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) & - eu_mask); + eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask; + + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + + eu_per_ss = sseu->max_eus_per_subslice - + hweight8(eu_disabled_mask); /* * Record which subslice(s) has(have) 7 EUs. we @@ -240,11 +346,11 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) */ if (eu_per_ss == 7) sseu->subslice_7eu[s] |= BIT(ss); - - sseu->eu_total += eu_per_ss; } } + sseu->eu_total = compute_eu_total(sseu); + /* * SKL is expected to always have a uniform distribution * of EU across subslices with the exception that any one @@ -270,8 +376,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_eu_pg = sseu->eu_per_subslice > 2; if (IS_GEN9_LP(dev_priv)) { -#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) - info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3; +#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask[0] & BIT(ss))) + info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3; sseu->min_eu_in_pool = 0; if (info->has_pooled_eu) { @@ -289,19 +395,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; - const int s_max = 3, ss_max = 3, eu_max = 8; int s, ss; - u32 fuse2, eu_disable[3]; /* s_max */ + u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */ fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + sseu->max_slices = 3; + sseu->max_subslices = 3; + sseu->max_eus_per_subslice = 8; + /* * The subslice disable field is global, i.e. it applies * to each of the enabled slices. 
*/ - sseu->subslice_mask = GENMASK(ss_max - 1, 0); - sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> - GEN8_F2_SS_DIS_SHIFT); + subslice_mask = GENMASK(sseu->max_subslices - 1, 0); + subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> + GEN8_F2_SS_DIS_SHIFT); eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK; eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) | @@ -315,30 +424,38 @@ * Iterate through enabled slices and subslices to * count the total enabled EU. */ - for (s = 0; s < s_max; s++) { + for (s = 0; s < sseu->max_slices; s++) { if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue; - for (ss = 0; ss < ss_max; ss++) { + sseu->subslice_mask[s] = subslice_mask; + + for (ss = 0; ss < sseu->max_subslices; ss++) { + u8 eu_disabled_mask; u32 n_disabled; - if (!(sseu->subslice_mask & BIT(ss))) + if (!(sseu->subslice_mask[s] & BIT(ss))) /* skip disabled subslice */ continue; - n_disabled = hweight8(eu_disable[s] >> (ss * eu_max)); + eu_disabled_mask = + eu_disable[s] >> (ss * sseu->max_eus_per_subslice); + + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + + n_disabled = hweight8(eu_disabled_mask); /* * Record which subslices have 7 EUs. */ - if (eu_max - n_disabled == 7) + if (sseu->max_eus_per_subslice - n_disabled == 7) sseu->subslice_7eu[s] |= 1 << ss; - - sseu->eu_total += eu_max - n_disabled; } } + sseu->eu_total = compute_eu_total(sseu); + /* * BDW is expected to always have a uniform distribution of EU across * subslices with the exception that any one EU in any one subslice may @@ -362,6 +479,7 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) struct intel_device_info *info = mkwrite_device_info(dev_priv); struct sseu_dev_info *sseu = &info->sseu; u32 fuse1; + int s, ss; /* * There isn't a register to tell us how many slices/subslices. We @@ -373,18 +491,22 @@ /* fall through */ case 1: sseu->slice_mask = BIT(0); - sseu->subslice_mask = BIT(0); + sseu->subslice_mask[0] = BIT(0); break; case 2: sseu->slice_mask = BIT(0); - sseu->subslice_mask = BIT(0) | BIT(1); + sseu->subslice_mask[0] = BIT(0) | BIT(1); break; case 3: sseu->slice_mask = BIT(0) | BIT(1); - sseu->subslice_mask = BIT(0) | BIT(1); + sseu->subslice_mask[0] = BIT(0) | BIT(1); + sseu->subslice_mask[1] = BIT(0) | BIT(1); break; } + sseu->max_slices = hweight8(sseu->slice_mask); + sseu->max_subslices = hweight8(sseu->subslice_mask[0]); + fuse1 = I915_READ(HSW_PAVP_FUSE1); switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) { default: @@ -401,8 +523,16 @@ sseu->eu_per_subslice = 6; break; } + sseu->max_eus_per_subslice = sseu->eu_per_subslice; + + for (s = 0; s < sseu->max_slices; s++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { + sseu_set_eus(sseu, s, ss, + (1UL << sseu->eu_per_subslice) - 1); + } + } - sseu->eu_total = sseu_subslice_total(sseu) * sseu->eu_per_subslice; + sseu->eu_total = compute_eu_total(sseu); /* No powergating for you. 
*/ sseu->has_slice_pg = 0; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index ab5bfd305477..0835752c8b22 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -96,6 +96,7 @@ enum intel_platform { func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ + func(has_logical_ring_elsq); \ func(has_logical_ring_preemption); \ func(has_overlay); \ func(has_pooled_eu); \ @@ -112,10 +113,13 @@ enum intel_platform { func(supports_tv); \ func(has_ipc); +#define GEN_MAX_SLICES (6) /* CNL upper bound */ +#define GEN_MAX_SUBSLICES (7) + struct sseu_dev_info { u8 slice_mask; - u8 subslice_mask; - u8 eu_total; + u8 subslice_mask[GEN_MAX_SLICES]; + u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ @@ -123,6 +127,17 @@ struct sseu_dev_info { u8 has_slice_pg:1; u8 has_subslice_pg:1; u8 has_eu_pg:1; + + /* Topology fields */ + u8 max_slices; + u8 max_subslices; + u8 max_eus_per_subslice; + + /* We don't have more than 8 eus per subslice at the moment and as we + * store eus enabled using bits, no need to multiply by eus per + * subslice. + */ + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; }; typedef u8 intel_ring_mask_t; @@ -175,7 +190,49 @@ struct intel_driver_caps { static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) { - return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); + unsigned int i, total = 0; + + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) + total += hweight8(sseu->subslice_mask[i]); + + return total; +} + +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu, + int slice, int subslice) +{ + int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, + BITS_PER_BYTE); + int slice_stride = sseu->max_subslices * subslice_stride; + + return slice * slice_stride + subslice * subslice_stride; +} + +static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu, + int slice, int subslice) +{ + int i, offset = sseu_eu_idx(sseu, slice, subslice); + u16 eu_mask = 0; + + for (i = 0; + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { + eu_mask |= ((u16) sseu->eu_mask[offset + i]) << + (i * BITS_PER_BYTE); + } + + return eu_mask; +} + +static inline void sseu_set_eus(struct sseu_dev_info *sseu, + int slice, int subslice, u16 eu_mask) +{ + int i, offset = sseu_eu_idx(sseu, slice, subslice); + + for (i = 0; + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { + sseu->eu_mask[offset + i] = + (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; + } } const char *intel_platform_name(enum intel_platform platform); @@ -187,6 +244,8 @@ void intel_device_info_dump_flags(const struct intel_device_info *info, struct drm_printer *p); void intel_device_info_dump_runtime(const struct intel_device_info *info, struct drm_printer *p); +void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, + struct drm_printer *p); void intel_driver_caps_print(const struct intel_driver_caps *caps, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 90f0fc8cc2bd..ceed0821b37d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2152,6 +2152,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, */ ret = i915_vma_pin_fence(vma); if (ret != 0 && INTEL_GEN(dev_priv) < 4) { + i915_gem_object_unpin_from_display_plane(vma); vma = 
ERR_PTR(ret); goto err; } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index c722a6750e90..9a4a51e79fa1 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1913,6 +1913,7 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp, int link_rate, uint8_t lane_count, bool link_mst) { + intel_dp->link_trained = false; intel_dp->link_rate = link_rate; intel_dp->lane_count = lane_count; intel_dp->link_mst = link_mst; @@ -2761,6 +2762,8 @@ static void intel_disable_dp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp->link_trained = false; + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); @@ -4272,12 +4275,85 @@ go_again: return -EINVAL; } -static void -intel_dp_retrain_link(struct intel_dp *intel_dp) +static bool +intel_dp_needs_link_retrain(struct intel_dp *intel_dp) +{ + u8 link_status[DP_LINK_STATUS_SIZE]; + + if (!intel_dp->link_trained) + return false; + + if (!intel_dp_get_link_status(intel_dp, link_status)) + return false; + + /* + * Validate the cached values of intel_dp->link_rate and + * intel_dp->lane_count before attempting to retrain. + */ + if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate, + intel_dp->lane_count)) + return false; + + /* Retrain if Channel EQ or CR not ok */ + return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count); +} + +/* + * If display is now connected check links status, + * there has been known issues of link loss triggering + * long pulse. + * + * Some sinks (eg. ASUS PB287Q) seem to perform some + * weird HPD ping pong during modesets. So we can apparently + * end up with HPD going low during a modeset, and then + * going back up soon after. And once that happens we must + * retrain the link to get a picture. That's in case no + * userspace component reacted to intermittent HPD dip. 
+ */ +int intel_dp_retrain_link(struct intel_encoder *encoder, + struct drm_modeset_acquire_ctx *ctx) { - struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_connector *connector = intel_dp->attached_connector; + struct drm_connector_state *conn_state; + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int ret; + + /* FIXME handle the MST connectors as well */ + + if (!connector || connector->base.status != connector_status_connected) + return 0; + + ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, + ctx); + if (ret) + return ret; + + conn_state = connector->base.state; + + crtc = to_intel_crtc(conn_state->crtc); + if (!crtc) + return 0; + + ret = drm_modeset_lock(&crtc->base.mutex, ctx); + if (ret) + return ret; + + crtc_state = to_intel_crtc_state(crtc->base.state); + + WARN_ON(!intel_crtc_has_dp_encoder(crtc_state)); + + if (!crtc_state->base.active) + return 0; + + if (conn_state->commit && + !try_wait_for_completion(&conn_state->commit->hw_done)) + return 0; + + if (!intel_dp_needs_link_retrain(intel_dp)) + return 0; /* Suppress underruns caused by re-training */ intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); @@ -4295,51 +4371,49 @@ intel_dp_retrain_link(struct intel_dp *intel_dp) if (crtc->config->has_pch_encoder) intel_set_pch_fifo_underrun_reporting(dev_priv, intel_crtc_pch_transcoder(crtc), true); + + return 0; } -static void -intel_dp_check_link_status(struct intel_dp *intel_dp) +static bool intel_dp_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector) { - struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; - struct drm_connector_state *conn_state = - intel_dp->attached_connector->base.state; - u8 link_status[DP_LINK_STATUS_SIZE]; - - WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); - - if (!intel_dp_get_link_status(intel_dp, link_status)) { - DRM_ERROR("Failed to get link status\n"); - return; - } + struct drm_modeset_acquire_ctx ctx; + bool changed; + int ret; - if (!conn_state->crtc) - return; + changed = intel_encoder_hotplug(encoder, connector); - WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex)); + drm_modeset_acquire_init(&ctx, 0); - if (!conn_state->crtc->state->active) - return; + for (;;) { + ret = intel_dp_retrain_link(encoder, &ctx); - if (conn_state->commit && - !try_wait_for_completion(&conn_state->commit->hw_done)) - return; + if (ret == -EDEADLK) { + drm_modeset_backoff(&ctx); + continue; + } - /* - * Validate the cached values of intel_dp->link_rate and - * intel_dp->lane_count before attempting to retrain. 
- */ - if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate, - intel_dp->lane_count)) - return; + break; + } - /* Retrain if Channel EQ or CR not ok */ - if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { - DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n", - intel_encoder->base.name); + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + WARN(ret, "Acquiring modeset locks failed with %i\n", ret); - intel_dp_retrain_link(intel_dp); - } + return changed; } /* @@ -4397,7 +4471,9 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n"); } - intel_dp_check_link_status(intel_dp); + /* defer to the hotplug work for link retraining if needed */ + if (intel_dp_needs_link_retrain(intel_dp)) + return false; if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { DRM_DEBUG_KMS("Link Training Compliance Test requested\n"); @@ -4782,20 +4858,6 @@ intel_dp_long_pulse(struct intel_connector *connector) */ status = connector_status_disconnected; goto out; - } else { - /* - * If display is now connected check links status, - * there has been known issues of link loss triggerring - * long pulse. - * - * Some sinks (eg. ASUS PB287Q) seem to perform some - * weird HPD ping pong during modesets. So we can apparently - * end up with HPD going low during a modeset, and then - * going back up soon after. And once that happens we must - * retrain the link to get a picture. That's in case no - * userspace component reacted to intermittent HPD dip. - */ - intel_dp_check_link_status(intel_dp); } /* @@ -5372,37 +5434,10 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) } if (!intel_dp->is_mst) { - struct drm_modeset_acquire_ctx ctx; - struct drm_connector *connector = &intel_dp->attached_connector->base; - struct drm_crtc *crtc; - int iret; - bool handled = false; - - drm_modeset_acquire_init(&ctx, 0); -retry: - iret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, &ctx); - if (iret) - goto err; - - crtc = connector->state->crtc; - if (crtc) { - iret = drm_modeset_lock(&crtc->mutex, &ctx); - if (iret) - goto err; - } + bool handled; handled = intel_dp_short_pulse(intel_dp); -err: - if (iret == -EDEADLK) { - drm_modeset_backoff(&ctx); - goto retry; - } - - drm_modeset_drop_locks(&ctx); - drm_modeset_acquire_fini(&ctx); - WARN(iret, "Acquiring modeset locks failed with %i\n", iret); - /* Short pulse can signify loss of hdcp authentication */ intel_hdcp_check_link(intel_dp->attached_connector); @@ -6393,6 +6428,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, "DP %c", port_name(port))) goto err_encoder_init; + intel_encoder->hotplug = intel_dp_hotplug; intel_encoder->compute_config = intel_dp_compute_config; intel_encoder->get_hw_state = intel_dp_get_hw_state; intel_encoder->get_config = intel_dp_get_config; diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index cf8fef8b6f58..f59b59bb0a21 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -248,6 +248,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) int tries; u32 training_pattern; uint8_t link_status[DP_LINK_STATUS_SIZE]; + bool channel_eq = false; training_pattern = intel_dp_training_pattern(intel_dp); @@ -259,7 +260,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) return false; } - intel_dp->channel_eq_status = false; for (tries = 0; tries < 5; 
tries++) { drm_dp_link_train_channel_eq_delay(intel_dp->dpcd); @@ -279,7 +279,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) if (drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { - intel_dp->channel_eq_status = true; + channel_eq = true; DRM_DEBUG_KMS("Channel EQ done. DP Training " "successful\n"); break; @@ -301,12 +301,14 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) intel_dp_set_idle_link_train(intel_dp); - return intel_dp->channel_eq_status; + return channel_eq; } void intel_dp_stop_link_train(struct intel_dp *intel_dp) { + intel_dp->link_trained = true; + intel_dp_set_link_train(intel_dp, DP_TRAINING_PATTERN_DISABLE); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 652b11e788cc..37d5412af8f5 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -215,7 +215,8 @@ struct intel_encoder { enum intel_output_type type; enum port port; unsigned int cloneable; - void (*hot_plug)(struct intel_encoder *); + bool (*hotplug)(struct intel_encoder *encoder, + struct intel_connector *connector); enum intel_output_type (*compute_output_type)(struct intel_encoder *, struct intel_crtc_state *, struct drm_connector_state *); @@ -1047,9 +1048,9 @@ struct intel_dp { uint8_t lane_count; uint8_t sink_count; bool link_mst; + bool link_trained; bool has_audio; bool detect_done; - bool channel_eq_status; bool reset_link_params; enum aux_ch aux_ch; uint8_t dpcd[DP_RECEIVER_CAP_SIZE]; @@ -1625,6 +1626,8 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, int link_rate, uint8_t lane_count); void intel_dp_start_link_train(struct intel_dp *intel_dp); void intel_dp_stop_link_train(struct intel_dp *intel_dp); +int intel_dp_retrain_link(struct intel_encoder *encoder, + struct drm_modeset_acquire_ctx *ctx); void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode); void intel_dp_encoder_reset(struct drm_encoder *encoder); void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder); @@ -1704,7 +1707,8 @@ int intel_dsi_dcs_init_backlight_funcs(struct intel_connector *intel_connector); void intel_dvo_init(struct drm_i915_private *dev_priv); /* intel_hotplug.c */ void intel_hpd_poll_init(struct drm_i915_private *dev_priv); - +bool intel_encoder_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector); /* legacy fbdev emulation in intel_fbdev.c */ #ifdef CONFIG_DRM_FBDEV_EMULATION diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 3e1107ecb6ee..4ba139c27fba 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -123,6 +123,22 @@ static const struct engine_info intel_engines[] = { .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, }, + [VCS3] = { + .hw_id = VCS3_HW, + .uabi_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 2, + .mmio_base = GEN11_BSD3_RING_BASE, + .irq_shift = 0, /* not used */ + }, + [VCS4] = { + .hw_id = VCS4_HW, + .uabi_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 3, + .mmio_base = GEN11_BSD4_RING_BASE, + .irq_shift = 0, /* not used */ + }, [VECS] = { .hw_id = VECS_HW, .uabi_id = I915_EXEC_VEBOX, @@ -131,6 +147,14 @@ static const struct engine_info intel_engines[] = { .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, }, + [VECS2] = { + .hw_id = VECS2_HW, + .uabi_id = I915_EXEC_VEBOX, + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 1, + .mmio_base = 
GEN11_VEBOX2_RING_BASE, + .irq_shift = 0, /* not used */ + }, }; /** @@ -210,6 +234,9 @@ intel_engine_setup(struct drm_i915_private *dev_priv, GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); class_info = &intel_engine_classes[info->class]; + BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); + BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); + if (GEM_WARN_ON(info->class > MAX_ENGINE_CLASS)) return -EINVAL; @@ -230,7 +257,25 @@ intel_engine_setup(struct drm_i915_private *dev_priv, class_info->name, info->instance) >= sizeof(engine->name)); engine->hw_id = engine->guc_id = info->hw_id; - engine->mmio_base = info->mmio_base; + if (INTEL_GEN(dev_priv) >= 11) { + switch (engine->id) { + case VCS: + engine->mmio_base = GEN11_BSD_RING_BASE; + break; + case VCS2: + engine->mmio_base = GEN11_BSD2_RING_BASE; + break; + case VECS: + engine->mmio_base = GEN11_VEBOX_RING_BASE; + break; + default: + /* take the original value for all other engines */ + engine->mmio_base = info->mmio_base; + break; + } + } else { + engine->mmio_base = info->mmio_base; + } engine->irq_shift = info->irq_shift; engine->class = info->class; engine->instance = info->instance; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 38a5535a5c63..707d49c12638 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -960,6 +960,30 @@ unlock: mutex_unlock(&fbc->lock); } +/** + * __intel_fbc_disable - disable FBC + * @dev_priv: i915 device instance + * + * This is the low level function that actually disables FBC. Callers should + * grab the FBC lock. + */ +static void __intel_fbc_disable(struct drm_i915_private *dev_priv) +{ + struct intel_fbc *fbc = &dev_priv->fbc; + struct intel_crtc *crtc = fbc->crtc; + + WARN_ON(!mutex_is_locked(&fbc->lock)); + WARN_ON(!fbc->enabled); + WARN_ON(fbc->active); + + DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe)); + + __intel_fbc_cleanup_cfb(dev_priv); + + fbc->enabled = false; + fbc->crtc = NULL; +} + static void __intel_fbc_post_update(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -971,6 +995,13 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc) if (!fbc->enabled || fbc->crtc != crtc) return; + if (!i915_modparams.enable_fbc) { + intel_fbc_deactivate(dev_priv, "disabled at runtime per module param"); + __intel_fbc_disable(dev_priv); + + return; + } + if (!intel_fbc_can_activate(crtc)) { WARN_ON(fbc->active); return; @@ -1175,31 +1206,6 @@ out: } /** - * __intel_fbc_disable - disable FBC - * @dev_priv: i915 device instance - * - * This is the low level function that actually disables FBC. Callers should - * grab the FBC lock. 
- */ -static void __intel_fbc_disable(struct drm_i915_private *dev_priv) -{ - struct intel_fbc *fbc = &dev_priv->fbc; - struct intel_crtc *crtc = fbc->crtc; - - WARN_ON(!mutex_is_locked(&fbc->lock)); - WARN_ON(!fbc->enabled); - WARN_ON(fbc->active); - WARN_ON(crtc->active); - - DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe)); - - __intel_fbc_cleanup_cfb(dev_priv); - - fbc->enabled = false; - fbc->crtc = NULL; -} - -/** * intel_fbc_disable - disable FBC if it's associated with crtc * @crtc: the CRTC * @@ -1213,6 +1219,8 @@ void intel_fbc_disable(struct intel_crtc *crtc) if (!fbc_supported(dev_priv)) return; + WARN_ON(crtc->active); + mutex_lock(&fbc->lock); if (fbc->crtc == crtc) __intel_fbc_disable(dev_priv); @@ -1235,8 +1243,10 @@ void intel_fbc_global_disable(struct drm_i915_private *dev_priv) return; mutex_lock(&fbc->lock); - if (fbc->enabled) + if (fbc->enabled) { + WARN_ON(fbc->crtc->active); __intel_fbc_disable(dev_priv); + } mutex_unlock(&fbc->lock); cancel_work_sync(&fbc->work.work); diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 7b5074e2120c..c0c2e7d1c7d7 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -61,8 +61,10 @@ static int guc_log_flush(struct intel_guc *guc) static int guc_log_control(struct intel_guc *guc, bool enable, u32 verbosity) { union guc_log_control control_val = { - .logging_enabled = enable, - .verbosity = verbosity, + { + .logging_enabled = enable, + .verbosity = verbosity, + }, }; u32 action[] = { INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING, diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index f5d7bfb43006..1baef4ac7ecb 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -2383,6 +2383,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, "HDMI %c", port_name(port)); + intel_encoder->hotplug = intel_encoder_hotplug; intel_encoder->compute_config = intel_hdmi_compute_config; if (HAS_PCH_SPLIT(dev_priv)) { intel_encoder->disable = pch_disable_hdmi; diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index fe28c1ea84a5..0e3d3e89d66a 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -274,24 +274,26 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) intel_runtime_pm_put(dev_priv); } -static bool intel_hpd_irq_event(struct drm_device *dev, - struct drm_connector *connector) +bool intel_encoder_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector) { + struct drm_device *dev = connector->base.dev; enum drm_connector_status old_status; WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); - old_status = connector->status; + old_status = connector->base.status; - connector->status = drm_helper_probe_detect(connector, NULL, false); + connector->base.status = + drm_helper_probe_detect(&connector->base, NULL, false); - if (old_status == connector->status) + if (old_status == connector->base.status) return false; DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n", - connector->base.id, - connector->name, + connector->base.base.id, + connector->base.name, drm_get_connector_status_name(old_status), - drm_get_connector_status_name(connector->status)); + drm_get_connector_status_name(connector->base.status)); return true; } @@ -381,10 +383,9 @@ static void i915_hotplug_work_func(struct work_struct *work) if 
(hpd_event_bits & (1 << intel_encoder->hpd_pin)) { DRM_DEBUG_KMS("Connector %s (pin %i) received hotplug event.\n", connector->name, intel_encoder->hpd_pin); - if (intel_encoder->hot_plug) - intel_encoder->hot_plug(intel_encoder); - if (intel_hpd_irq_event(dev, connector)) - changed = true; + + changed |= intel_encoder->hotplug(intel_encoder, + intel_connector); } } drm_connector_list_iter_end(&conn_iter); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 36b376e4b105..3a69b367e565 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -204,6 +204,18 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * bits 32-52: ctx ID, a globally unique tag * bits 53-54: mbz, reserved for use by hardware * bits 55-63: group ID, currently unused and set to 0 + * + * Starting from Gen11, the upper dword of the descriptor has a new format: + * + * bits 32-36: reserved + * bits 37-47: SW context ID + * bits 48:53: engine instance + * bit 54: mbz, reserved for use by hardware + * bits 55-60: SW counter + * bits 61-63: engine class + * + * engine info, SW context ID and SW counter need to form a unique number + * (Context ID) per lrc. */ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, @@ -212,12 +224,32 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, struct intel_context *ce = &ctx->engine[engine->id]; u64 desc; - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH)); + BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); desc = ctx->desc_template; /* bits 0-11 */ + GEM_BUG_ON(desc & GENMASK_ULL(63, 12)); + desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; /* bits 12-31 */ - desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ + GEM_BUG_ON(desc & GENMASK_ULL(63, 32)); + + if (INTEL_GEN(ctx->i915) >= 11) { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); + desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; + /* bits 37-47 */ + + desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; + /* bits 48-53 */ + + /* TODO: decide what to do with SW counter (bits 55-60) */ + + desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; + /* bits 61-63 */ + } else { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); + desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ + } ce->lrc_desc = desc; } @@ -385,18 +417,30 @@ static u64 execlists_update_context(struct i915_request *rq) return ce->lrc_desc; } -static inline void elsp_write(u64 desc, u32 __iomem *elsp) +static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) { - writel(upper_32_bits(desc), elsp); - writel(lower_32_bits(desc), elsp); + if (execlists->ctrl_reg) { + writel(lower_32_bits(desc), execlists->submit_reg + port * 2); + writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); + } else { + writel(upper_32_bits(desc), execlists->submit_reg); + writel(lower_32_bits(desc), execlists->submit_reg); + } } static void execlists_submit_ports(struct intel_engine_cs *engine) { - struct execlist_port *port = engine->execlists.port; + struct intel_engine_execlists *execlists = &engine->execlists; + struct execlist_port *port = execlists->port; unsigned int n; - for (n = execlists_num_ports(&engine->execlists); n--; ) { + /* + * ELSQ note: the submit queue is not cleared after being submitted + * to the HW so we need to make sure we always clean it up. 
This is + * currently ensured by the fact that we always write the same number + * of elsq entries, keep this in mind before changing the loop below. + */ + for (n = execlists_num_ports(execlists); n--; ) { struct i915_request *rq; unsigned int count; u64 desc; @@ -420,9 +464,14 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = 0; } - elsp_write(desc, engine->execlists.elsp); + write_desc(execlists, desc, n); } - execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); + + execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } static bool ctx_single_port_submission(const struct i915_gem_context *ctx) @@ -455,11 +504,12 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq) static void inject_preempt_context(struct intel_engine_cs *engine) { + struct intel_engine_execlists *execlists = &engine->execlists; struct intel_context *ce = &engine->i915->preempt_context->engine[engine->id]; unsigned int n; - GEM_BUG_ON(engine->execlists.preempt_complete_status != + GEM_BUG_ON(execlists->preempt_complete_status != upper_32_bits(ce->lrc_desc)); GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] & _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | @@ -472,10 +522,15 @@ static void inject_preempt_context(struct intel_engine_cs *engine) * the state of the GPU is known (idle). */ GEM_TRACE("%s\n", engine->name); - for (n = execlists_num_ports(&engine->execlists); --n; ) - elsp_write(0, engine->execlists.elsp); + for (n = execlists_num_ports(execlists); --n; ) + write_desc(execlists, 0, n); + + write_desc(execlists, ce->lrc_desc, n); + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); - elsp_write(ce->lrc_desc, engine->execlists.elsp); execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); } @@ -672,7 +727,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) GEM_BUG_ON(!execlists->active); intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); + + execlists_context_status_change(rq, + i915_request_completed(rq) ? + INTEL_CONTEXT_SCHEDULE_OUT : + INTEL_CONTEXT_SCHEDULE_PREEMPTED); + i915_request_put(rq); memset(port, 0, sizeof(*port)); @@ -2037,8 +2097,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->set_default_submission = execlists_set_default_submission; - engine->irq_enable = gen8_logical_ring_enable_irq; - engine->irq_disable = gen8_logical_ring_disable_irq; + if (INTEL_GEN(engine->i915) < 11) { + engine->irq_enable = gen8_logical_ring_enable_irq; + engine->irq_disable = gen8_logical_ring_disable_irq; + } else { + /* + * TODO: On Gen11 interrupt masks need to be clear + * to allow C6 entry. Keep interrupts enabled at + * and take the hit of generating extra interrupts + * until a more refined solution exists. 
+ */ + } engine->emit_bb_start = gen8_emit_bb_start; } @@ -2090,8 +2159,15 @@ static int logical_ring_init(struct intel_engine_cs *engine) if (ret) goto error; - engine->execlists.elsp = - engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + if (HAS_LOGICAL_RING_ELSQ(engine->i915)) { + engine->execlists.submit_reg = engine->i915->regs + + i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine)); + engine->execlists.ctrl_reg = engine->i915->regs + + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine)); + } else { + engine->execlists.submit_reg = engine->i915->regs + + i915_mmio_reg_offset(RING_ELSP(engine)); + } engine->execlists.preempt_complete_status = ~0u; if (engine->i915->preempt_context) @@ -2177,7 +2253,7 @@ make_rpcs(struct drm_i915_private *dev_priv) if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { rpcs |= GEN8_RPCS_SS_CNT_ENABLE; - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) << + rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) << GEN8_RPCS_SS_CNT_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } @@ -2201,6 +2277,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) default: MISSING_CASE(INTEL_GEN(engine->i915)); /* fall through */ + case 11: + indirect_ctx_offset = + GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + break; case 10: indirect_ctx_offset = GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; @@ -2360,7 +2440,7 @@ populate_lr_context(struct i915_gem_context *ctx, if (!engine->default_state) regs[CTX_CONTEXT_CONTROL + 1] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - if (ctx == ctx->i915->preempt_context) + if (ctx == ctx->i915->preempt_context && INTEL_GEN(engine->i915) < 11) regs[CTX_CONTEXT_CONTROL + 1] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 636ced41225d..59d7b86012e9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -42,6 +42,9 @@ #define RING_CONTEXT_STATUS_BUF_LO(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8) #define RING_CONTEXT_STATUS_BUF_HI(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8 + 4) #define RING_CONTEXT_STATUS_PTR(engine) _MMIO((engine)->mmio_base + 0x3a0) +#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510) +#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550) +#define EL_CTRL_LOAD (1 << 0) /* The docs specify that the write pointer wraps around after 5h, "After status * is written out to the last available status QW at offset 5h, this pointer diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/intel_lrc_reg.h index a53336e2fc97..169a2239d6c7 100644 --- a/drivers/gpu/drm/i915/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/intel_lrc_reg.h @@ -63,5 +63,6 @@ #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 #define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19 +#define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A #endif /* _INTEL_LRC_REG_H_ */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3e60279f18b1..b8da4dcdd584 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8492,7 +8492,7 @@ static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) if (!HAS_PCH_CNP(dev_priv)) return; - /* Display WA #1181: cnp */ + /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */ I915_WRITE(SOUTH_DSPCLK_GATE_D, 
I915_READ(SOUTH_DSPCLK_GATE_D) | CNP_PWM_CGE_GATING_DISABLE); } @@ -8522,7 +8522,13 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) val |= SARBUNIT_CLKGATE_DIS; I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); + /* Wa_2201832410:cnl */ + val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE); + val |= GWUNIT_CLKGATE_DIS; + I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val); + /* WaDisableVFclkgate:cnl */ + /* WaVFUnitClockGatingDisable:cnl */ val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE); val |= VFUNIT_CLKGATE_DIS; I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val); diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 05770790a4e9..23175c5c4a50 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -451,8 +451,9 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + int crtc_hdisplay = crtc_state->base.adjusted_mode.crtc_hdisplay; + int crtc_vdisplay = crtc_state->base.adjusted_mode.crtc_vdisplay; + int psr_max_h = 0, psr_max_v = 0; /* * FIXME psr2_support is messed up. It's both computed @@ -462,10 +463,18 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, if (!dev_priv->psr.psr2_support) return false; - /* PSR2 is restricted to work with panel resolutions up to 3640x2304 */ - if (adjusted_mode->crtc_hdisplay > 3640 || - adjusted_mode->crtc_vdisplay > 2304) { - DRM_DEBUG_KMS("PSR2 not enabled, panel resolution too big\n"); + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + psr_max_h = 4096; + psr_max_v = 2304; + } else if (IS_GEN9(dev_priv)) { + psr_max_h = 3640; + psr_max_v = 2304; + } + + if (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v) { + DRM_DEBUG_KMS("PSR2 not enabled, resolution %dx%d > max supported %dx%d\n", + crtc_hdisplay, crtc_vdisplay, + psr_max_h, psr_max_v); return false; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 90e4380cbdd5..0320c2c4cfba 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -92,7 +92,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) #define instdone_subslice_mask(dev_priv__) \ (INTEL_GEN(dev_priv__) == 7 ? 
\ - 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask) + 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0]) #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ for ((slice__) = 0, (subslice__) = 0; \ @@ -209,9 +209,17 @@ struct intel_engine_execlists { bool no_priolist; /** - * @elsp: the ExecList Submission Port register + * @submit_reg: gen-specific execlist submission register + * set to the ExecList Submission Port (elsp) register pre-Gen11 and to + * the ExecList Submission Queue Contents register array for Gen11+ */ - u32 __iomem *elsp; + u32 __iomem *submit_reg; + + /** + * @ctrl_reg: the enhanced execlists control register, used to load the + * submit queue on the HW and to request preemptions to idle + */ + u32 __iomem *ctrl_reg; /** * @port: execlist port states @@ -356,9 +364,9 @@ struct intel_engine_cs { spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ struct rb_root waiters; /* sorted by retirement, priority */ - struct rb_root signals; /* sorted by retirement */ + struct list_head signals; /* sorted by retirement */ struct task_struct *signaler; /* used for fence signalling */ - struct i915_request __rcu *first_signal; + struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ @@ -951,7 +959,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -bool intel_breadcrumbs_busy(struct intel_engine_cs *engine); static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) { diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 0c14d1c04cbd..96e213ec202d 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1705,7 +1705,15 @@ static void intel_sdvo_enable_hotplug(struct intel_encoder *encoder) struct intel_sdvo *intel_sdvo = to_sdvo(encoder); intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG, - &intel_sdvo->hotplug_active, 2); + &intel_sdvo->hotplug_active, 2); +} + +static bool intel_sdvo_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector) +{ + intel_sdvo_enable_hotplug(encoder); + + return intel_encoder_hotplug(encoder, connector); } static bool @@ -2516,7 +2524,7 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) * Some SDVO devices have one-shot hotplug interrupts. * Ensure that they get re-enabled when an interrupt happens. 
*/ - intel_encoder->hot_plug = intel_sdvo_enable_hotplug; + intel_encoder->hotplug = intel_sdvo_hotplug; intel_sdvo_enable_hotplug(intel_encoder); } else { intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 5ae9a62712ca..4df7c2ef8576 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -37,6 +37,12 @@ static const char * const forcewake_domain_names[] = { "render", "blitter", "media", + "vdbox0", + "vdbox1", + "vdbox2", + "vdbox3", + "vebox0", + "vebox1", }; const char * @@ -774,6 +780,9 @@ void assert_forcewakes_active(struct drm_i915_private *dev_priv, /* We give fast paths for the really cool registers */ #define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000) +#define GEN11_NEEDS_FORCE_WAKE(reg) \ + ((reg) < 0x40000 || ((reg) >= 0x1c0000 && (reg) < 0x1dc000)) + #define __gen6_reg_read_fw_domains(offset) \ ({ \ enum forcewake_domains __fwd; \ @@ -826,6 +835,14 @@ find_fw_domain(struct drm_i915_private *dev_priv, u32 offset) if (!entry) return 0; + /* + * The list of FW domains depends on the SKU in gen11+ so we + * can't determine it statically. We use FORCEWAKE_ALL and + * translate it here to the list of available domains. + */ + if (entry->domains == FORCEWAKE_ALL) + return dev_priv->uncore.fw_domains; + WARN(entry->domains & ~dev_priv->uncore.fw_domains, "Uninitialized forcewake domain(s) 0x%x accessed at 0x%x\n", entry->domains & ~dev_priv->uncore.fw_domains, offset); @@ -860,6 +877,14 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { __fwd; \ }) +#define __gen11_fwtable_reg_read_fw_domains(offset) \ +({ \ + enum forcewake_domains __fwd = 0; \ + if (GEN11_NEEDS_FORCE_WAKE((offset))) \ + __fwd = find_fw_domain(dev_priv, offset); \ + __fwd; \ +}) + /* *Must* be sorted by offset! See intel_shadow_table_check(). 
*/ static const i915_reg_t gen8_shadowed_regs[] = { RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ @@ -871,6 +896,20 @@ static const i915_reg_t gen8_shadowed_regs[] = { /* TODO: Other registers are not yet used */ }; +static const i915_reg_t gen11_shadowed_regs[] = { + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + /* TODO: Other registers are not yet used */ +}; + static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) { u32 offset = i915_mmio_reg_offset(*reg); @@ -883,14 +922,17 @@ static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) return 0; } -static bool is_gen8_shadowed(u32 offset) -{ - const i915_reg_t *regs = gen8_shadowed_regs; - - return BSEARCH(offset, regs, ARRAY_SIZE(gen8_shadowed_regs), - mmio_reg_cmp); +#define __is_genX_shadowed(x) \ +static bool is_gen##x##_shadowed(u32 offset) \ +{ \ + const i915_reg_t *regs = gen##x##_shadowed_regs; \ + return BSEARCH(offset, regs, ARRAY_SIZE(gen##x##_shadowed_regs), \ + mmio_reg_cmp); \ } +__is_genX_shadowed(8) +__is_genX_shadowed(11) + #define __gen8_reg_write_fw_domains(offset) \ ({ \ enum forcewake_domains __fwd; \ @@ -929,6 +971,14 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { __fwd; \ }) +#define __gen11_fwtable_reg_write_fw_domains(offset) \ +({ \ + enum forcewake_domains __fwd = 0; \ + if (GEN11_NEEDS_FORCE_WAKE((offset)) && !is_gen11_shadowed(offset)) \ + __fwd = find_fw_domain(dev_priv, offset); \ + __fwd; \ +}) + /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER), @@ -965,6 +1015,40 @@ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_MEDIA), }; +/* *Must* be sorted by offset ranges! See intel_fw_table_check(). 
*/ +static const struct intel_forcewake_range __gen11_fw_ranges[] = { + GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb00, 0x1fff, 0), /* uncore range */ + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x4000, 0x51ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8160, 0x82ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8500, 0x8bff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8c00, 0x8cff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8d00, 0x93ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL), + GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xb480, 0xdfff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xe000, 0xe8ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xe900, 0x243ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x40000, 0x1bffff, 0), + GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), + GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1), + GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), + GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2), + GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3), + GEN_FW_RANGE(0x1d8000, 0x1dbfff, FORCEWAKE_MEDIA_VEBOX1) +}; + static void ilk_dummy_write(struct drm_i915_private *dev_priv) { @@ -1095,7 +1179,12 @@ func##_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { } #define __gen6_read(x) __gen_read(gen6, x) #define __fwtable_read(x) __gen_read(fwtable, x) +#define __gen11_fwtable_read(x) __gen_read(gen11_fwtable, x) +__gen11_fwtable_read(8) +__gen11_fwtable_read(16) +__gen11_fwtable_read(32) +__gen11_fwtable_read(64) __fwtable_read(8) __fwtable_read(16) __fwtable_read(32) @@ -1105,6 +1194,7 @@ __gen6_read(16) __gen6_read(32) __gen6_read(64) +#undef __gen11_fwtable_read #undef __fwtable_read #undef __gen6_read #undef GEN6_READ_FOOTER @@ -1181,7 +1271,11 @@ func##_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, boo } #define __gen8_write(x) __gen_write(gen8, x) #define __fwtable_write(x) __gen_write(fwtable, x) +#define __gen11_fwtable_write(x) __gen_write(gen11_fwtable, x) +__gen11_fwtable_write(8) +__gen11_fwtable_write(16) +__gen11_fwtable_write(32) __fwtable_write(8) __fwtable_write(16) __fwtable_write(32) @@ -1192,6 +1286,7 @@ __gen6_write(8) __gen6_write(16) __gen6_write(32) +#undef __gen11_fwtable_write #undef __fwtable_write #undef __gen8_write #undef __gen6_write @@ -1240,6 +1335,13 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, BUILD_BUG_ON(FORCEWAKE_RENDER != (1 << FW_DOMAIN_ID_RENDER)); BUILD_BUG_ON(FORCEWAKE_BLITTER != (1 << FW_DOMAIN_ID_BLITTER)); BUILD_BUG_ON(FORCEWAKE_MEDIA != (1 << FW_DOMAIN_ID_MEDIA)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX0)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX1)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX2)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX3)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX0)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << 
FW_DOMAIN_ID_MEDIA_VEBOX1)); + d->mask = BIT(domain_id); @@ -1267,7 +1369,34 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) dev_priv->uncore.fw_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL); } - if (INTEL_GEN(dev_priv) >= 9) { + if (INTEL_GEN(dev_priv) >= 11) { + int i; + + dev_priv->uncore.funcs.force_wake_get = fw_domains_get; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; + fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, + FORCEWAKE_RENDER_GEN9, + FORCEWAKE_ACK_RENDER_GEN9); + fw_domain_init(dev_priv, FW_DOMAIN_ID_BLITTER, + FORCEWAKE_BLITTER_GEN9, + FORCEWAKE_ACK_BLITTER_GEN9); + for (i = 0; i < I915_MAX_VCS; i++) { + if (!HAS_ENGINE(dev_priv, _VCS(i))) + continue; + + fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA_VDBOX0 + i, + FORCEWAKE_MEDIA_VDBOX_GEN11(i), + FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(i)); + } + for (i = 0; i < I915_MAX_VECS; i++) { + if (!HAS_ENGINE(dev_priv, _VECS(i))) + continue; + + fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA_VEBOX0 + i, + FORCEWAKE_MEDIA_VEBOX_GEN11(i), + FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i)); + } + } else if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_fallback; dev_priv->uncore.funcs.force_wake_put = fw_domains_put; @@ -1422,10 +1551,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen8); ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6); } - } else { + } else if (IS_GEN(dev_priv, 9, 10)) { ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable); ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); + } else { + ASSIGN_FW_DOMAINS_TABLE(__gen11_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen11_fwtable); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen11_fwtable); } iosf_mbi_register_pmic_bus_access_notifier( @@ -1994,7 +2127,9 @@ intel_uncore_forcewake_for_read(struct drm_i915_private *dev_priv, u32 offset = i915_mmio_reg_offset(reg); enum forcewake_domains fw_domains; - if (HAS_FWTABLE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 11) { + fw_domains = __gen11_fwtable_reg_read_fw_domains(offset); + } else if (HAS_FWTABLE(dev_priv)) { fw_domains = __fwtable_reg_read_fw_domains(offset); } else if (INTEL_GEN(dev_priv) >= 6) { fw_domains = __gen6_reg_read_fw_domains(offset); @@ -2015,7 +2150,9 @@ intel_uncore_forcewake_for_write(struct drm_i915_private *dev_priv, u32 offset = i915_mmio_reg_offset(reg); enum forcewake_domains fw_domains; - if (HAS_FWTABLE(dev_priv) && !IS_VALLEYVIEW(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 11) { + fw_domains = __gen11_fwtable_reg_write_fw_domains(offset); + } else if (HAS_FWTABLE(dev_priv) && !IS_VALLEYVIEW(dev_priv)) { fw_domains = __fwtable_reg_write_fw_domains(offset); } else if (IS_GEN8(dev_priv)) { fw_domains = __gen8_reg_write_fw_domains(offset); diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 53ef77d0c97c..dfdf444e4bcc 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -37,17 +37,28 @@ enum forcewake_domain_id { FW_DOMAIN_ID_RENDER = 0, FW_DOMAIN_ID_BLITTER, FW_DOMAIN_ID_MEDIA, + FW_DOMAIN_ID_MEDIA_VDBOX0, + FW_DOMAIN_ID_MEDIA_VDBOX1, + FW_DOMAIN_ID_MEDIA_VDBOX2, + FW_DOMAIN_ID_MEDIA_VDBOX3, + FW_DOMAIN_ID_MEDIA_VEBOX0, + FW_DOMAIN_ID_MEDIA_VEBOX1, FW_DOMAIN_ID_COUNT }; enum forcewake_domains { - FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), - FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), - FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), - FORCEWAKE_ALL = (FORCEWAKE_RENDER | - 
FORCEWAKE_BLITTER | - FORCEWAKE_MEDIA) + FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), + FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), + FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), + FORCEWAKE_MEDIA_VDBOX0 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX0), + FORCEWAKE_MEDIA_VDBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX1), + FORCEWAKE_MEDIA_VDBOX2 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX2), + FORCEWAKE_MEDIA_VDBOX3 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX3), + FORCEWAKE_MEDIA_VEBOX0 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX0), + FORCEWAKE_MEDIA_VEBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX1), + + FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1 }; struct intel_uncore_funcs { diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 2f6367643171..f76f2597df5c 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -61,20 +61,30 @@ static int intel_fw_table_check(const struct intel_forcewake_range *ranges, static int intel_shadow_table_check(void) { - const i915_reg_t *reg = gen8_shadowed_regs; - unsigned int i; + struct { + const i915_reg_t *regs; + unsigned int size; + } reg_lists[] = { + { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, + { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, + }; + const i915_reg_t *reg; + unsigned int i, j; s32 prev; - for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) { - u32 offset = i915_mmio_reg_offset(*reg); + for (j = 0; j < ARRAY_SIZE(reg_lists); ++j) { + reg = reg_lists[j].regs; + for (i = 0, prev = -1; i < reg_lists[j].size; i++, reg++) { + u32 offset = i915_mmio_reg_offset(*reg); - if (prev >= (s32)offset) { - pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", - __func__, i, offset, prev); - return -EINVAL; - } + if (prev >= (s32)offset) { + pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", + __func__, i, offset, prev); + return -EINVAL; + } - prev = offset; + prev = offset; + } } return 0; @@ -90,6 +100,7 @@ int intel_uncore_mock_selftests(void) { __vlv_fw_ranges, ARRAY_SIZE(__vlv_fw_ranges), false }, { __chv_fw_ranges, ARRAY_SIZE(__chv_fw_ranges), false }, { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, + { __gen11_fw_ranges, ARRAY_SIZE(__gen11_fw_ranges), true }, }; int err, i; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 29fa48e4755d..7f5634ce8e88 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -318,6 +318,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_PERF_OPEN 0x36 #define DRM_I915_PERF_ADD_CONFIG 0x37 #define DRM_I915_PERF_REMOVE_CONFIG 0x38 +#define DRM_I915_QUERY 0x39 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -375,6 +376,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) +#define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. 
@@ -1606,15 +1608,115 @@ struct drm_i915_perf_oa_config { __u32 n_flex_regs; /* - * These fields are pointers to tuples of u32 values (register - * address, value). For example the expected length of the buffer - * pointed by mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs). + * These fields are pointers to tuples of u32 values (register address, + * value). For example the expected length of the buffer pointed by + * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs). */ __u64 mux_regs_ptr; __u64 boolean_regs_ptr; __u64 flex_regs_ptr; }; +struct drm_i915_query_item { + __u64 query_id; +#define DRM_I915_QUERY_TOPOLOGY_INFO 1 + + /* + * When set to zero by userspace, this is filled with the size of the + * data to be written at the data_ptr pointer. The kernel sets this + * value to a negative value to signal an error on a particular query + * item. + */ + __s32 length; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * Data will be written at the location pointed by data_ptr when the + * value of length matches the length of the data to be written by the + * kernel. + */ + __u64 data_ptr; +}; + +struct drm_i915_query { + __u32 num_items; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * This points to an array of num_items drm_i915_query_item structures. + */ + __u64 items_ptr; +}; + +/* + * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO: + * + * data: contains the 3 pieces of information: + * + * - the slice mask with one bit per slice telling whether a slice is + * available. The availability of slice X can be queried with the following + * formula: + * + * (data[X / 8] >> (X % 8)) & 1 + * + * - the subslice mask for each slice with one bit per subslice telling + * whether a subslice is available. The availability of subslice Y in slice + * X can be queried with the following formula: + * + * (data[subslice_offset + + * X * subslice_stride + + * Y / 8] >> (Y % 8)) & 1 + * + * - the EU mask for each subslice in each slice with one bit per EU telling + * whether an EU is available. The availability of EU Z in subslice Y in + * slice X can be queried with the following formula: + * + * (data[eu_offset + + * (X * max_subslices + Y) * eu_stride + + * Z / 8] >> (Z % 8)) & 1 + */ +struct drm_i915_query_topology_info { + /* + * Unused for now. Must be cleared to zero. + */ + __u16 flags; + + __u16 max_slices; + __u16 max_subslices; + __u16 max_eus_per_subslice; + + /* + * Offset in data[] at which the subslice masks are stored. + */ + __u16 subslice_offset; + + /* + * Stride at which each of the subslice masks for each slice are + * stored. + */ + __u16 subslice_stride; + + /* + * Offset in data[] at which the EU masks are stored. + */ + __u16 eu_offset; + + /* + * Stride at which each of the EU masks for each subslice are stored. + */ + __u16 eu_stride; + + __u8 data[]; +}; + #if defined(__cplusplus) } #endif
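To make the new query uAPI above concrete, here is a minimal userspace sketch of the two-pass flow the drm_i915_query_item comments describe: a first DRM_IOCTL_I915_QUERY call with length = 0 asks the kernel for the blob size, and a second call with a buffer of exactly that size asks it to fill the data. The render-node path and the bare-bones error handling are illustrative assumptions; a kernel carrying this patch, with matching installed headers, is required.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

int main(void)
{
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
		/* .length = 0: first pass only asks for the size */
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};
	int fd = open("/dev/dri/renderD128", O_RDWR); /* assumed node */

	if (fd < 0 || ioctl(fd, DRM_IOCTL_I915_QUERY, &query) < 0)
		return 1;
	if (item.length <= 0) /* negative length signals a per-item error */
		return 1;

	void *data = calloc(1, item.length);

	item.data_ptr = (uintptr_t)data;
	/* second pass: length now matches, so the kernel writes the data */
	if (ioctl(fd, DRM_IOCTL_I915_QUERY, &query) < 0)
		return 1;

	printf("topology blob: %d bytes\n", item.length);
	free(data);
	return 0;
}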
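The offset/stride formulas documented in the drm_i915_query_topology_info comment translate directly into accessors like the following; this is just the comment's arithmetic restated as C, applied to a blob obtained as above.

#include <drm/i915_drm.h>

static int slice_available(const struct drm_i915_query_topology_info *ti,
			   int s)
{
	return (ti->data[s / 8] >> (s % 8)) & 1;
}

static int subslice_available(const struct drm_i915_query_topology_info *ti,
			      int s, int ss)
{
	return (ti->data[ti->subslice_offset + s * ti->subslice_stride +
			 ss / 8] >> (ss % 8)) & 1;
}

static int eu_available(const struct drm_i915_query_topology_info *ti,
			int s, int ss, int eu)
{
	return (ti->data[ti->eu_offset +
			 (s * ti->max_subslices + ss) * ti->eu_stride +
			 eu / 8] >> (eu % 8)) & 1;
}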
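On the kernel side, the sseu_eu_idx()/sseu_get_eus()/sseu_set_eus() helpers added to intel_device_info.h pack one DIV_ROUND_UP(max_eus_per_subslice, 8)-byte mask per (slice, subslice) pair into the flat eu_mask[] array, with the subslices of a slice stored contiguously. A standalone restatement of that indexing, using illustrative names rather than kernel API:

#include <assert.h>
#include <stdint.h>

#define BITS_PER_BYTE		8
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* illustrative stand-in for the kernel's sseu_dev_info */
struct topo {
	uint8_t max_subslices;
	uint8_t max_eus_per_subslice;
	uint8_t eu_mask[6 * 7];	/* GEN_MAX_SLICES * GEN_MAX_SUBSLICES */
};

static int eu_idx(const struct topo *t, int s, int ss)
{
	int subslice_stride = DIV_ROUND_UP(t->max_eus_per_subslice,
					   BITS_PER_BYTE);
	int slice_stride = t->max_subslices * subslice_stride;

	return s * slice_stride + ss * subslice_stride;
}

int main(void)
{
	struct topo t = { .max_subslices = 4, .max_eus_per_subslice = 8 };

	/*
	 * With 8 EUs per subslice each mask is one byte wide, so the
	 * mask for slice 1, subslice 2 lives at byte 1 * 4 + 2 = 6.
	 */
	assert(eu_idx(&t, 1, 2) == 6);
	return 0;
}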
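The Gen11 context descriptor comment added to intel_lrc.c amounts to the following packing for the upper dword. The field widths are transcribed from that comment (11-bit SW context ID, 6-bit engine instance, 6-bit SW counter, 3-bit engine class, bit 54 must be zero), not taken from kernel headers, so treat this purely as an illustration of the layout.

#include <stdint.h>

/*
 * Illustrative only: bit positions are within the full 64-bit
 * descriptor, as numbered in the intel_lrc.c comment.
 */
static uint64_t gen11_desc_high(uint32_t sw_ctx_id, uint32_t instance,
				uint32_t sw_counter, uint32_t class)
{
	return ((uint64_t)(sw_ctx_id & 0x7ff) << 37) |	/* bits 37-47 */
	       ((uint64_t)(instance & 0x3f) << 48) |	/* bits 48-53 */
	       ((uint64_t)(sw_counter & 0x3f) << 55) |	/* bits 55-60 */
	       ((uint64_t)(class & 0x7) << 61);		/* bits 61-63 */
}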
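Finally, the "*Must* be sorted by offset ranges!" requirement on tables such as __gen11_fw_ranges exists because find_fw_domain() binary-searches them by register offset. A simplified sketch of that lookup over standalone types (not the kernel's intel_forcewake_range/BSEARCH machinery):

#include <stdint.h>

/* simplified stand-in for intel_forcewake_range */
struct fw_range {
	uint32_t start, end;	/* inclusive mmio offset range */
	uint32_t domains;	/* forcewake domain bitmask */
};

static uint32_t find_domains(const struct fw_range *r, int n, uint32_t off)
{
	int lo = 0, hi = n - 1;

	while (lo <= hi) {
		int mid = lo + (hi - lo) / 2;

		if (off < r[mid].start)
			hi = mid - 1;
		else if (off > r[mid].end)
			lo = mid + 1;
		else
			return r[mid].domains;
	}
	return 0;	/* offset needs no forcewake */
}

An unsorted table would silently break this search, which is what the extended intel_fw_table_check()/intel_shadow_table_check() selftests above guard against.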