From 7b92c1bd0540b64f54d98331d67e57266f9343c4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 28 Jun 2017 13:35:48 +0100 Subject: drm/i915: Avoid keeping waitboost active for signaling threads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once a client has requested a waitboost, we keep that waitboost active until all clients are no longer waiting. This is because we don't distinguish which waiter deserves the boost. However, with the advent of fence signaling, the signaler threads appear as waiters to the RPS interrupt handler. So instead of using a single boolean to track when to keep the waitboost active, use a counter of all outstanding waitboosted requests. At this point, I have removed all vestiges of the rate limiting on clients. Whilst this means that compositors should remain more fluid, it also means that boosts are more prevalent. See commit b29c19b64528 ("drm/i915: Boost RPS frequency for CPU stalls") for a longer discussion on the pros and cons of both approaches. A drawback of this implementation is that it requires constant request submission to keep the waitboost trimmed (as it is now cancelled when the request is completed). This will be fine for a busy system, but near idle the boosts may be kept for longer than desired (effectively tens of vblanks worstcase) and there is a reliance on rc6 instead. v2: Remove defunct rps.client_lock Reported-by: Michał Winiarski Signed-off-by: Chris Wilson Cc: Michał Winiarski Reviewed-by: Michał Winiarski Link: http://patchwork.freedesktop.org/patch/msgid/20170628123548.9236-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 57 ++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 35 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 48ea0fca1f72..c3fcadfa0ae7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6126,47 +6126,35 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } mutex_unlock(&dev_priv->rps.hw_lock); - - spin_lock(&dev_priv->rps.client_lock); - while (!list_empty(&dev_priv->rps.clients)) - list_del_init(dev_priv->rps.clients.next); - spin_unlock(&dev_priv->rps.client_lock); } -void gen6_rps_boost(struct drm_i915_private *dev_priv, - struct intel_rps_client *rps, - unsigned long submitted) +void gen6_rps_boost(struct drm_i915_gem_request *rq, + struct intel_rps_client *rps) { + struct drm_i915_private *i915 = rq->i915; + bool boost; + /* This is intentionally racy! We peek at the state here, then * validate inside the RPS worker. */ - if (!(dev_priv->gt.awake && - dev_priv->rps.enabled && - dev_priv->rps.cur_freq < dev_priv->rps.boost_freq)) + if (!i915->rps.enabled) return; - /* Force a RPS boost (and don't count it against the client) if - * the GPU is severely congested. - */ - if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) - rps = NULL; - - spin_lock(&dev_priv->rps.client_lock); - if (rps == NULL || list_empty(&rps->link)) { - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->rps.interrupts_enabled) { - dev_priv->rps.client_boost = true; - schedule_work(&dev_priv->rps.work); - } - spin_unlock_irq(&dev_priv->irq_lock); - - if (rps != NULL) { - list_add(&rps->link, &dev_priv->rps.clients); - rps->boosts++; - } else - dev_priv->rps.boosts++; + boost = false; + spin_lock_irq(&rq->lock); + if (!rq->waitboost && !i915_gem_request_completed(rq)) { + atomic_inc(&i915->rps.num_waiters); + rq->waitboost = true; + boost = true; } - spin_unlock(&dev_priv->rps.client_lock); + spin_unlock_irq(&rq->lock); + if (!boost) + return; + + if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq) + schedule_work(&i915->rps.work); + + atomic_inc(rps ? &rps->boosts : &i915->rps.boosts); } int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) @@ -9113,7 +9101,7 @@ static void __intel_rps_boost_work(struct work_struct *work) struct drm_i915_gem_request *req = boost->req; if (!i915_gem_request_completed(req)) - gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); + gen6_rps_boost(req, NULL); i915_gem_request_put(req); kfree(boost); @@ -9142,11 +9130,10 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req) void intel_pm_setup(struct drm_i915_private *dev_priv) { mutex_init(&dev_priv->rps.hw_lock); - spin_lock_init(&dev_priv->rps.client_lock); INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, __intel_autoenable_gt_powersave); - INIT_LIST_HEAD(&dev_priv->rps.clients); + atomic_set(&dev_priv->rps.num_waiters, 0); dev_priv->pm.suspended = false; atomic_set(&dev_priv->pm.wakeref_count, 0); -- cgit v1.2.3 From 35ceabf3cdb557b23bbc09f0b6f7bb2b545185b1 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 6 Jul 2017 13:41:13 -0700 Subject: drm/i915/cnl: Inherit RPS stuff from previous platforms. Apparently no change on RPS stuff from previous platforms. v2: Merging to rps related patches in one and also adding missed cases. Cc: David Weinehall Signed-off-by: Rodrigo Vivi Reviewed-by: David Weinehall Link: http://patchwork.freedesktop.org/patch/msgid/1499373673-25066-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 20 ++++++++++++-------- drivers/gpu/drm/i915/i915_reg.h | 4 ++-- drivers/gpu/drm/i915/i915_sysfs.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 18 +++++++++--------- 4 files changed, 24 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 643f56b8b87c..ca2e34b1c798 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1159,7 +1159,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); reqf = I915_READ(GEN6_RPNSWREQ); - if (IS_GEN9(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) reqf >>= 23; else { reqf &= ~GEN6_TURBO_DISABLE; @@ -1181,7 +1181,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; rpcurdown = I915_READ(GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; rpprevdown = I915_READ(GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - if (IS_GEN9(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; @@ -1210,7 +1210,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) dev_priv->rps.pm_intrmsk_mbz); seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); seq_printf(m, "Render p-state ratio: %d\n", - (gt_perf_status & (IS_GEN9(dev_priv) ? 0x1ff00 : 0xff00)) >> 8); + (gt_perf_status & (INTEL_GEN(dev_priv) >= 9 ? 0x1ff00 : 0xff00)) >> 8); seq_printf(m, "Render p-state VID: %d\n", gt_perf_status & 0xff); seq_printf(m, "Render p-state limit: %d\n", @@ -1241,18 +1241,21 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; - max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) || + IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; - max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) || + IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; - max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) || + IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -1855,7 +1858,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) if (ret) goto out; - if (IS_GEN9_BC(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER; @@ -1875,7 +1878,8 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) &ia_freq); seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", intel_gpu_freq(dev_priv, (gpu_freq * - (IS_GEN9_BC(dev_priv) ? + (IS_GEN9_BC(dev_priv) || + IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 64cc674b652a..21ab12f4e72a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3522,7 +3522,7 @@ enum skl_disp_power_wells { #define INTERVAL_1_28_US(us) roundup(((us) * 100) >> 7, 25) #define INTERVAL_1_33_US(us) (((us) * 3) >> 2) #define INTERVAL_0_833_US(us) (((us) * 6) / 5) -#define GT_INTERVAL_FROM_US(dev_priv, us) (IS_GEN9(dev_priv) ? \ +#define GT_INTERVAL_FROM_US(dev_priv, us) (INTEL_GEN(dev_priv) >= 9 ? \ (IS_GEN9_LP(dev_priv) ? \ INTERVAL_0_833_US(us) : \ INTERVAL_1_33_US(us)) : \ @@ -3531,7 +3531,7 @@ enum skl_disp_power_wells { #define INTERVAL_1_28_TO_US(interval) (((interval) << 7) / 100) #define INTERVAL_1_33_TO_US(interval) (((interval) << 2) / 3) #define INTERVAL_0_833_TO_US(interval) (((interval) * 5) / 6) -#define GT_PM_INTERVAL_TO_US(dev_priv, interval) (IS_GEN9(dev_priv) ? \ +#define GT_PM_INTERVAL_TO_US(dev_priv, interval) (INTEL_GEN(dev_priv) >= 9 ? \ (IS_GEN9_LP(dev_priv) ? \ INTERVAL_0_833_TO_US(interval) : \ INTERVAL_1_33_TO_US(interval)) : \ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 3736c9f79197..7fcf00622c4c 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -253,7 +253,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, ret = intel_gpu_freq(dev_priv, (freq >> 8) & 0xff); } else { u32 rpstat = I915_READ(GEN6_RPSTAT1); - if (IS_GEN9(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) ret = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) ret = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c3fcadfa0ae7..6db833e6dcbd 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5852,7 +5852,7 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) * the hw runs at the minimal clock before selecting the desired * frequency, if the down threshold expires in that window we will not * receive a down interrupt. */ - if (IS_GEN9(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { limits = (dev_priv->rps.max_freq_softlimit) << 23; if (val <= dev_priv->rps.min_freq_softlimit) limits |= (dev_priv->rps.min_freq_softlimit) << 14; @@ -5994,7 +5994,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) if (val != dev_priv->rps.cur_freq) { gen6_set_rps_thresholds(dev_priv, val); - if (IS_GEN9(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) I915_WRITE(GEN6_RPNSWREQ, GEN9_FREQUENCY(val)); else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) @@ -6353,7 +6353,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_GEN9_BC(dev_priv)) { + IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { u32 ddcc_status = 0; if (sandybridge_pcode_read(dev_priv, @@ -6366,7 +6366,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.max_freq); } - if (IS_GEN9_BC(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ @@ -6672,7 +6672,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) /* convert DDR frequency from units of 266.6MHz to bandwidth */ min_ring_freq = mult_frac(min_ring_freq, 8, 3); - if (IS_GEN9_BC(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; @@ -6690,7 +6690,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) int diff = max_gpu_freq - gpu_freq; unsigned int ia_freq = 0, ring_freq = 0; - if (IS_GEN9_BC(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* * ring_freq = 2 * GT. ring_freq is in 100MHz units * No floor required for ring frequency on SKL. @@ -7821,7 +7821,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); - if (IS_GEN9_BC(dev_priv)) + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); @@ -9066,7 +9066,7 @@ static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) { - if (IS_GEN9(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); else if (IS_CHERRYVIEW(dev_priv)) @@ -9079,7 +9079,7 @@ int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) { - if (IS_GEN9(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, GT_FREQUENCY_MULTIPLIER); else if (IS_CHERRYVIEW(dev_priv)) -- cgit v1.2.3 From eed02a7b53131abb796ba8a8cf2886cee366a89f Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 5 Jul 2017 20:01:45 +0530 Subject: drm/i915: Always perform internal fixed16 division in 64 bits This patch combines fixed_16_16_div & fixed_16_16_div_u64 wrappers. And new fixed_16_16_div wrapper always performs division operation in u64 internally, to avoid any data loss which was happening in earlier version of wrapper. earlier wrapper was converting u32 to fixed16 in 32 bit so we were losing 16-MSB data. Signed-off-by: Mahesh Kumar Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170705143154.32132-3-mahesh1.kumar@intel.com [mlankhorst: Fix typo in commit message.] --- drivers/gpu/drm/i915/i915_drv.h | 9 --------- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1b525051bf5f..95d5328a26e9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -196,15 +196,6 @@ static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t val, } static inline uint_fixed_16_16_t fixed_16_16_div(uint32_t val, uint32_t d) -{ - uint_fixed_16_16_t fp, res; - - fp = u32_to_fixed_16_16(val); - res.val = DIV_ROUND_UP(fp.val, d); - return res; -} - -static inline uint_fixed_16_16_t fixed_16_16_div_u64(uint32_t val, uint32_t d) { uint64_t interm_val; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6db833e6dcbd..05eabadaa23d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4276,7 +4276,7 @@ static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, return FP_16_16_MAX; wm_intermediate_val = latency * pixel_rate * cpp; - ret = fixed_16_16_div_u64(wm_intermediate_val, 1000 * 512); + ret = fixed_16_16_div(wm_intermediate_val, 1000 * 512); return ret; } @@ -4314,7 +4314,7 @@ intel_get_linetime_us(struct intel_crtc_state *cstate) return u32_to_fixed_16_16(0); crtc_htotal = cstate->base.adjusted_mode.crtc_htotal; - linetime_us = fixed_16_16_div_u64(crtc_htotal * 1000, pixel_rate); + linetime_us = fixed_16_16_div(crtc_htotal * 1000, pixel_rate); return linetime_us; } -- cgit v1.2.3 From eac2cb81fb87223198c2be93bfd49357d71be669 Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 5 Jul 2017 20:01:46 +0530 Subject: drm/i915: cleanup fixed-point wrappers naming This patch make naming of fixed-point wrappers consistent operation__<1st operand>_<2nd operand> also shorten the name for fixed_16_16 to fixed16 s/u32_to_fixed_16_16/u32_to_fixed16 s/fixed_16_16_to_u32/fixed16_to_u32 s/fixed_16_16_to_u32_round_up/fixed16_to_u32_round_up s/min_fixed_16_16/min_fixed16 s/max_fixed_16_16/max_fixed16 s/mul_u32_fixed_16_16/mul_u32_fixed16 s/fixed_16_16_div/div_fixed16 Changes Since V1: - Split the patch in more logical patches (Maarten) Changes Since V2: - Rebase Signed-off-by: Mahesh Kumar Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170705143154.32132-4-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 14 ++++----- drivers/gpu/drm/i915/intel_pm.c | 63 ++++++++++++++++++++--------------------- 2 files changed, 38 insertions(+), 39 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 95d5328a26e9..1fc25bd5c904 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -122,7 +122,7 @@ static inline bool is_fixed16_zero(uint_fixed_16_16_t val) return false; } -static inline uint_fixed_16_16_t u32_to_fixed_16_16(uint32_t val) +static inline uint_fixed_16_16_t u32_to_fixed16(uint32_t val) { uint_fixed_16_16_t fp; @@ -132,17 +132,17 @@ static inline uint_fixed_16_16_t u32_to_fixed_16_16(uint32_t val) return fp; } -static inline uint32_t fixed_16_16_to_u32_round_up(uint_fixed_16_16_t fp) +static inline uint32_t fixed16_to_u32_round_up(uint_fixed_16_16_t fp) { return DIV_ROUND_UP(fp.val, 1 << 16); } -static inline uint32_t fixed_16_16_to_u32(uint_fixed_16_16_t fp) +static inline uint32_t fixed16_to_u32(uint_fixed_16_16_t fp) { return fp.val >> 16; } -static inline uint_fixed_16_16_t min_fixed_16_16(uint_fixed_16_16_t min1, +static inline uint_fixed_16_16_t min_fixed16(uint_fixed_16_16_t min1, uint_fixed_16_16_t min2) { uint_fixed_16_16_t min; @@ -151,7 +151,7 @@ static inline uint_fixed_16_16_t min_fixed_16_16(uint_fixed_16_16_t min1, return min; } -static inline uint_fixed_16_16_t max_fixed_16_16(uint_fixed_16_16_t max1, +static inline uint_fixed_16_16_t max_fixed16(uint_fixed_16_16_t max1, uint_fixed_16_16_t max2) { uint_fixed_16_16_t max; @@ -195,7 +195,7 @@ static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t val, return clamp_u64_to_fixed16(intermediate_val); } -static inline uint_fixed_16_16_t fixed_16_16_div(uint32_t val, uint32_t d) +static inline uint_fixed_16_16_t div_fixed16(uint32_t val, uint32_t d) { uint64_t interm_val; @@ -215,7 +215,7 @@ static inline uint32_t div_round_up_u32_fixed16(uint32_t val, return clamp_t(uint32_t, interm_val, 0, ~0); } -static inline uint_fixed_16_16_t mul_u32_fixed_16_16(uint32_t val, +static inline uint_fixed_16_16_t mul_u32_fixed16(uint32_t val, uint_fixed_16_16_t mul) { uint64_t intermediate_val; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 05eabadaa23d..2603df15b4e1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3837,7 +3837,7 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, uint_fixed_16_16_t downscale_h, downscale_w; if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) - return u32_to_fixed_16_16(0); + return u32_to_fixed16(0); /* n.b., src is 16.16 fixed point, dst is whole integer */ if (plane->id == PLANE_CURSOR) { @@ -3861,10 +3861,10 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, dst_h = drm_rect_height(&pstate->base.dst); } - fp_w_ratio = fixed_16_16_div(src_w, dst_w); - fp_h_ratio = fixed_16_16_div(src_h, dst_h); - downscale_w = max_fixed_16_16(fp_w_ratio, u32_to_fixed_16_16(1)); - downscale_h = max_fixed_16_16(fp_h_ratio, u32_to_fixed_16_16(1)); + fp_w_ratio = div_fixed16(src_w, dst_w); + fp_h_ratio = div_fixed16(src_h, dst_h); + downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1)); + downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1)); return mul_fixed16(downscale_w, downscale_h); } @@ -3872,7 +3872,7 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, static uint_fixed_16_16_t skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) { - uint_fixed_16_16_t pipe_downscale = u32_to_fixed_16_16(1); + uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1); if (!crtc_state->base.enable) return pipe_downscale; @@ -3891,10 +3891,10 @@ skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) if (!dst_w || !dst_h) return pipe_downscale; - fp_w_ratio = fixed_16_16_div(src_w, dst_w); - fp_h_ratio = fixed_16_16_div(src_h, dst_h); - downscale_w = max_fixed_16_16(fp_w_ratio, u32_to_fixed_16_16(1)); - downscale_h = max_fixed_16_16(fp_h_ratio, u32_to_fixed_16_16(1)); + fp_w_ratio = div_fixed16(src_w, dst_w); + fp_h_ratio = div_fixed16(src_h, dst_h); + downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1)); + downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1)); pipe_downscale = mul_fixed16(downscale_w, downscale_h); } @@ -3913,14 +3913,14 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, int crtc_clock, dotclk; uint32_t pipe_max_pixel_rate; uint_fixed_16_16_t pipe_downscale; - uint_fixed_16_16_t max_downscale = u32_to_fixed_16_16(1); + uint_fixed_16_16_t max_downscale = u32_to_fixed16(1); if (!cstate->base.enable) return 0; drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { uint_fixed_16_16_t plane_downscale; - uint_fixed_16_16_t fp_9_div_8 = fixed_16_16_div(9, 8); + uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8); int bpp; if (!intel_wm_plane_visible(cstate, @@ -3938,7 +3938,7 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, plane_downscale = mul_fixed16(plane_downscale, fp_9_div_8); - max_downscale = max_fixed_16_16(plane_downscale, max_downscale); + max_downscale = max_fixed16(plane_downscale, max_downscale); } pipe_downscale = skl_pipe_downscale_amount(cstate); @@ -4276,7 +4276,7 @@ static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, return FP_16_16_MAX; wm_intermediate_val = latency * pixel_rate * cpp; - ret = fixed_16_16_div(wm_intermediate_val, 1000 * 512); + ret = div_fixed16(wm_intermediate_val, 1000 * 512); return ret; } @@ -4294,7 +4294,7 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate, wm_intermediate_val = latency * pixel_rate; wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000); - ret = mul_u32_fixed_16_16(wm_intermediate_val, plane_blocks_per_line); + ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line); return ret; } @@ -4306,15 +4306,15 @@ intel_get_linetime_us(struct intel_crtc_state *cstate) uint_fixed_16_16_t linetime_us; if (!cstate->base.active) - return u32_to_fixed_16_16(0); + return u32_to_fixed16(0); pixel_rate = cstate->pixel_rate; if (WARN_ON(pixel_rate == 0)) - return u32_to_fixed_16_16(0); + return u32_to_fixed16(0); crtc_htotal = cstate->base.adjusted_mode.crtc_htotal; - linetime_us = fixed_16_16_div(crtc_htotal * 1000, pixel_rate); + linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate); return linetime_us; } @@ -4434,14 +4434,14 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (y_tiled) { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line * y_min_scanlines, 512); - plane_blocks_per_line = fixed_16_16_div(interm_pbpl, + plane_blocks_per_line = div_fixed16(interm_pbpl, y_min_scanlines); } else if (x_tiled) { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512); - plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl); + plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } else { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; - plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl); + plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } method1 = skl_wm_method1(plane_pixel_rate, cpp, latency); @@ -4450,35 +4450,35 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, latency, plane_blocks_per_line); - y_tile_minimum = mul_u32_fixed_16_16(y_min_scanlines, - plane_blocks_per_line); + y_tile_minimum = mul_u32_fixed16(y_min_scanlines, + plane_blocks_per_line); if (y_tiled) { - selected_result = max_fixed_16_16(method2, y_tile_minimum); + selected_result = max_fixed16(method2, y_tile_minimum); } else { uint32_t linetime_us; - linetime_us = fixed_16_16_to_u32_round_up( + linetime_us = fixed16_to_u32_round_up( intel_get_linetime_us(cstate)); if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && (plane_bytes_per_line / 512 < 1)) selected_result = method2; else if ((ddb_allocation && ddb_allocation / - fixed_16_16_to_u32_round_up(plane_blocks_per_line)) >= 1) - selected_result = min_fixed_16_16(method1, method2); + fixed16_to_u32_round_up(plane_blocks_per_line)) >= 1) + selected_result = min_fixed16(method1, method2); else if (latency >= linetime_us) - selected_result = min_fixed_16_16(method1, method2); + selected_result = min_fixed16(method1, method2); else selected_result = method1; } - res_blocks = fixed_16_16_to_u32_round_up(selected_result) + 1; + res_blocks = fixed16_to_u32_round_up(selected_result) + 1; res_lines = div_round_up_fixed16(selected_result, plane_blocks_per_line); if (level >= 1 && level <= 7) { if (y_tiled) { - res_blocks += fixed_16_16_to_u32_round_up(y_tile_minimum); + res_blocks += fixed16_to_u32_round_up(y_tile_minimum); res_lines += y_min_scanlines; } else { res_blocks++; @@ -4563,8 +4563,7 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate) if (is_fixed16_zero(linetime_us)) return 0; - linetime_wm = fixed_16_16_to_u32_round_up(mul_u32_fixed_16_16(8, - linetime_us)); + linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); /* Display WA #1135: bxt. */ if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled) -- cgit v1.2.3 From 129eaa957dd5a717edd70cfaf0626c143c03e54e Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 5 Jul 2017 20:01:48 +0530 Subject: drm/i915/skl+: WM calculation don't require height height of plane was require to swap width/height in case of 90/270 rotation. Now src structure contains already swapped values, So we don't have to calculate height of the plane. Signed-off-by: Mahesh Kumar Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170705143154.32132-6-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2603df15b4e1..81e77f073d8c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4361,7 +4361,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint32_t plane_bytes_per_line; uint32_t res_blocks, res_lines; uint8_t cpp; - uint32_t width = 0, height = 0; + uint32_t width = 0; uint32_t plane_pixel_rate; uint_fixed_16_16_t y_tile_minimum; uint32_t y_min_scanlines; @@ -4390,7 +4390,6 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (plane->id == PLANE_CURSOR) { width = intel_pstate->base.crtc_w; - height = intel_pstate->base.crtc_h; } else { /* * Src coordinates are already rotated by 270 degrees for @@ -4398,7 +4397,6 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, * GTT mapping), hence no need to account for rotation here. */ width = drm_rect_width(&intel_pstate->base.src) >> 16; - height = drm_rect_height(&intel_pstate->base.src) >> 16; } cpp = fb->format->cpp[0]; -- cgit v1.2.3 From b064be0784530d2a98b589b40793e3d421fb93ba Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 5 Jul 2017 20:01:49 +0530 Subject: drm/i915/skl+: unify cpp value in WM calculation use same cpp value in different phase of plane WM caluclation. Signed-off-by: Mahesh Kumar Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170705143154.32132-7-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 81e77f073d8c..ee2a349cfe68 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4399,13 +4399,11 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, width = drm_rect_width(&intel_pstate->base.src) >> 16; } - cpp = fb->format->cpp[0]; + cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] : + fb->format->cpp[0]; plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); if (drm_rotation_90_or_270(pstate->rotation)) { - int cpp = (fb->format->format == DRM_FORMAT_NV12) ? - fb->format->cpp[1] : - fb->format->cpp[0]; switch (cpp) { case 1: -- cgit v1.2.3 From 54d20ed1fff23c7d2633f01fc788111bf9c51c5d Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 17 Jul 2017 14:02:30 +0200 Subject: drm/i915: Fix bad comparison in skl_compute_plane_wm, v2. ddb_allocation && ddb_allocation / blocks_per_line >= 1 is the same as ddb_allocation >= blocks_per_line, so use the latter to simplify this. This fixes the following compiler warning: drivers/gpu/drm/i915/intel_pm.c:4467]: (warning) Comparison of a boolean expression with an integer other than 0 or 1. Changes since v1: - Rebase, was missing the changes to the macro names. Signed-off-by: Maarten Lankhorst Fixes: d555cb5827d6 ("drm/i915/skl+: use linetime latency if ddb size is not available") Cc: "Mahesh Kumar" Reported-by: David Binderman Cc: David Binderman Cc: # v4.13-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170717120230.2023-1-maarten.lankhorst@linux.intel.com Reviewed-by: Mahesh Kumar --- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ee2a349cfe68..48785ef75d33 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4459,8 +4459,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && (plane_bytes_per_line / 512 < 1)) selected_result = method2; - else if ((ddb_allocation && ddb_allocation / - fixed16_to_u32_round_up(plane_blocks_per_line)) >= 1) + else if (ddb_allocation >= + fixed16_to_u32_round_up(plane_blocks_per_line)) selected_result = min_fixed16(method1, method2); else if (latency >= linetime_us) selected_result = min_fixed16(method1, method2); -- cgit v1.2.3 From 5a9cfff46d193388749f2c4e6ec75f40b47942d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Jul 2017 09:50:22 +0100 Subject: drm/i915: Include mbox details for pcode read/write failures If we fail at punit communication, include both the mbox address and the value we tried to write so that we can identify the invalid sequence. Signed-off-by: Chris Wilson Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170728085022.1586-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 48785ef75d33..8711c1f04079 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8831,6 +8831,7 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) case GEN6_PCODE_SUCCESS: return 0; case GEN6_PCODE_UNIMPLEMENTED_CMD: + return -ENODEV; case GEN6_PCODE_ILLEGAL_CMD: return -ENXIO; case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: @@ -8878,7 +8879,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val */ if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { - DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n"); + DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n", + mbox, __builtin_return_address(0)); return -EAGAIN; } @@ -8889,7 +8891,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val if (__intel_wait_for_register_fw(dev_priv, GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, 500, 0, NULL)) { - DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox); + DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n", + mbox, __builtin_return_address(0)); return -ETIMEDOUT; } @@ -8902,8 +8905,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val status = gen6_check_mailbox_status(dev_priv); if (status) { - DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed: %d\n", - status); + DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n", + mbox, __builtin_return_address(0), status); return status; } @@ -8923,7 +8926,8 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, */ if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { - DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n"); + DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n", + val, mbox, __builtin_return_address(0)); return -EAGAIN; } @@ -8934,7 +8938,8 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, if (__intel_wait_for_register_fw(dev_priv, GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, 500, 0, NULL)) { - DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); + DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n", + val, mbox, __builtin_return_address(0)); return -ETIMEDOUT; } @@ -8946,8 +8951,8 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, status = gen6_check_mailbox_status(dev_priv); if (status) { - DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed: %d\n", - status); + DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n", + val, mbox, __builtin_return_address(0), status); return status; } -- cgit v1.2.3 From 32087d1425887e2d51e8c77ff9849d73f6384457 Mon Sep 17 00:00:00 2001 From: Praveen Paneri Date: Thu, 3 Aug 2017 23:02:10 +0530 Subject: drm/i915: enable WaDisableDopClkGating for skl This WA is required when decoupled frequencies for slice and unslice are enabled. This disables DOP clock gating for skl. v2: enable the WA for all gen9 platforms (not just for SKL GT4 where the hang issue is originally reported) to avoid rare hangs (David) v3: as per WaDatabase, enable it only for SKL (Rodrigo) Cc: David Weinehall Reviewed-by: David Weinehall Signed-off-by: Praveen Paneri Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1501781530-8186-1-git-send-email-praveen.paneri@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8711c1f04079..6e393b217450 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -78,6 +78,12 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */ I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | ILK_DPFC_DISABLE_DUMMY0); + + if (IS_SKYLAKE(dev_priv)) { + /* WaDisableDopClockGating */ + I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL) + & ~GEN7_DOP_CLOCK_GATE_ENABLE); + } } static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) -- cgit v1.2.3 From 2e2adb05736c3101a0b301e39bf5adabb8b5fb22 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 1 Aug 2017 09:58:13 -0700 Subject: drm/i915: Add render decompression support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SKL+ display engine can scan out certain kinds of compressed surfaces produced by the render engine. This involved telling the display engine the location of the color control surfae (CCS) which describes which parts of the main surface are compressed and which are not. The location of CCS is provided by userspace as just another plane with its own offset. Add the required stuff to validate the user provided AUX plane metadata and convert the user provided linear offset into something the hardware can consume. Due to hardware limitations we require that the main surface and the AUX surface (CCS) be part of the same bo. The hardware also makes life hard by not allowing you to provide separate x/y offsets for the main and AUX surfaces (excpet with NV12), so finding suitable offsets for both requires a bit of work. Assuming we still want keep playing tricks with the offsets. I've just gone with a dumb "search backward for suitable offsets" approach, which is far from optimal, but it works. Also not all planes will be capable of scanning out compressed surfaces, and eg. 90/270 degree rotation is not supported in combination with decompression either. This patch may contain work from at least the following people: * Vandana Kannan * Daniel Vetter * Ben Widawsky v2: Deal with display workarounds 0390, 0531, 1125 (Paulo) v3: Pretend CCS tiles are regular 128 byte wide Y tiles (Jason) Put the AUX register defines to the correct place Fix up the slightly bogus rotation check v4: Use I915_WRITE_FW() due to plane update locking changes s/return -EINVAL/goto err/ in intel_framebuffer_init() Eliminate a bunch hardcoded numbers in CCS code v5: (By Ben) conflict resolution + - res_blocks += fixed_16_16_to_u32_round_up(y_tile_minimum); + res_blocks += fixed16_to_u32_round_up(y_tile_minimum); v6: (daniels) Fix botched commit message. Cc: Paulo Zanoni Cc: Daniel Vetter Cc: Ben Widawsky Cc: Jason Ekstrand Signed-off-by: Ville Syrjä Reviewed-by: Ben Widawsky (v1) Reviewed-by: Daniel Stone Signed-off-by: Ben Widawsky Signed-off-by: Daniel Stone Link: http://patchwork.freedesktop.org/patch/msgid/20170801165817.7063-1-ben@bwidawsk.net --- drivers/gpu/drm/i915/i915_reg.h | 23 ++++ drivers/gpu/drm/i915/intel_display.c | 233 ++++++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/intel_pm.c | 29 ++++- drivers/gpu/drm/i915/intel_sprite.c | 5 + 4 files changed, 272 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 56df86ef5a4d..067f8f30c0b8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6199,6 +6199,10 @@ enum { #define _PLANE_KEYMSK_2_A 0x70298 #define _PLANE_KEYMAX_1_A 0x701a0 #define _PLANE_KEYMAX_2_A 0x702a0 +#define _PLANE_AUX_DIST_1_A 0x701c0 +#define _PLANE_AUX_DIST_2_A 0x702c0 +#define _PLANE_AUX_OFFSET_1_A 0x701c4 +#define _PLANE_AUX_OFFSET_2_A 0x702c4 #define _PLANE_COLOR_CTL_1_A 0x701CC /* GLK+ */ #define _PLANE_COLOR_CTL_2_A 0x702CC /* GLK+ */ #define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ @@ -6305,6 +6309,24 @@ enum { #define PLANE_NV12_BUF_CFG(pipe, plane) \ _MMIO_PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe)) +#define _PLANE_AUX_DIST_1_B 0x711c0 +#define _PLANE_AUX_DIST_2_B 0x712c0 +#define _PLANE_AUX_DIST_1(pipe) \ + _PIPE(pipe, _PLANE_AUX_DIST_1_A, _PLANE_AUX_DIST_1_B) +#define _PLANE_AUX_DIST_2(pipe) \ + _PIPE(pipe, _PLANE_AUX_DIST_2_A, _PLANE_AUX_DIST_2_B) +#define PLANE_AUX_DIST(pipe, plane) \ + _MMIO_PLANE(plane, _PLANE_AUX_DIST_1(pipe), _PLANE_AUX_DIST_2(pipe)) + +#define _PLANE_AUX_OFFSET_1_B 0x711c4 +#define _PLANE_AUX_OFFSET_2_B 0x712c4 +#define _PLANE_AUX_OFFSET_1(pipe) \ + _PIPE(pipe, _PLANE_AUX_OFFSET_1_A, _PLANE_AUX_OFFSET_1_B) +#define _PLANE_AUX_OFFSET_2(pipe) \ + _PIPE(pipe, _PLANE_AUX_OFFSET_2_A, _PLANE_AUX_OFFSET_2_B) +#define PLANE_AUX_OFFSET(pipe, plane) \ + _MMIO_PLANE(plane, _PLANE_AUX_OFFSET_1(pipe), _PLANE_AUX_OFFSET_2(pipe)) + #define _PLANE_COLOR_CTL_1_B 0x711CC #define _PLANE_COLOR_CTL_2_B 0x712CC #define _PLANE_COLOR_CTL_3_B 0x713CC @@ -6788,6 +6810,7 @@ enum { # define CHICKEN3_DGMG_DONE_FIX_DISABLE (1 << 2) #define CHICKEN_PAR1_1 _MMIO(0x42080) +#define SKL_RC_HASH_OUTSIDE (1 << 15) #define DPA_MASK_VBLANK_SRD (1 << 15) #define FORCE_ARB_IDLE_PLANES (1 << 14) #define SKL_EDP_PSR_FIX_RDWRAP (1 << 3) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 51035ec1fe62..0e334a2e9d4b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1994,11 +1994,19 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int plane) return 128; else return 512; + case I915_FORMAT_MOD_Y_TILED_CCS: + if (plane == 1) + return 128; + /* fall through */ case I915_FORMAT_MOD_Y_TILED: if (IS_GEN2(dev_priv) || HAS_128_BYTE_Y_TILING(dev_priv)) return 128; else return 512; + case I915_FORMAT_MOD_Yf_TILED_CCS: + if (plane == 1) + return 128; + /* fall through */ case I915_FORMAT_MOD_Yf_TILED: switch (cpp) { case 1: @@ -2105,7 +2113,7 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, struct drm_i915_private *dev_priv = to_i915(fb->dev); /* AUX_DIST needs only 4K alignment */ - if (fb->format->format == DRM_FORMAT_NV12 && plane == 1) + if (plane == 1) return 4096; switch (fb->modifier) { @@ -2115,6 +2123,8 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, if (INTEL_GEN(dev_priv) >= 9) return 256 * 1024; return 0; + case I915_FORMAT_MOD_Y_TILED_CCS: + case I915_FORMAT_MOD_Yf_TILED_CCS: case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: return 1 * 1024 * 1024; @@ -2422,6 +2432,7 @@ static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier) case I915_FORMAT_MOD_X_TILED: return I915_TILING_X; case I915_FORMAT_MOD_Y_TILED: + case I915_FORMAT_MOD_Y_TILED_CCS: return I915_TILING_Y; default: return I915_TILING_NONE; @@ -2486,6 +2497,36 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, intel_fb_offset_to_xy(&x, &y, fb, i); + if ((fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS) && i == 1) { + int hsub = fb->format->hsub; + int vsub = fb->format->vsub; + int tile_width, tile_height; + int main_x, main_y; + int ccs_x, ccs_y; + + intel_tile_dims(fb, i, &tile_width, &tile_height); + + ccs_x = (x * hsub) % (tile_width * hsub); + ccs_y = (y * vsub) % (tile_height * vsub); + main_x = intel_fb->normal[0].x % (tile_width * hsub); + main_y = intel_fb->normal[0].y % (tile_height * vsub); + + /* + * CCS doesn't have its own x/y offset register, so the intra CCS tile + * x/y offsets must match between CCS and the main surface. + */ + if (main_x != ccs_x || main_y != ccs_y) { + DRM_DEBUG_KMS("Bad CCS x/y (main %d,%d ccs %d,%d) full (main %d,%d ccs %d,%d)\n", + main_x, main_y, + ccs_x, ccs_y, + intel_fb->normal[0].x, + intel_fb->normal[0].y, + x, y); + return -EINVAL; + } + } + /* * The fence (if used) is aligned to the start of the object * so having the framebuffer wrap around across the edge of the @@ -2846,6 +2887,9 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane, break; } break; + case I915_FORMAT_MOD_Y_TILED_CCS: + case I915_FORMAT_MOD_Yf_TILED_CCS: + /* FIXME AUX plane? */ case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: switch (cpp) { @@ -2868,6 +2912,44 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane, return 2048; } +static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, + int main_x, int main_y, u32 main_offset) +{ + const struct drm_framebuffer *fb = plane_state->base.fb; + int hsub = fb->format->hsub; + int vsub = fb->format->vsub; + int aux_x = plane_state->aux.x; + int aux_y = plane_state->aux.y; + u32 aux_offset = plane_state->aux.offset; + u32 alignment = intel_surf_alignment(fb, 1); + + while (aux_offset >= main_offset && aux_y <= main_y) { + int x, y; + + if (aux_x == main_x && aux_y == main_y) + break; + + if (aux_offset == 0) + break; + + x = aux_x / hsub; + y = aux_y / vsub; + aux_offset = intel_adjust_tile_offset(&x, &y, plane_state, 1, + aux_offset, aux_offset - alignment); + aux_x = x * hsub + aux_x % hsub; + aux_y = y * vsub + aux_y % vsub; + } + + if (aux_x != main_x || aux_y != main_y) + return false; + + plane_state->aux.offset = aux_offset; + plane_state->aux.x = aux_x; + plane_state->aux.y = aux_y; + + return true; +} + static int skl_check_main_surface(struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->base.fb; @@ -2910,7 +2992,7 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) while ((x + w) * cpp > fb->pitches[0]) { if (offset == 0) { - DRM_DEBUG_KMS("Unable to find suitable display surface offset\n"); + DRM_DEBUG_KMS("Unable to find suitable display surface offset due to X-tiling\n"); return -EINVAL; } @@ -2919,6 +3001,26 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) } } + /* + * CCS AUX surface doesn't have its own x/y offsets, we must make sure + * they match with the main surface x/y offsets. + */ + if (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS) { + while (!skl_check_main_ccs_coordinates(plane_state, x, y, offset)) { + if (offset == 0) + break; + + offset = intel_adjust_tile_offset(&x, &y, plane_state, 0, + offset, offset - alignment); + } + + if (x != plane_state->aux.x || y != plane_state->aux.y) { + DRM_DEBUG_KMS("Unable to find suitable display surface offset due to CCS\n"); + return -EINVAL; + } + } + plane_state->main.offset = offset; plane_state->main.x = x; plane_state->main.y = y; @@ -2955,6 +3057,49 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) return 0; } +static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc); + const struct drm_framebuffer *fb = plane_state->base.fb; + int src_x = plane_state->base.src.x1 >> 16; + int src_y = plane_state->base.src.y1 >> 16; + int hsub = fb->format->hsub; + int vsub = fb->format->vsub; + int x = src_x / hsub; + int y = src_y / vsub; + u32 offset; + + switch (plane->id) { + case PLANE_PRIMARY: + case PLANE_SPRITE0: + break; + default: + DRM_DEBUG_KMS("RC support only on plane 1 and 2\n"); + return -EINVAL; + } + + if (crtc->pipe == PIPE_C) { + DRM_DEBUG_KMS("No RC support on pipe C\n"); + return -EINVAL; + } + + if (plane_state->base.rotation & ~(DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180)) { + DRM_DEBUG_KMS("RC support only with 0/180 degree rotation %x\n", + plane_state->base.rotation); + return -EINVAL; + } + + intel_add_fb_offsets(&x, &y, plane_state, 1); + offset = intel_compute_tile_offset(&x, &y, plane_state, 1); + + plane_state->aux.offset = offset; + plane_state->aux.x = x * hsub + src_x % hsub; + plane_state->aux.y = y * vsub + src_y % vsub; + + return 0; +} + int skl_check_plane_surface(struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->base.fb; @@ -2978,6 +3123,11 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) ret = skl_check_nv12_aux_surface(plane_state); if (ret) return ret; + } else if (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS) { + ret = skl_check_ccs_aux_surface(plane_state); + if (ret) + return ret; } else { plane_state->aux.offset = ~0xfff; plane_state->aux.x = 0; @@ -3284,8 +3434,12 @@ static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) return PLANE_CTL_TILED_X; case I915_FORMAT_MOD_Y_TILED: return PLANE_CTL_TILED_Y; + case I915_FORMAT_MOD_Y_TILED_CCS: + return PLANE_CTL_TILED_Y | PLANE_CTL_DECOMPRESSION_ENABLE; case I915_FORMAT_MOD_Yf_TILED: return PLANE_CTL_TILED_YF; + case I915_FORMAT_MOD_Yf_TILED_CCS: + return PLANE_CTL_TILED_YF | PLANE_CTL_DECOMPRESSION_ENABLE; default: MISSING_CASE(fb_modifier); } @@ -3358,6 +3512,7 @@ static void skylake_update_primary_plane(struct intel_plane *plane, u32 plane_ctl = plane_state->ctl; unsigned int rotation = plane_state->base.rotation; u32 stride = skl_plane_stride(fb, 0, rotation); + u32 aux_stride = skl_plane_stride(fb, 1, rotation); u32 surf_addr = plane_state->main.offset; int scaler_id = plane_state->scaler_id; int src_x = plane_state->main.x; @@ -3394,6 +3549,10 @@ static void skylake_update_primary_plane(struct intel_plane *plane, I915_WRITE_FW(PLANE_OFFSET(pipe, plane_id), (src_y << 16) | src_x); I915_WRITE_FW(PLANE_STRIDE(pipe, plane_id), stride); I915_WRITE_FW(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w); + I915_WRITE_FW(PLANE_AUX_DIST(pipe, plane_id), + (plane_state->aux.offset - surf_addr) | aux_stride); + I915_WRITE_FW(PLANE_AUX_OFFSET(pipe, plane_id), + (plane_state->aux.y << 16) | plane_state->aux.x); if (scaler_id >= 0) { uint32_t ps_ctrl = 0; @@ -8337,10 +8496,16 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, fb->modifier = I915_FORMAT_MOD_X_TILED; break; case PLANE_CTL_TILED_Y: - fb->modifier = I915_FORMAT_MOD_Y_TILED; + if (val & PLANE_CTL_DECOMPRESSION_ENABLE) + fb->modifier = I915_FORMAT_MOD_Y_TILED_CCS; + else + fb->modifier = I915_FORMAT_MOD_Y_TILED; break; case PLANE_CTL_TILED_YF: - fb->modifier = I915_FORMAT_MOD_Yf_TILED; + if (val & PLANE_CTL_DECOMPRESSION_ENABLE) + fb->modifier = I915_FORMAT_MOD_Yf_TILED_CCS; + else + fb->modifier = I915_FORMAT_MOD_Yf_TILED; break; default: MISSING_CASE(tiling); @@ -13506,10 +13671,12 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct drm_framebuffer *fb = &intel_fb->base; struct drm_format_name_buf format_name; - u32 pitch_limit, stride_alignment; + u32 pitch_limit; unsigned int tiling, stride; int ret = -EINVAL; + int i; i915_gem_object_lock(obj); obj->framebuffer_references++; @@ -13538,6 +13705,19 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, /* Passed in modifier sanity checking. */ switch (mode_cmd->modifier[0]) { + case I915_FORMAT_MOD_Y_TILED_CCS: + case I915_FORMAT_MOD_Yf_TILED_CCS: + switch (mode_cmd->pixel_format) { + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + break; + default: + DRM_DEBUG_KMS("RC supported only with RGB8888 formats\n"); + goto err; + } + /* fall through */ case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: if (INTEL_GEN(dev_priv) < 9) { @@ -13642,25 +13822,46 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (mode_cmd->offsets[0] != 0) goto err; - drm_helper_mode_fill_fb_struct(&dev_priv->drm, - &intel_fb->base, mode_cmd); + drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); - stride_alignment = intel_fb_stride_alignment(&intel_fb->base, 0); - if (mode_cmd->pitches[0] & (stride_alignment - 1)) { - DRM_DEBUG_KMS("pitch (%d) must be at least %u byte aligned\n", - mode_cmd->pitches[0], stride_alignment); - goto err; + for (i = 0; i < fb->format->num_planes; i++) { + u32 stride_alignment; + + if (mode_cmd->handles[i] != mode_cmd->handles[0]) { + DRM_DEBUG_KMS("bad plane %d handle\n", i); + return -EINVAL; + } + + stride_alignment = intel_fb_stride_alignment(fb, i); + + /* + * Display WA #0531: skl,bxt,kbl,glk + * + * Render decompression and plane width > 3840 + * combined with horizontal panning requires the + * plane stride to be a multiple of 4. We'll just + * require the entire fb to accommodate that to avoid + * potential runtime errors at plane configuration time. + */ + if (IS_GEN9(dev_priv) && i == 0 && fb->width > 3840 && + (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS)) + stride_alignment *= 4; + + if (fb->pitches[i] & (stride_alignment - 1)) { + DRM_DEBUG_KMS("plane %d pitch (%d) must be at least %u byte aligned\n", + i, fb->pitches[i], stride_alignment); + goto err; + } } intel_fb->obj = obj; - ret = intel_fill_fb_info(dev_priv, &intel_fb->base); + ret = intel_fill_fb_info(dev_priv, fb); if (ret) goto err; - ret = drm_framebuffer_init(obj->base.dev, - &intel_fb->base, - &intel_fb_funcs); + ret = drm_framebuffer_init(&dev_priv->drm, fb, &intel_fb_funcs); if (ret) { DRM_ERROR("framebuffer init failed %d\n", ret); goto err; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6e393b217450..4a75b673b85f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -62,6 +62,20 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(CHICKEN_PAR1_1, I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP); + /* + * Display WA#0390: skl,bxt,kbl,glk + * + * Must match Sampler, Pixel Back End, and Media + * (0xE194 bit 8, 0x7014 bit 13, 0x4DDC bits 27 and 31). + * + * Including bits outside the page in the hash would + * require 2 (or 4?) MiB alignment of resources. Just + * assume the defaul hashing mode which only uses bits + * within the page. + */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) & ~SKL_RC_HASH_OUTSIDE); + I915_WRITE(GEN8_CONFIG0, I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES); @@ -4077,7 +4091,9 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, /* For Non Y-tile return 8-blocks */ if (fb->modifier != I915_FORMAT_MOD_Y_TILED && - fb->modifier != I915_FORMAT_MOD_Yf_TILED) + fb->modifier != I915_FORMAT_MOD_Yf_TILED && + fb->modifier != I915_FORMAT_MOD_Y_TILED_CCS && + fb->modifier != I915_FORMAT_MOD_Yf_TILED_CCS) return 8; /* @@ -4383,7 +4399,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || - fb->modifier == I915_FORMAT_MOD_Yf_TILED; + fb->modifier == I915_FORMAT_MOD_Yf_TILED || + fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; /* Display WA #1141: kbl,cfl */ @@ -4478,6 +4496,13 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, res_lines = div_round_up_fixed16(selected_result, plane_blocks_per_line); + /* Display WA #1125: skl,bxt,kbl,glk */ + if (level == 0 && + (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS)) + res_blocks += fixed16_to_u32_round_up(y_tile_minimum); + + /* Display WA #1126: skl,bxt,kbl,glk */ if (level >= 1 && level <= 7) { if (y_tiled) { res_blocks += fixed16_to_u32_round_up(y_tile_minimum); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 048a4cab5589..4bf0d3e4ad91 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -238,6 +238,7 @@ skl_update_plane(struct intel_plane *plane, u32 surf_addr = plane_state->main.offset; unsigned int rotation = plane_state->base.rotation; u32 stride = skl_plane_stride(fb, 0, rotation); + u32 aux_stride = skl_plane_stride(fb, 1, rotation); int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); @@ -272,6 +273,10 @@ skl_update_plane(struct intel_plane *plane, I915_WRITE_FW(PLANE_OFFSET(pipe, plane_id), (y << 16) | x); I915_WRITE_FW(PLANE_STRIDE(pipe, plane_id), stride); I915_WRITE_FW(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w); + I915_WRITE_FW(PLANE_AUX_DIST(pipe, plane_id), + (plane_state->aux.offset - surf_addr) | aux_stride); + I915_WRITE_FW(PLANE_AUX_OFFSET(pipe, plane_id), + (plane_state->aux.y << 16) | plane_state->aux.x); /* program plane scaler */ if (plane_state->scaler_id >= 0) { -- cgit v1.2.3 From 50682ee63fa3480b0541d0a311239189634b68ab Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 9 Aug 2017 13:52:43 -0700 Subject: drm/i915/gen10+: use the SKL code for reading WM latencies Gen 10 should use the exact same code as Gen 9, so change the check to take this into consideration, and also assume that future platforms will run this code. Also add a MISSING_CASE(), just in case we do something wrong, instead of silently failing. Cc: Mahesh Kumar Cc: Maarten Lankhorst Signed-off-by: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20170809205248.11917-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 4a75b673b85f..04697faee4e6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2778,7 +2778,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) static void intel_read_wm_latency(struct drm_i915_private *dev_priv, uint16_t wm[8]) { - if (IS_GEN9(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { uint32_t val; int ret, i; int level, max_level = ilk_wm_max_level(dev_priv); @@ -2838,7 +2838,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, } /* - * WaWmMemoryReadLatency:skl,glk + * WaWmMemoryReadLatency:skl+,glk * * punit doesn't take into account the read latency so we need * to add 2us to the various latency levels we retrieve from the @@ -2877,6 +2877,8 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, wm[0] = 7; wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK; wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK; + } else { + MISSING_CASE(INTEL_DEVID(dev_priv)); } } -- cgit v1.2.3 From 019718196c594d2e33cc371cbbcccb84735e6ada Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 9 Aug 2017 13:52:44 -0700 Subject: drm/i915/cnl: Enable SAGV for Cannonlake. For now inherit from previous platforms. v2: Rebase on top of CFL. Cc: Mahesh Kumar Cc: Maarten Lankhorst Cc: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20170809205248.11917-2-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 04697faee4e6..52bf62b6e38c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3571,7 +3571,8 @@ static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) static bool intel_has_sagv(struct drm_i915_private *dev_priv) { - if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) + if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || + IS_CANNONLAKE(dev_priv)) return true; if (IS_SKYLAKE(dev_priv) && -- cgit v1.2.3 From fdd11c2bfce22e57145e861905b2753c0451df85 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 9 Aug 2017 13:52:45 -0700 Subject: drm/i915/gen10: fix the gen 10 SAGV block time A previous commit added CNL to intel_has_sagv(), but forgot to adjust the SAGV block time to gen 10 platforms. Cc: Mahesh Kumar Cc: Maarten Lankhorst Signed-off-by: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20170809205248.11917-3-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 52bf62b6e38c..f64fdae88b7d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3552,8 +3552,6 @@ bool ilk_disable_lp_wm(struct drm_device *dev) return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); } -#define SKL_SAGV_BLOCK_TIME 30 /* µs */ - /* * FIXME: We still don't have the proper code detect if we need to apply the WA, * so assume we'll always need it in order to avoid underruns. @@ -3678,12 +3676,13 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) struct intel_crtc_state *cstate; enum pipe pipe; int level, latency; + int sagv_block_time_us = IS_GEN9(dev_priv) ? 30 : 20; if (!intel_has_sagv(dev_priv)) return false; /* - * SKL workaround: bspec recommends we disable the SAGV when we have + * SKL+ workaround: bspec recommends we disable the SAGV when we have * more then one pipe enabled * * If there are no active CRTCs, no additional checks need be performed @@ -3722,11 +3721,11 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) latency += 15; /* - * If any of the planes on this pipe don't enable wm levels - * that incur memory latencies higher then 30µs we can't enable - * the SAGV + * If any of the planes on this pipe don't enable wm levels that + * incur memory latencies higher than sagv_block_time_us we + * can't enable the SAGV. */ - if (latency < SKL_SAGV_BLOCK_TIME) + if (latency < sagv_block_time_us) return false; } -- cgit v1.2.3 From dfc267ab5acb2ce73078097875f24985942765af Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 9 Aug 2017 13:52:46 -0700 Subject: drm/i915/gen10: fix WM latency printing Gen 10 is just like Gen 9, so let's consider that all the future platforms are going to be like gen 9 instead of being like gen8-. Cc: Mahesh Kumar Cc: Maarten Lankhorst Signed-off-by: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20170809205248.11917-4-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f64fdae88b7d..66495ad36973 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2934,7 +2934,7 @@ static void intel_print_wm_latency(struct drm_i915_private *dev_priv, * - latencies are in us on gen9. * - before then, WM1+ latency values are in 0.5us units */ - if (IS_GEN9(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) latency *= 10; else if (level > 0) latency *= 5; -- cgit v1.2.3 From 6c64dd378aca528903cb9f7a60d04fc5c1a3bdbd Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 11 Aug 2017 16:38:25 -0700 Subject: drm/i915/gen10: implement gen 10 watermarks calculations They're slightly different than the gen 9 calculations. v2: Remove TODO comment. Code matches recent spec. v3: Rebase on top of latest skl code using new fp16.16 and fixing a logic issue. Auto rebase bot has apparently made some bad decisions that changed the logic of the code. (Noticed by Manesh, updated by Rodrigo). Cc: Mahesh Kumar Cc: Maarten Lankhorst Signed-off-by: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Mahesh Kumar Link: https://patchwork.freedesktop.org/patch/msgid/20170811233825.32083-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 66495ad36973..ed662937ec3c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4290,8 +4290,9 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, * should allow pixel_rate up to ~2 GHz which seems sufficient since max * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. */ -static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, - uint32_t latency) +static uint_fixed_16_16_t +skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate, + uint8_t cpp, uint32_t latency) { uint32_t wm_intermediate_val; uint_fixed_16_16_t ret; @@ -4301,6 +4302,10 @@ static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, wm_intermediate_val = latency * pixel_rate * cpp; ret = div_fixed16(wm_intermediate_val, 1000 * 512); + + if (INTEL_GEN(dev_priv) >= 10) + ret = add_fixed16_u32(ret, 1); + return ret; } @@ -4456,9 +4461,13 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (y_tiled) { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line * y_min_scanlines, 512); + + if (INTEL_GEN(dev_priv) >= 10) + interm_pbpl++; + plane_blocks_per_line = div_fixed16(interm_pbpl, y_min_scanlines); - } else if (x_tiled) { + } else if (x_tiled && INTEL_GEN(dev_priv) == 9) { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512); plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } else { @@ -4466,7 +4475,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } - method1 = skl_wm_method1(plane_pixel_rate, cpp, latency); + method1 = skl_wm_method1(dev_priv, plane_pixel_rate, cpp, latency); method2 = skl_wm_method2(plane_pixel_rate, cstate->base.adjusted_mode.crtc_htotal, latency, -- cgit v1.2.3