diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-19 12:40:57 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-19 12:40:57 -0700 |
commit | 62c91ead977a5d5023be3d791cbff8535f7d5433 (patch) | |
tree | 32ecd56bf1cd66a1ef3043f47d8c9016d22e1585 /drivers/gpu/drm/i915/gt/intel_workarounds.c | |
parent | 672f9255a727d04c98b05ba7af625ab7032c8028 (diff) | |
parent | 8a7a3d1d0dcf2bb63dafe7275020420005e13e54 (diff) |
Merge tag 'drm-fixes-2020-06-19' of git://anongit.freedesktop.org/drm/drm
Pull drm fixes from Dave Airlie:
"Just i915 and amd here.
i915 has some workaround movement so they get applied at the right
times, and a timeslicing fix, along with some display fixes.
AMD has a few display floating point fix and a devcgroup fix for
amdkfd.
i915:
- Fix for timeslicing and virtual engines/unpremptable requests (+ 1
dependency patch)
- Fixes into TypeC register programming and interrupt storm detecting
- Disable DIP on MST ports with the transcoder clock still on
- Avoid missing GT workarounds at reset for HSW and older gens
- Fix for unwinding multiple requests missing force restore
- Fix encoder type check for DDI vswing sequence
- Build warning fixes
amdgpu:
- Fix kvfree/kfree mixup
- Fix hawaii device id in powertune configuration
- Display FP fixes
- Documentation fixes
amdkfd:
- devcgroup check fix"
* tag 'drm-fixes-2020-06-19' of git://anongit.freedesktop.org/drm/drm: (23 commits)
drm/amdgpu: fix documentation around busy_percentage
drm/amdgpu/pm: update comment to clarify Overdrive interfaces
drm/amdkfd: Use correct major in devcgroup check
drm/i915/display: Fix the encoder type check
drm/i915/icl+: Fix hotplug interrupt disabling after storm detection
drm/i915/gt: Move gen4 GT workarounds from init_clock_gating to workarounds
drm/i915/gt: Move ilk GT workarounds from init_clock_gating to workarounds
drm/i915/gt: Move snb GT workarounds from init_clock_gating to workarounds
drm/i915/gt: Move vlv GT workarounds from init_clock_gating to workarounds
drm/i915/gt: Move ivb GT workarounds from init_clock_gating to workarounds
drm/i915/gt: Move hsw GT workarounds from init_clock_gating to workarounds
drm/i915/icl: Disable DIP on MST ports with the transcoder clock still on
drm/i915/gt: Incrementally check for rewinding
drm/i915/tc: fix the reset of ln0
drm/i915/gt: Prevent timeslicing into unpreemptable requests
drm/i915/selftests: Restore to default heartbeat
drm/i915: work around false-positive maybe-uninitialized warning
drm/i915/pmu: avoid an maybe-uninitialized warning
drm/i915/gt: Incorporate the virtual engine into timeslicing
drm/amd/display: Rework dsc to isolate FPU operations
...
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_workarounds.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_workarounds.c | 241 |
1 files changed, 241 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 90a2b9e399b0..85d2bef51524 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -179,6 +179,12 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) } static void +wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) +{ + wa_write_masked_or(wal, reg, clr, 0); +} + +static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val); @@ -687,6 +693,227 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) } static void +gen4_gt_workarounds_init(struct drm_i915_private *i915, + struct i915_wa_list *wal) +{ + /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); +} + +static void +g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + gen4_gt_workarounds_init(i915, wal); + + /* WaDisableRenderCachePipelinedFlush:g4x,ilk */ + wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE); +} + +static void +ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + g4x_gt_workarounds_init(i915, wal); + + wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED); +} + +static void +snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ + wa_masked_en(wal, + _3D_CHICKEN, + _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB); + + /* WaDisable_RenderCache_OperationalFlush:snb */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, + GEN6_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB); + + wa_masked_en(wal, + _3D_CHICKEN3, + /* WaStripsFansDisableFastClipPerformanceFix:snb */ + _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL | + /* + * Bspec says: + * "This bit must be set if 3DSTATE_CLIP clip mode is set + * to normal and 3DSTATE_SF number of SF output attributes + * is more than 16." + */ + _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH); +} + +static void +ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableEarlyCull:ivb */ + wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); + + /* WaDisablePSDDualDispatchEnable:ivb */ + if (IS_IVB_GT1(i915)) + wa_masked_en(wal, + GEN7_HALF_SLICE_CHICKEN1, + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + + /* WaDisable_RenderCache_OperationalFlush:ivb */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); + + /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ + wa_masked_dis(wal, + GEN7_COMMON_SLICE_CHICKEN1, + GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); + + /* WaApplyL3ControlAndL3ChickenMode:ivb */ + wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL); + wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); + + /* WaForceL3Serialization:ivb */ + wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); + + /* + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. + */ + wa_write_masked_or(wal, GEN7_FF_THREAD_MODE, + GEN7_FF_SCHED_MASK, + GEN7_FF_TS_SCHED_HW | + GEN7_FF_VS_SCHED_HW | + GEN7_FF_DS_SCHED_HW); + + if (0) { /* causes HiZ corruption on ivb:gt1 */ + /* enable HiZ Raw Stall Optimization */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + } + + /* WaDisable4x2SubspanOptimization:ivb */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); +} + +static void +vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* WaDisableEarlyCull:vlv */ + wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); + + /* WaPsdDispatchEnable:vlv */ + /* WaDisablePSDDualDispatchEnable:vlv */ + wa_masked_en(wal, + GEN7_HALF_SLICE_CHICKEN1, + GEN7_MAX_PS_THREAD_DEP | + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + + /* WaDisable_RenderCache_OperationalFlush:vlv */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); + + /* WaForceL3Serialization:vlv */ + wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); + + /* + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. + */ + wa_write_masked_or(wal, + GEN7_FF_THREAD_MODE, + GEN7_FF_SCHED_MASK, + GEN7_FF_TS_SCHED_HW | + GEN7_FF_VS_SCHED_HW | + GEN7_FF_DS_SCHED_HW); + + /* + * BSpec says this must be set, even though + * WaDisable4x2SubspanOptimization isn't listed for VLV. + */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + /* + * WaIncreaseL3CreditsForVLVB0:vlv + * This is the hardware default actually. + */ + wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); +} + +static void +hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + /* L3 caching of data atomics doesn't work -- disable it. */ + wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); + + wa_add(wal, + HSW_ROW_CHICKEN3, 0, + _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), + 0 /* XXX does this reg exist? */); + + /* WaVSRefCountFullforceMissDisable:hsw */ + wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); + + wa_masked_dis(wal, + CACHE_MODE_0_GEN7, + /* WaDisable_RenderCache_OperationalFlush:hsw */ + RC_OP_FLUSH_ENABLE | + /* enable HiZ Raw Stall Optimization */ + HIZ_RAW_STALL_OPT_DISABLE); + + /* WaDisable4x2SubspanOptimization:hsw */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + /* WaSampleCChickenBitEnable:hsw */ + wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); +} + +static void gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { /* WaDisableKillLogic:bxt,skl,kbl */ @@ -963,6 +1190,20 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) bxt_gt_workarounds_init(i915, wal); else if (IS_SKYLAKE(i915)) skl_gt_workarounds_init(i915, wal); + else if (IS_HASWELL(i915)) + hsw_gt_workarounds_init(i915, wal); + else if (IS_VALLEYVIEW(i915)) + vlv_gt_workarounds_init(i915, wal); + else if (IS_IVYBRIDGE(i915)) + ivb_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 6)) + snb_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 5)) + ilk_gt_workarounds_init(i915, wal); + else if (IS_G4X(i915)) + g4x_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 4)) + gen4_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) return; else |