diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_workarounds.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_workarounds.c | 192 |
1 files changed, 152 insertions, 40 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index a05c4b99b3fb..e8111fce56d0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -9,6 +9,7 @@ #include "intel_engine_regs.h" #include "intel_gpu_commands.h" #include "intel_gt.h" +#include "intel_gt_mcr.h" #include "intel_gt_regs.h" #include "intel_ring.h" #include "intel_workarounds.h" @@ -688,6 +689,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + + /* Wa_15010599737:dg2 */ + wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); } static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, @@ -776,7 +780,9 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_DG2(i915)) + if (IS_PONTEVECCHIO(i915)) + ; /* noop; none at this time */ + else if (IS_DG2(i915)) dg2_ctx_workarounds_init(engine, wal); else if (IS_XEHPSDV(i915)) ; /* noop; none at this time */ @@ -948,8 +954,8 @@ gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) * on s/ss combo, the read should be done with read_subslice_reg. */ slice = ffs(sseu->slice_mask) - 1; - GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); - subslice = ffs(intel_sseu_get_subslices(sseu, slice)); + GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask.hsw)); + subslice = ffs(intel_sseu_get_hsw_subslices(sseu, slice)); GEM_BUG_ON(!subslice); subslice--; @@ -1080,18 +1086,17 @@ static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal, gt->default_steering.instanceid = subslice; if (drm_debug_enabled(DRM_UT_DRIVER)) - intel_gt_report_steering(&p, gt, false); + intel_gt_mcr_report_steering(&p, gt, false); } static void icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) { const struct sseu_dev_info *sseu = >->info.sseu; - unsigned int slice, subslice; + unsigned int subslice; GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11); GEM_BUG_ON(hweight8(sseu->slice_mask) > 1); - slice = 0; /* * Although a platform may have subslices, we need to always steer @@ -1102,7 +1107,7 @@ icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) * one of the higher subslices, we run the risk of reading back 0's or * random garbage. */ - subslice = __ffs(intel_sseu_get_subslices(sseu, slice)); + subslice = __ffs(intel_sseu_get_hsw_subslices(sseu, 0)); /* * If the subslice we picked above also steers us to a valid L3 bank, @@ -1112,7 +1117,7 @@ icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) if (gt->info.l3bank_mask & BIT(subslice)) gt->steering_table[L3BANK] = NULL; - __add_mcr_wa(gt, wal, slice, subslice); + __add_mcr_wa(gt, wal, 0, subslice); } static void @@ -1120,7 +1125,6 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) { const struct sseu_dev_info *sseu = >->info.sseu; unsigned long slice, subslice = 0, slice_mask = 0; - u64 dss_mask = 0; u32 lncf_mask = 0; int i; @@ -1151,8 +1155,8 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) */ /* Find the potential gslice candidates */ - dss_mask = intel_sseu_get_subslices(sseu, 0); - slice_mask = intel_slicemask_from_dssmask(dss_mask, GEN_DSS_PER_GSLICE); + slice_mask = intel_slicemask_from_xehp_dssmask(sseu->subslice_mask, + GEN_DSS_PER_GSLICE); /* * Find the potential LNCF candidates. Either LNCF within a valid @@ -1177,9 +1181,8 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) } slice = __ffs(slice_mask); - subslice = __ffs(dss_mask >> (slice * GEN_DSS_PER_GSLICE)); - WARN_ON(subslice > GEN_DSS_PER_GSLICE); - WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0); + subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) % + GEN_DSS_PER_GSLICE; __add_mcr_wa(gt, wal, slice, subslice); @@ -1197,6 +1200,20 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) } static void +pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) +{ + unsigned int dss; + + /* + * Setup implicit steering for COMPUTE and DSS ranges to the first + * non-fused-off DSS. All other types of MCR registers will be + * explicitly steered. + */ + dss = intel_sseu_find_first_xehp_dss(>->info.sseu, 0, 0); + __add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE); +} + +static void icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; @@ -1487,6 +1504,18 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) * performance guide section. */ wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS); + + /* Wa_14015795083 */ + wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); +} + +static void +pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) +{ + pvc_init_mcr(gt, wal); + + /* Wa_14015795083 */ + wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } static void @@ -1494,7 +1523,9 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; - if (IS_DG2(i915)) + if (IS_PONTEVECCHIO(i915)) + pvc_gt_workarounds_init(gt, wal); + else if (IS_DG2(i915)) dg2_gt_workarounds_init(gt, wal); else if (IS_XEHPSDV(i915)) xehpsdv_gt_workarounds_init(gt, wal); @@ -1596,13 +1627,13 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) u32 val, old = 0; /* open-coded rmw due to steering */ - old = wa->clr ? intel_gt_read_register_fw(gt, wa->reg) : 0; + old = wa->clr ? intel_gt_mcr_read_any_fw(gt, wa->reg) : 0; val = (old & ~wa->clr) | wa->set; if (val != old || !wa->clr) intel_uncore_write_fw(uncore, wa->reg, val); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - wa_verify(wa, intel_gt_read_register_fw(gt, wa->reg), + wa_verify(wa, intel_gt_mcr_read_any_fw(gt, wa->reg), wal->name, "application"); } @@ -1633,7 +1664,7 @@ static bool wa_list_verify(struct intel_gt *gt, for (i = 0, wa = wal->list; i < wal->count; i++, wa++) ok &= wa_verify(wa, - intel_gt_read_register_fw(gt, wa->reg), + intel_gt_mcr_read_any_fw(gt, wa->reg), wal->name, from); intel_uncore_forcewake_put__locked(uncore, fw); @@ -1924,6 +1955,32 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) } } +static void blacklist_trtt(struct intel_engine_cs *engine) +{ + struct i915_wa_list *w = &engine->whitelist; + + /* + * Prevent read/write access to [0x4400, 0x4600) which covers + * the TRTT range across all engines. Note that normally userspace + * cannot access the other engines' trtt control, but for simplicity + * we cover the entire range on each engine. + */ + whitelist_reg_ext(w, _MMIO(0x4400), + RING_FORCE_TO_NONPRIV_DENY | + RING_FORCE_TO_NONPRIV_RANGE_64); + whitelist_reg_ext(w, _MMIO(0x4500), + RING_FORCE_TO_NONPRIV_DENY | + RING_FORCE_TO_NONPRIV_RANGE_64); +} + +static void pvc_whitelist_build(struct intel_engine_cs *engine) +{ + allow_read_ctx_timestamp(engine); + + /* Wa_16014440446:pvc */ + blacklist_trtt(engine); +} + void intel_engine_init_whitelist(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -1931,7 +1988,9 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) wa_init_start(w, "whitelist", engine->name); - if (IS_DG2(i915)) + if (IS_PONTEVECCHIO(i915)) + pvc_whitelist_build(engine); + else if (IS_DG2(i915)) dg2_whitelist_build(engine); else if (IS_XEHPSDV(i915)) xehpsdv_whitelist_build(engine); @@ -1994,27 +2053,44 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine) static void engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - u8 mocs; + u8 mocs_w, mocs_r; /* - * RING_CMD_CCTL are need to be programed to un-cached - * for memory writes and reads outputted by Command - * Streamers on Gen12 onward platforms. + * RING_CMD_CCTL specifies the default MOCS entry that will be used + * by the command streamer when executing commands that don't have + * a way to explicitly specify a MOCS setting. The default should + * usually reference whichever MOCS entry corresponds to uncached + * behavior, although use of a WB cached entry is recommended by the + * spec in certain circumstances on specific platforms. */ if (GRAPHICS_VER(engine->i915) >= 12) { - mocs = engine->gt->mocs.uc_index; + mocs_r = engine->gt->mocs.uc_index; + mocs_w = engine->gt->mocs.uc_index; + + if (HAS_L3_CCS_READ(engine->i915) && + engine->class == COMPUTE_CLASS) { + mocs_r = engine->gt->mocs.wb_index; + + /* + * Even on the few platforms where MOCS 0 is a + * legitimate table entry, it's never the correct + * setting to use here; we can assume the MOCS init + * just forgot to initialize wb_index. + */ + drm_WARN_ON(&engine->i915->drm, mocs_r == 0); + } + wa_masked_field_set(wal, RING_CMD_CCTL(engine->mmio_base), CMD_CCTL_MOCS_MASK, - CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); + CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r)); } } static bool needs_wa_1308578152(struct intel_engine_cs *engine) { - u64 dss_mask = intel_sseu_get_subslices(&engine->gt->info.sseu, 0); - - return (dss_mask & GENMASK(GEN_DSS_PER_GSLICE - 1, 0)) == 0; + return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >= + GEN_DSS_PER_GSLICE; } static void @@ -2023,9 +2099,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) struct drm_i915_private *i915 = engine->i915; if (IS_DG2(i915)) { - /* Wa_14015227452:dg2 */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); - /* Wa_1509235366:dg2 */ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE); @@ -2036,12 +2109,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * performance guide section. */ wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); - - /* Wa_18018781329:dg2 */ - wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); } if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { @@ -2160,6 +2227,16 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); } + if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || + IS_DG2_G10(i915)) { + /* Wa_22014600077:dg2 */ + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), + 0 /* Wa_14012342262 :write-only reg, so skip + verification */, + true); + } + if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) { /* @@ -2583,6 +2660,15 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } } +static void +ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) { + /* Wa_14014999345:pvc */ + wa_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC); + } +} + /* * The workarounds in this function apply to shared registers in * the general render reset domain that aren't tied to a @@ -2597,6 +2683,18 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li { struct drm_i915_private *i915 = engine->i915; + if (IS_PONTEVECCHIO(i915)) { + /* + * The following is not actually a "workaround" but rather + * a recommended tuning setting documented in the bspec's + * performance guide section. + */ + wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + + /* Wa_16016694945 */ + wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC); + } + if (IS_XEHPSDV(i915)) { /* Wa_1409954639 */ wa_masked_en(wal, @@ -2629,9 +2727,21 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li GLOBAL_INVALIDATION_MODE); } - if (IS_DG2(i915)) { - /* Wa_22014226127:dg2 */ + if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { + /* Wa_14015227452:dg2,pvc */ + wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); + + /* Wa_22014226127:dg2,pvc */ wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); + + /* Wa_16015675438:dg2,pvc */ + wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); + + /* Wa_18018781329:dg2,pvc */ + wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); } } @@ -2651,7 +2761,9 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) general_render_compute_wa_init(engine, wal); - if (engine->class == RENDER_CLASS) + if (engine->class == COMPUTE_CLASS) + ccs_engine_wa_init(engine, wal); + else if (engine->class == RENDER_CLASS) rcs_engine_wa_init(engine, wal); else xcs_engine_wa_init(engine, wal); |