summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/display/dc/dml
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-03 19:52:08 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-03 19:52:08 -0700
commitb44f2fd87919b5ae6e1756d4c7ba2cbba22238e1 (patch)
tree01ce17e44375c3f7707640bb44d6e012bab878c4 /drivers/gpu/drm/amd/display/dc/dml
parent12b68040a5e468068fd7f4af1150eab8f6e96235 (diff)
parent5493ee1919eae4f49d62276cf5986b7f7c7aa8f6 (diff)
Merge tag 'drm-next-2022-08-03' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "Highlights: - New driver for logicvc - which is a display IP core. - EDID parser rework to add new extensions - fbcon scrolling improvements - i915 has some more DG2 work but not enabled by default, but should have enough features for userspace to work now. Otherwise it's lots of work all over the place. Detailed summary: New driver: - logicvc vfio: - use aperture API core: - of: Add data-lane helpers and convert drivers - connector: Remove deprecated ida_simple_get() media: - Add various RGB666 and RGB888 format constants panel: - Add HannStar HSD101PWW - Add ETML0700Y5DHA dma-buf: - add sync-file API - set dma mask for udmabuf devices fbcon: - Improve scrolling performance - Sanitize input fbdev: - device unregistering fixes - vesa: Support COMPILE_TEST - Disable firmware-device registration when first native driver loads aperture: - fix segfault during hot-unplug - export for use with other subsystems client: - use driver validated modes dp: - aux: make probing more reliable - mst: Read extended DPCD capabilities during system resume - Support waiting for HDP signal - Port-validation fixes edid: - CEA data-block iterators - struct drm_edid introduction - implement HF-EEODB extension gem: - don't use fb format non-existing planes probe-helper: - use 640x480 as displayport fallback scheduler: - don't kill jobs in interrupt context bridge: - Add support for i.MX8qxp and i.MX8qm - lots of fixes/cleanups - Add TI-DLPC3433 - fy07024di26a30d: Optional GPIO reset - ldb: Add reg and reg-name properties to bindings, Kconfig fixes - lt9611: Fix display sensing; - tc358767: DSI/DPI refactoring and DSI-to-eDP support, DSI lane handling - tc358775: Fix clock settings - ti-sn65dsi83: Allow GPIO to sleep - adv7511: I2C fixes - anx7625: Fix error handling; DPI fixes; Implement HDP timeout via callback - fsl-ldb: Drop DE flip - ti-sn65dsi86: Convert to atomic modesetting amdgpu: - use atomic fence helpers in DM - fix VRAM address calculations - export CRTC bpc via debugfs - Initial devcoredump support - Enable high priority gfx queue on asics which support it - Adjust GART size on newer APUs for S/G display - Soft reset for GFX 11 / SDMA 6 - Add gfxoff status query for vangogh - Fix timestamps for cursor only commits - Adjust GART size on newer APUs for S/G display - fix buddy memory corruption amdkfd: - MMU notifier fixes - P2P DMA support using dma-buf - Add available memory IOCTL - HMM profiler support - Simplify GPUVM validation - Unified memory for CWSR save/restore area i915: - General driver clean-up - DG2 enabling (still under force probe) - DG2 small BAR memory support - HuC loading support - DG2 workarounds - DG2/ATS-M device IDs added - Ponte Vecchio prep work and new blitter engines - add Meteorlake support - Fix sparse warnings - DMC MMIO range checks - Audio related fixes - Runtime PM fixes - PSR fixes - Media freq factor and per-gt enhancements - DSI fixes for ICL+ - Disable DMC flip queue handlers - ADL_P voltage swing updates - Use more the VBT for panel information - Fix on Type-C ports with TBT mode - Improve fastset and allow seamless M/N changes - Accept more fixed modes with VRR/DMRRS panels - Disable connector polling for a headless SKU - ADL-S display PLL w/a - Enable THP on Icelake and beyond - Fix i915_gem_object_ggtt_pin_ww regression on old platforms - Expose per tile media freq factor in sysfs - Fix dma_resv fence handling in multi-batch execbuf - Improve on suspend / resume time with VT-d enabled - export CRTC bpc settings via debugfs msm: - gpu: a619 support - gpu: Fix for unclocked GMU register access - gpu: Devcore dump enhancements - client utilization via fdinfo support - fix fence rollover issue - gem: Lockdep false-positive warning fix - gem: Switch to pfn mappings - WB support on sc7180 - dp: dropped custom bulk clock implementation - fix link retraining on resolution change - hdmi: dropped obsolete GPIO support tegra: - context isolation for host1x engines - tegra234 soc support mediatek: - add vdosys0/1 for mt8195 - add MT8195 dp_intf driver exynos: - Fix resume function issue of exynos decon driver by calling clk_disable_unprepare() properly if clk_prepare_enable() failed. nouveau: - set of misc fixes/cleanups - display cleanups gma500: - Cleanup connector I2C handling hyperv: - Unify VRAM allocation of Gen1 and Gen2 meson: - Support YUV422 output; Refcount fixes mgag200: - Support damage clipping - Support gamma handling - Protect concurrent HW access - Fixes to connector - Store model-specific limits in device-info structure - fix PCI register init panfrost: - Valhall support r128: - Fix bit-shift overflow rockchip: - Locking fixes in error path ssd130x: - Fix built-in linkage udl: - Always advertize VGA connector ast: - Support multiple outputs - fix black screen on resume sun4i: - HDMI PHY cleanups vc4: - Add support for BCM2711 vkms: - Allocate output buffer with vmalloc() mcde: - Fix ref-count leak mxsfb/lcdif: - Support i.MX8MP LCD controller stm/ltdc: - Support dynamic Z order - Support mirroring ingenic: - Fix display at maximum resolution" * tag 'drm-next-2022-08-03' of git://anongit.freedesktop.org/drm/drm: (1480 commits) drm/amd/display: Fix a compilation failure on PowerPC caused by FPU code drm/amdgpu: enable support for psp 13.0.4 block drm/amdgpu: add files for PSP 13.0.4 drm/amdgpu: add header files for MP 13.0.4 drm/amdgpu: correct RLC_RLCS_BOOTLOAD_STATUS offset and index drm/amdgpu: send msg to IMU for the front-door loading drm/amdkfd: use time_is_before_jiffies(a + b) to replace "jiffies - a > b" drm/amdgpu: fix hive reference leak when reflecting psp topology info drm/amd/pm: enable GFX ULV feature support for SMU13.0.0 drm/amd/pm: update driver if header for SMU 13.0.0 drm/amdgpu: move mes self test after drm sched re-started drm/amdgpu: drop non-necessary call trace dump drm/amdgpu: enable VCN cg and JPEG cg/pg drm/amdgpu: vcn_4_0_2 video codec query drm/amdgpu: add VCN_4_0_2 firmware support drm/amdgpu: add VCN function in NBIO v7.7 drm/amdgpu: fix a vcn4 boot poll bug in emulation mode drm/amd/amdgpu: add memory training support for PSP_V13 drm/amdkfd: remove an unnecessary amdgpu_bo_ref drm/amd/pm: Add get_gfx_off_status interface for yellow carp ...
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml')
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dc_features.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c438
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c146
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c57
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c93
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c114
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c93
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c109
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c126
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c7420
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c1733
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c2291
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h74
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c3778
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h57
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c6175
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h1188
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c615
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c684
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h88
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h142
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c185
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h392
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c71
36 files changed, 25842 insertions, 535 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index a64b88ca01a9..359f6e9a1da0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -72,6 +72,11 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags)
@@ -93,6 +98,9 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags)
@@ -116,7 +124,10 @@ DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o
DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o
DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o
+DML += dcn32/display_mode_vba_32.o dcn32/display_rq_dlg_calc_32.o dcn32/display_mode_vba_util_32.o
DML += dcn31/dcn31_fpu.o
+DML += dcn32/dcn32_fpu.o
+DML += dcn321/dcn321_fpu.o
DML += dcn301/dcn301_fpu.o
DML += dcn302/dcn302_fpu.o
DML += dcn303/dcn303_fpu.o
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
index 2a1983324629..74e86732e301 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
@@ -29,7 +29,7 @@
#define DC__PRESENT 1
#define DC__PRESENT__1 1
#define DC__NUM_DPP 4
-#define DC__VOLTAGE_STATES 9
+#define DC__VOLTAGE_STATES 20
#define DC__NUM_DPP__4 1
#define DC__NUM_DPP__0_PRESENT 1
#define DC__NUM_DPP__1_PRESENT 1
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index f79dd40f8d81..ca44df4fca74 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -42,6 +42,9 @@
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
#endif
+/* Constant */
+#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter Requirement doc */
+
/**
* DOC: DCN2x FPU manipulation Overview
*
@@ -650,6 +653,228 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
.num_states = 8
};
+struct wm_table ddr4_wm_table_gs = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 7.09,
+ .sr_enter_plus_exit_time_us = 8.14,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table lpddr4_wm_table_gs = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 5.32,
+ .sr_enter_plus_exit_time_us = 6.38,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.82,
+ .sr_enter_plus_exit_time_us = 11.196,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.89,
+ .sr_enter_plus_exit_time_us = 11.24,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.748,
+ .sr_enter_plus_exit_time_us = 11.102,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table lpddr4_wm_table_with_disabled_ppt = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 8.32,
+ .sr_enter_plus_exit_time_us = 9.38,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.82,
+ .sr_enter_plus_exit_time_us = 11.196,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.89,
+ .sr_enter_plus_exit_time_us = 11.24,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.748,
+ .sr_enter_plus_exit_time_us = 11.102,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table ddr4_wm_table_rn = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 11.90,
+ .sr_enter_plus_exit_time_us = 12.80,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.18,
+ .sr_enter_plus_exit_time_us = 14.30,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.18,
+ .sr_enter_plus_exit_time_us = 14.30,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.18,
+ .sr_enter_plus_exit_time_us = 14.30,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table ddr4_1R_wm_table_rn = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.90,
+ .sr_enter_plus_exit_time_us = 14.80,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.90,
+ .sr_enter_plus_exit_time_us = 14.80,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.90,
+ .sr_enter_plus_exit_time_us = 14.80,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 13.90,
+ .sr_enter_plus_exit_time_us = 14.80,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table lpddr4_wm_table_rn = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 7.32,
+ .sr_enter_plus_exit_time_us = 8.38,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.82,
+ .sr_enter_plus_exit_time_us = 11.196,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.89,
+ .sr_enter_plus_exit_time_us = 11.24,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 9.748,
+ .sr_enter_plus_exit_time_us = 11.102,
+ .valid = true,
+ },
+ }
+};
+
void dcn20_populate_dml_writeback_from_context(struct dc *dc,
struct resource_context *res_ctx,
display_e2e_pipe_params_st *pipes)
@@ -797,6 +1022,12 @@ void dcn20_calculate_dlg_params(
context->bw_ctx.bw.dcn.clk.p_state_change_support =
context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
!= dm_dram_clock_change_unsupported;
+
+ /* Pstate change might not be supported by hardware, but it might be
+ * possible with firmware driven vertical blank stretching.
+ */
+ context->bw_ctx.bw.dcn.clk.p_state_change_support |= context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching;
+
context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
@@ -811,9 +1042,14 @@ void dcn20_calculate_dlg_params(
pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
- context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes;
- context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode;
-
+ if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
+ context->res_ctx.pipe_ctx[i].unbounded_req = false;
+ } else {
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes;
+ context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode;
+ }
if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz =
@@ -1013,6 +1249,8 @@ int dcn20_populate_dml_pipes_from_context(
pipes[pipe_cnt].pipe.dest.pixel_rate_mhz *= 2;
pipes[pipe_cnt].pipe.dest.otg_inst = res_ctx->pipe_ctx[i].stream_res.tg->inst;
pipes[pipe_cnt].dout.dp_lanes = 4;
+ if (res_ctx->pipe_ctx[i].stream->link)
+ pipes[pipe_cnt].dout.dp_rate = dm_dp_rate_na;
pipes[pipe_cnt].dout.is_virtual = 0;
pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min;
pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max;
@@ -1070,6 +1308,7 @@ int dcn20_populate_dml_pipes_from_context(
pipes[pipe_cnt].dout.is_virtual = 1;
pipes[pipe_cnt].dout.output_type = dm_dp;
pipes[pipe_cnt].dout.dp_lanes = 4;
+ pipes[pipe_cnt].dout.dp_rate = dm_dp_rate_hbr2;
}
switch (res_ctx->pipe_ctx[i].stream->timing.display_color_depth) {
@@ -1138,7 +1377,8 @@ int dcn20_populate_dml_pipes_from_context(
* bw calculations due to cursor on/off
*/
if (res_ctx->pipe_ctx[i].plane_state &&
- res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
+ (res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE ||
+ res_ctx->pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM))
pipes[pipe_cnt].pipe.src.num_cursors = 0;
else
pipes[pipe_cnt].pipe.src.num_cursors = dc->dml.ip.number_of_cursors;
@@ -1149,6 +1389,7 @@ int dcn20_populate_dml_pipes_from_context(
if (!res_ctx->pipe_ctx[i].plane_state) {
pipes[pipe_cnt].pipe.src.is_hsplit = pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled;
pipes[pipe_cnt].pipe.src.source_scan = dm_horz;
+ pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_0;
pipes[pipe_cnt].pipe.src.sw_mode = dm_sw_4kb_s;
pipes[pipe_cnt].pipe.src.macro_tile_size = dm_64k_tile;
pipes[pipe_cnt].pipe.src.viewport_width = timing->h_addressable;
@@ -1201,8 +1442,26 @@ int dcn20_populate_dml_pipes_from_context(
pipes[pipe_cnt].pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90
|| pln->rotation == ROTATION_ANGLE_270 ? dm_vert : dm_horz;
+ switch (pln->rotation) {
+ case ROTATION_ANGLE_0:
+ pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_0;
+ break;
+ case ROTATION_ANGLE_90:
+ pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_90;
+ break;
+ case ROTATION_ANGLE_180:
+ pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_180;
+ break;
+ case ROTATION_ANGLE_270:
+ pipes[pipe_cnt].pipe.src.source_rotation = dm_rotation_270;
+ break;
+ default:
+ break;
+ }
pipes[pipe_cnt].pipe.src.viewport_y_y = scl->viewport.y;
pipes[pipe_cnt].pipe.src.viewport_y_c = scl->viewport_c.y;
+ pipes[pipe_cnt].pipe.src.viewport_x_y = scl->viewport.x;
+ pipes[pipe_cnt].pipe.src.viewport_x_c = scl->viewport_c.x;
pipes[pipe_cnt].pipe.src.viewport_width = scl->viewport.width;
pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c.width;
pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport.height;
@@ -1428,21 +1687,20 @@ void dcn20_calculate_wm(
void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb,
struct pp_smu_nv_clock_table *max_clocks, unsigned int *uclk_states, unsigned int num_states)
{
- struct _vcs_dpi_voltage_scaling_st calculated_states[DC__VOLTAGE_STATES];
- int i;
int num_calculated_states = 0;
int min_dcfclk = 0;
+ int i;
dc_assert_fp_enabled();
if (num_states == 0)
return;
- memset(calculated_states, 0, sizeof(calculated_states));
+ memset(bb->clock_limits, 0, sizeof(bb->clock_limits));
- if (dc->bb_overrides.min_dcfclk_mhz > 0)
+ if (dc->bb_overrides.min_dcfclk_mhz > 0) {
min_dcfclk = dc->bb_overrides.min_dcfclk_mhz;
- else {
+ } else {
if (ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev))
min_dcfclk = 310;
else
@@ -1453,36 +1711,35 @@ void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s
for (i = 0; i < num_states; i++) {
int min_fclk_required_by_uclk;
- calculated_states[i].state = i;
- calculated_states[i].dram_speed_mts = uclk_states[i] * 16 / 1000;
+ bb->clock_limits[i].state = i;
+ bb->clock_limits[i].dram_speed_mts = uclk_states[i] * 16 / 1000;
// FCLK:UCLK ratio is 1.08
min_fclk_required_by_uclk = div_u64(((unsigned long long)uclk_states[i]) * 1080,
1000000);
- calculated_states[i].fabricclk_mhz = (min_fclk_required_by_uclk < min_dcfclk) ?
+ bb->clock_limits[i].fabricclk_mhz = (min_fclk_required_by_uclk < min_dcfclk) ?
min_dcfclk : min_fclk_required_by_uclk;
- calculated_states[i].socclk_mhz = (calculated_states[i].fabricclk_mhz > max_clocks->socClockInKhz / 1000) ?
- max_clocks->socClockInKhz / 1000 : calculated_states[i].fabricclk_mhz;
+ bb->clock_limits[i].socclk_mhz = (bb->clock_limits[i].fabricclk_mhz > max_clocks->socClockInKhz / 1000) ?
+ max_clocks->socClockInKhz / 1000 : bb->clock_limits[i].fabricclk_mhz;
- calculated_states[i].dcfclk_mhz = (calculated_states[i].fabricclk_mhz > max_clocks->dcfClockInKhz / 1000) ?
- max_clocks->dcfClockInKhz / 1000 : calculated_states[i].fabricclk_mhz;
+ bb->clock_limits[i].dcfclk_mhz = (bb->clock_limits[i].fabricclk_mhz > max_clocks->dcfClockInKhz / 1000) ?
+ max_clocks->dcfClockInKhz / 1000 : bb->clock_limits[i].fabricclk_mhz;
- calculated_states[i].dispclk_mhz = max_clocks->displayClockInKhz / 1000;
- calculated_states[i].dppclk_mhz = max_clocks->displayClockInKhz / 1000;
- calculated_states[i].dscclk_mhz = max_clocks->displayClockInKhz / (1000 * 3);
+ bb->clock_limits[i].dispclk_mhz = max_clocks->displayClockInKhz / 1000;
+ bb->clock_limits[i].dppclk_mhz = max_clocks->displayClockInKhz / 1000;
+ bb->clock_limits[i].dscclk_mhz = max_clocks->displayClockInKhz / (1000 * 3);
- calculated_states[i].phyclk_mhz = max_clocks->phyClockInKhz / 1000;
+ bb->clock_limits[i].phyclk_mhz = max_clocks->phyClockInKhz / 1000;
num_calculated_states++;
}
- calculated_states[num_calculated_states - 1].socclk_mhz = max_clocks->socClockInKhz / 1000;
- calculated_states[num_calculated_states - 1].fabricclk_mhz = max_clocks->socClockInKhz / 1000;
- calculated_states[num_calculated_states - 1].dcfclk_mhz = max_clocks->dcfClockInKhz / 1000;
+ bb->clock_limits[num_calculated_states - 1].socclk_mhz = max_clocks->socClockInKhz / 1000;
+ bb->clock_limits[num_calculated_states - 1].fabricclk_mhz = max_clocks->socClockInKhz / 1000;
+ bb->clock_limits[num_calculated_states - 1].dcfclk_mhz = max_clocks->dcfClockInKhz / 1000;
- memcpy(bb->clock_limits, calculated_states, sizeof(bb->clock_limits));
bb->num_states = num_calculated_states;
// Duplicate the last state, DML always an extra state identical to max state to work
@@ -1978,7 +2235,6 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
{
struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool);
struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
unsigned int i, closest_clk_lvl = 0, k = 0;
int j;
@@ -1990,9 +2246,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
ASSERT(clk_table->num_entries);
/* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */
- for (i = 0; i < dcn2_1_soc.num_states + 1; i++) {
- clock_limits[i] = dcn2_1_soc.clock_limits[i];
- }
+ memcpy(&dcn2_1_soc._clock_tmp, &dcn2_1_soc.clock_limits,
+ sizeof(dcn2_1_soc.clock_limits));
for (i = 0; i < clk_table->num_entries; i++) {
/* loop backwards*/
@@ -2007,24 +2262,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
if (i == 1)
k++;
- clock_limits[k].state = k;
- clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_limits[k].socclk_mhz = clk_table->entries[i].socclk_mhz;
- clock_limits[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
+ dcn2_1_soc._clock_tmp[k].state = k;
+ dcn2_1_soc._clock_tmp[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ dcn2_1_soc._clock_tmp[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ dcn2_1_soc._clock_tmp[k].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ dcn2_1_soc._clock_tmp[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
- clock_limits[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
- clock_limits[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
- clock_limits[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_limits[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_limits[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_limits[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_limits[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ dcn2_1_soc._clock_tmp[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+ dcn2_1_soc._clock_tmp[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+ dcn2_1_soc._clock_tmp[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ dcn2_1_soc._clock_tmp[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ dcn2_1_soc._clock_tmp[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ dcn2_1_soc._clock_tmp[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ dcn2_1_soc._clock_tmp[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
k++;
}
- for (i = 0; i < clk_table->num_entries + 1; i++)
- dcn2_1_soc.clock_limits[i] = clock_limits[i];
+
+ memcpy(&dcn2_1_soc.clock_limits, &dcn2_1_soc._clock_tmp,
+ sizeof(dcn2_1_soc.clock_limits));
+
if (clk_table->num_entries) {
dcn2_1_soc.num_states = clk_table->num_entries + 1;
/* fill in min DF PState */
@@ -2036,3 +2293,100 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21);
}
+
+void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params)
+{
+ dc_assert_fp_enabled();
+
+ bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY;
+ bw_params->wm_table.entries[WM_D].wm_inst = WM_D;
+ bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING;
+ bw_params->wm_table.entries[WM_D].valid = true;
+}
+
+void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc,
+ struct resource_context *res_ctx,
+ display_e2e_pipe_params_st *pipes)
+{
+ int pipe_cnt, i, j;
+ double max_calc_writeback_dispclk;
+ double writeback_dispclk;
+ struct writeback_st dout_wb;
+
+ dc_assert_fp_enabled();
+
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream;
+
+ if (!stream)
+ continue;
+ max_calc_writeback_dispclk = 0;
+
+ /* Set writeback information */
+ pipes[pipe_cnt].dout.wb_enable = 0;
+ pipes[pipe_cnt].dout.num_active_wb = 0;
+ for (j = 0; j < stream->num_wb_info; j++) {
+ struct dc_writeback_info *wb_info = &stream->writeback_info[j];
+
+ if (wb_info->wb_enabled && wb_info->writeback_source_plane &&
+ (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) {
+ pipes[pipe_cnt].dout.wb_enable = 1;
+ pipes[pipe_cnt].dout.num_active_wb++;
+ dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ?
+ wb_info->dwb_params.cnv_params.crop_height :
+ wb_info->dwb_params.cnv_params.src_height;
+ dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ?
+ wb_info->dwb_params.cnv_params.crop_width :
+ wb_info->dwb_params.cnv_params.src_width;
+ dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
+ dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;
+ dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
+ dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
+ dout_wb.wb_htaps_chroma = wb_info->dwb_params.scaler_taps.h_taps_c;
+ dout_wb.wb_vtaps_chroma = wb_info->dwb_params.scaler_taps.v_taps_c;
+ dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_width /
+ (double)wb_info->dwb_params.dest_width :
+ (double)wb_info->dwb_params.cnv_params.src_width /
+ (double)wb_info->dwb_params.dest_width;
+ dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_height /
+ (double)wb_info->dwb_params.dest_height :
+ (double)wb_info->dwb_params.cnv_params.src_height /
+ (double)wb_info->dwb_params.dest_height;
+ if (wb_info->dwb_params.out_format == dwb_scaler_mode_yuv420) {
+ if (wb_info->dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC)
+ dout_wb.wb_pixel_format = dm_420_8;
+ else
+ dout_wb.wb_pixel_format = dm_420_10;
+ } else
+ dout_wb.wb_pixel_format = dm_444_32;
+
+ /* Workaround for cases where multiple writebacks are connected to same plane
+ * In which case, need to compute worst case and set the associated writeback parameters
+ * This workaround is necessary due to DML computation assuming only 1 set of writeback
+ * parameters per pipe */
+ writeback_dispclk = CalculateWriteBackDISPCLK(
+ dout_wb.wb_pixel_format,
+ pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
+ dout_wb.wb_hratio,
+ dout_wb.wb_vratio,
+ dout_wb.wb_htaps_luma,
+ dout_wb.wb_vtaps_luma,
+ dout_wb.wb_htaps_chroma,
+ dout_wb.wb_vtaps_chroma,
+ dout_wb.wb_dst_width,
+ pipes[pipe_cnt].pipe.dest.htotal,
+ 2);
+
+ if (writeback_dispclk > max_calc_writeback_dispclk) {
+ max_calc_writeback_dispclk = writeback_dispclk;
+ pipes[pipe_cnt].dout.wb = dout_wb;
+ }
+ }
+ }
+
+ pipe_cnt++;
+ }
+
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
index aa892193e485..c51badf7b68a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
@@ -82,4 +82,10 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc,
bool fast_validate);
void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params);
+
+void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc,
+ struct resource_context *res_ctx,
+ display_e2e_pipe_params_st *pipes);
+
#endif /* __DCN20_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
index 574676a0711a..e1e92daba668 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
@@ -29,7 +29,7 @@
#include "dcn20/dcn20_resource.h"
#include "dcn30/dcn30_resource.h"
-
+#include "clk_mgr/dcn30/dcn30_smu11_driver_if.h"
#include "display_mode_vba_30.h"
#include "dcn30_fpu.h"
@@ -181,7 +181,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = {
void optc3_fpu_set_vrr_m_const(struct timing_generator *optc,
double vtotal_avg)
{
-struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
double vtotal_min, vtotal_max;
double ratio, modulo, phase;
uint32_t vblank_start;
@@ -350,24 +350,24 @@ void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params,
int pipe_cnt,
int cur_pipe)
{
- int i;
+ int i;
dc_assert_fp_enabled();
- for (i = 0; i < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(wb_arb_params->cli_watermark); i++) {
wb_arb_params->cli_watermark[i] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000;
wb_arb_params->pstate_watermark[i] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
- }
+ }
- wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[cur_pipe] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */
+ wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[cur_pipe] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */
}
void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
{
-dc_assert_fp_enabled();
+ dc_assert_fp_enabled();
-if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
@@ -380,12 +380,12 @@ void dcn30_fpu_calculate_wm_and_dlg(
int pipe_cnt,
int vlevel)
{
-int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+ int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
int i, pipe_idx;
double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
-dc_assert_fp_enabled();
+ dc_assert_fp_enabled();
if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
@@ -529,6 +529,8 @@ dc_assert_fp_enabled();
context->bw_ctx.dml.soc.dram_clock_change_latency_us =
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching)
+ dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context);
}
void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc)
@@ -614,4 +616,128 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
}
+/**
+ * Finds dummy_latency_index when MCLK switching using firmware based
+ * vblank stretch is enabled. This function will iterate through the
+ * table of dummy pstate latencies until the lowest value that allows
+ * dm_allow_self_refresh_and_mclk_switch to happen is found
+ */
+int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ const int max_latency_table_entries = 4;
+ int dummy_latency_index = 0;
+
+ dc_assert_fp_enabled();
+ while (dummy_latency_index < max_latency_table_entries) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+ dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+
+ if (context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank ==
+ dm_allow_self_refresh_and_mclk_switch)
+ break;
+
+ dummy_latency_index++;
+ }
+
+ if (dummy_latency_index == max_latency_table_entries) {
+ ASSERT(dummy_latency_index != max_latency_table_entries);
+ /* If the execution gets here, it means dummy p_states are
+ * not possible. This should never happen and would mean
+ * something is severely wrong.
+ * Here we reset dummy_latency_index to 3, because it is
+ * better to have underflows than system crashes.
+ */
+ dummy_latency_index = 3;
+ }
+
+ return dummy_latency_index;
+}
+
+void dcn3_fpu_build_wm_range_table(struct clk_mgr *base)
+{
+ /* defaults */
+ double pstate_latency_us = base->ctx->dc->dml.soc.dram_clock_change_latency_us;
+ double sr_exit_time_us = base->ctx->dc->dml.soc.sr_exit_time_us;
+ double sr_enter_plus_exit_time_us = base->ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
+ uint16_t min_uclk_mhz = base->bw_params->clk_table.entries[0].memclk_mhz;
+
+ dc_assert_fp_enabled();
+
+ /* Set A - Normal - default values*/
+ base->bw_params->wm_table.nv_entries[WM_A].valid = true;
+ base->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us;
+ base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us;
+ base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
+
+ /* Set B - Performance - higher minimum clocks */
+// base->bw_params->wm_table.nv_entries[WM_B].valid = true;
+// base->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us;
+// base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us;
+// base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE;
+// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF;
+
+ /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
+ base->bw_params->wm_table.nv_entries[WM_C].valid = true;
+ base->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0;
+ base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
+ base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
+ base->bw_params->dummy_pstate_table[0].dram_speed_mts = 1600;
+ base->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
+ base->bw_params->dummy_pstate_table[1].dram_speed_mts = 8000;
+ base->bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
+ base->bw_params->dummy_pstate_table[2].dram_speed_mts = 10000;
+ base->bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8;
+ base->bw_params->dummy_pstate_table[3].dram_speed_mts = 16000;
+ base->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5;
+
+ /* Set D - MALL - SR enter and exit times adjusted for MALL */
+ base->bw_params->wm_table.nv_entries[WM_D].valid = true;
+ base->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
+ base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
+ base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
+}
+
+void patch_dcn30_soc_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *dcn3_0_ip)
+{
+ dc_assert_fp_enabled();
+
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info) {
+ struct bp_soc_bb_info bb_info = {0};
+
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
+ if (bb_info.dram_clock_change_latency_100ns > 0)
+ dcn3_0_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10;
+
+ if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dcn3_0_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+ if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dcn3_0_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
index dedfe7b5f173..cab864095ce7 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
@@ -63,5 +63,14 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
unsigned int *dcfclk_mhz,
unsigned int *dram_speed_mts);
+int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
+void dcn3_fpu_build_wm_range_table(struct clk_mgr *base);
+
+void patch_dcn30_soc_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *dcn3_0_ip);
#endif /* __DCN30_FPU_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index f47d82da115c..876b321b30ca 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -438,8 +438,8 @@ static void UseMinimumDCFCLK(
int dpte_group_bytes[],
double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
- int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
int BytePerPixelY[],
int BytePerPixelC[],
int HTotal[],
@@ -712,18 +712,6 @@ static double CalculateUrgentLatency(
double UrgentLatencyAdjustmentFabricClockReference,
double FabricClockSingle);
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC);
-
void dml30_recalculate(struct display_mode_lib *mode_lib)
{
ModeSupportAndSystemConfiguration(mode_lib);
@@ -2095,7 +2083,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
DTRACE(" return_bus_bw = %f", v->ReturnBW);
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -3049,40 +3037,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
{
//Maximum Bandwidth Used
- double TotalWRBandwidth = 0;
- double MaxPerPlaneVActiveWRBandwidth = 0;
- double WRBandwidth = 0;
- double MaxUsedBW = 0;
- for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- if (v->WritebackEnable[k] == true
- && v->WritebackPixelFormat[k] == dm_444_32) {
- WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
- / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
- } else if (v->WritebackEnable[k] == true) {
- WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
- / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
- }
- TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
- MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
- }
-
v->TotalDataReadBandwidth = 0;
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
+ v->ReadBandwidthPlaneLuma[k]
+ v->ReadBandwidthPlaneChroma[k];
}
-
- {
- double MaxPerPlaneVActiveRDBandwidth = 0;
- for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- MaxPerPlaneVActiveRDBandwidth = dml_max(MaxPerPlaneVActiveRDBandwidth,
- v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
-
- }
- }
-
- MaxUsedBW = MaxTotalRDBandwidth + TotalWRBandwidth;
}
// VStartup Margin
@@ -3165,7 +3125,7 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
mode_lib->vba.SourcePixelFormat[k],
mode_lib->vba.SurfaceTiling[k],
&BytePerPixY[k],
@@ -3218,7 +3178,7 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
&dummysinglestring);
}
-static bool CalculateBytePerPixelAnd256BBlockSizes(
+void dml30_CalculateBytePerPixelAnd256BBlockSizes(
enum source_format_class SourcePixelFormat,
enum dm_swizzle_mode SurfaceTiling,
unsigned int *BytePerPixelY,
@@ -3305,7 +3265,6 @@ static bool CalculateBytePerPixelAnd256BBlockSizes(
*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
}
- return true;
}
static double CalculateTWait(
@@ -3709,7 +3668,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/*Bandwidth Support Check*/
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -6696,8 +6655,8 @@ static void UseMinimumDCFCLK(
int dpte_group_bytes[],
double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
- int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
- int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
int BytePerPixelY[],
int BytePerPixelC[],
int HTotal[],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h
index 4e249eaabfdb..daaf0883b84d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h
@@ -39,5 +39,16 @@ double dml30_CalculateWriteBackDISPCLK(
long WritebackDestinationWidth,
unsigned int HTotal,
unsigned int WritebackLineBufferSize);
+void dml30_CalculateBytePerPixelAnd256BBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC);
#endif /* __DML30_DISPLAY_MODE_VBA_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
index 747167083dea..8179be1f34bb 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
@@ -29,6 +29,7 @@
#include "../display_mode_vba.h"
#include "../dml_inline_defs.h"
#include "display_rq_dlg_calc_30.h"
+#include "display_mode_vba_30.h"
static bool is_dual_plane(enum source_format_class source_format)
{
@@ -275,96 +276,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
full_swath_bytes_packed_c);
}
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC)
-{
- if (SourcePixelFormat == dm_444_64) {
- *BytePerPixelDETY = 8;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 8;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 4;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_16) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 2;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 1;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 1;
- *BytePerPixelY = 4;
- *BytePerPixelC = 1;
- } else if (SourcePixelFormat == dm_420_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 2;
- *BytePerPixelY = 1;
- *BytePerPixelC = 2;
- } else if (SourcePixelFormat == dm_420_12) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 4;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- } else {
- *BytePerPixelDETY = 4.0 / 3;
- *BytePerPixelDETC = 8.0 / 3;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- }
-
- if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
- || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
- || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
- || SourcePixelFormat == dm_rgbe)) {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- } else if (SourcePixelFormat == dm_444_64) {
- *BlockHeight256BytesY = 4;
- } else if (SourcePixelFormat == dm_444_8) {
- *BlockHeight256BytesY = 16;
- } else {
- *BlockHeight256BytesY = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockHeight256BytesC = 0;
- *BlockWidth256BytesC = 0;
- } else {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- *BlockHeight256BytesC = 1;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 16;
- } else if (SourcePixelFormat == dm_420_8) {
- *BlockHeight256BytesY = 16;
- *BlockHeight256BytesC = 8;
- } else {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
- }
- return true;
-}
-
static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
display_data_rq_dlg_params_st *rq_dlg_param,
display_data_rq_misc_params_st *rq_misc_param,
@@ -450,7 +361,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
double byte_per_pixel_det_y = 0;
double byte_per_pixel_det_c = 0;
- CalculateBytePerPixelAnd256BBlockSizes((enum source_format_class)(source_format),
+ dml30_CalculateBytePerPixelAnd256BBlockSizes((enum source_format_class)(source_format),
(enum dm_swizzle_mode)(tiling),
&bytes_per_element_y,
&bytes_per_element_c,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
index 0a7a33864973..7ef66e511ec8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
@@ -214,6 +214,80 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_01_soc = {
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
};
+struct wm_table ddr4_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 6.09,
+ .sr_enter_plus_exit_time_us = 7.14,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ }
+};
+
+struct wm_table lpddr5_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+ .sr_exit_time_us = 13.5,
+ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ }
+};
+
static void calculate_wm_set_for_vlevel(int vlevel,
struct wm_range_table_entry *table_entry,
struct dcn_watermarks *wm_set,
@@ -249,12 +323,14 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
{
struct dcn301_resource_pool *pool = TO_DCN301_RES_POOL(dc->res_pool);
struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
unsigned int i, closest_clk_lvl;
int j;
dc_assert_fp_enabled();
+ memcpy(&dcn3_01_soc._clock_tmp, &dcn3_01_soc.clock_limits,
+ sizeof(dcn3_01_soc.clock_limits));
+
/* Default clock levels are used for diags, which may lead to overclocking. */
if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
dcn3_01_ip.max_num_otg = pool->base.res_cap->num_timing_generator;
@@ -271,32 +347,32 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
}
}
- clock_limits[i].state = i;
- clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
- clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
-
- clock_limits[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
- clock_limits[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
- clock_limits[i].dram_bw_per_chan_gbps = dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_limits[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_limits[i].dtbclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_limits[i].phyclk_d18_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_limits[i].phyclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ dcn3_01_soc._clock_tmp[i].state = i;
+ dcn3_01_soc._clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ dcn3_01_soc._clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ dcn3_01_soc._clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ dcn3_01_soc._clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
+
+ dcn3_01_soc._clock_tmp[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+ dcn3_01_soc._clock_tmp[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+ dcn3_01_soc._clock_tmp[i].dram_bw_per_chan_gbps = dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ dcn3_01_soc._clock_tmp[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ dcn3_01_soc._clock_tmp[i].dtbclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ dcn3_01_soc._clock_tmp[i].phyclk_d18_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ dcn3_01_soc._clock_tmp[i].phyclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
}
- for (i = 0; i < clk_table->num_entries; i++)
- dcn3_01_soc.clock_limits[i] = clock_limits[i];
-
if (clk_table->num_entries) {
dcn3_01_soc.num_states = clk_table->num_entries;
/* duplicate last level */
- dcn3_01_soc.clock_limits[dcn3_01_soc.num_states] = dcn3_01_soc.clock_limits[dcn3_01_soc.num_states - 1];
- dcn3_01_soc.clock_limits[dcn3_01_soc.num_states].state = dcn3_01_soc.num_states;
+ dcn3_01_soc._clock_tmp[dcn3_01_soc.num_states] = dcn3_01_soc.clock_limits[dcn3_01_soc.num_states - 1];
+ dcn3_01_soc._clock_tmp[dcn3_01_soc.num_states].state = dcn3_01_soc.num_states;
}
}
+ memcpy(&dcn3_01_soc.clock_limits, &dcn3_01_soc._clock_tmp,
+ sizeof(dcn3_01_soc.clock_limits));
+
dcn3_01_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
index 8fb14baed208..3eb3a021ab7d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
@@ -202,7 +202,7 @@ void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p
unsigned int num_dcfclk_sta_targets = 4;
unsigned int num_uclk_states;
- dc_assert_fp_enabled();
+ dc_assert_fp_enabled();
if (dc->ctx->dc_bios->vram_info.num_chans)
dcn3_03_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
@@ -349,14 +349,11 @@ void dcn303_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info)
dc_assert_fp_enabled();
if (bb_info.dram_clock_change_latency_100ns > 0)
- dcn3_03_soc.dram_clock_change_latency_us =
- bb_info.dram_clock_change_latency_100ns * 10;
+ dcn3_03_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10;
- if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
- dcn3_03_soc.sr_enter_plus_exit_time_us =
- bb_info.dram_sr_enter_exit_latency_100ns * 10;
+ if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dcn3_03_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10;
- if (bb_info.dram_sr_exit_latency_100ns > 0)
- dcn3_03_soc.sr_exit_time_us =
- bb_info.dram_sr_exit_latency_100ns * 10;
+ if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dcn3_03_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index 54db2eca9e6b..e36cfa5985ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -201,7 +201,7 @@ struct _vcs_dpi_ip_params_st dcn3_15_ip = {
.hostvm_max_page_table_levels = 2,
.rob_buffer_size_kbytes = 64,
.det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE,
- .min_comp_buffer_size_kbytes = DCN3_15_MIN_COMPBUF_SIZE_KB,
+ .min_comp_buffer_size_kbytes = 64,
.config_return_buffer_size_in_kbytes = 1024,
.compressed_buffer_segment_size_in_kbytes = 64,
.meta_fifo_size_in_kentries = 32,
@@ -297,6 +297,7 @@ struct _vcs_dpi_ip_params_st dcn3_16_ip = {
.hostvm_max_page_table_levels = 2,
.rob_buffer_size_kbytes = 64,
.det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE,
+ .min_comp_buffer_size_kbytes = 64,
.config_return_buffer_size_in_kbytes = 1024,
.compressed_buffer_segment_size_in_kbytes = 64,
.meta_fifo_size_in_kentries = 32,
@@ -434,6 +435,26 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
};
+void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+ dc_assert_fp_enabled();
+
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
+
+void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
+{
+ dc_assert_fp_enabled();
+
+ if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us;
+ }
+}
+
void dcn31_calculate_wm_and_dlg_fp(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
@@ -574,12 +595,14 @@ void dcn31_calculate_wm_and_dlg_fp(
void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
unsigned int i, closest_clk_lvl;
int j;
dc_assert_fp_enabled();
+ memcpy(&dcn3_1_soc._clock_tmp, &dcn3_1_soc.clock_limits,
+ sizeof(dcn3_1_soc.clock_limits));
+
// Default clock levels are used for diags, which may lead to overclocking.
if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
@@ -607,34 +630,35 @@ void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
}
}
- clock_limits[i].state = i;
+ dcn3_1_soc._clock_tmp[i].state = i;
/* Clocks dependent on voltage level. */
- clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
- clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+ dcn3_1_soc._clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ dcn3_1_soc._clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ dcn3_1_soc._clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ dcn3_1_soc._clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
/* Clocks independent of voltage level. */
- clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_1_soc._clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
- clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_1_soc._clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
- clock_limits[i].dram_bw_per_chan_gbps = dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_limits[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_limits[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_limits[i].phyclk_d18_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_limits[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ dcn3_1_soc._clock_tmp[i].dram_bw_per_chan_gbps = dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ dcn3_1_soc._clock_tmp[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ dcn3_1_soc._clock_tmp[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ dcn3_1_soc._clock_tmp[i].phyclk_d18_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ dcn3_1_soc._clock_tmp[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
}
- for (i = 0; i < clk_table->num_entries; i++)
- dcn3_1_soc.clock_limits[i] = clock_limits[i];
if (clk_table->num_entries) {
dcn3_1_soc.num_states = clk_table->num_entries;
}
}
+ memcpy(&dcn3_1_soc.clock_limits, &dcn3_1_soc._clock_tmp,
+ sizeof(dcn3_1_soc.clock_limits));
+
dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
@@ -701,13 +725,15 @@ void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
unsigned int i, closest_clk_lvl;
int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
int j;
dc_assert_fp_enabled();
+ memcpy(&dcn3_16_soc._clock_tmp, &dcn3_16_soc.clock_limits,
+ sizeof(dcn3_16_soc.clock_limits));
+
// Default clock levels are used for diags, which may lead to overclocking.
if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
@@ -739,39 +765,40 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
closest_clk_lvl = dcn3_16_soc.num_states - 1;
}
- clock_limits[i].state = i;
+ dcn3_16_soc._clock_tmp[i].state = i;
/* Clocks dependent on voltage level. */
- clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ dcn3_16_soc._clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
if (clk_table->num_entries == 1 &&
- clock_limits[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+ dcn3_16_soc._clock_tmp[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
/*SMU fix not released yet*/
- clock_limits[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+ dcn3_16_soc._clock_tmp[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
}
- clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
- clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+ dcn3_16_soc._clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ dcn3_16_soc._clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ dcn3_16_soc._clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
/* Clocks independent of voltage level. */
- clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_16_soc._clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
- clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_16_soc._clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
- clock_limits[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_limits[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_limits[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_limits[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_limits[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ dcn3_16_soc._clock_tmp[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ dcn3_16_soc._clock_tmp[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ dcn3_16_soc._clock_tmp[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ dcn3_16_soc._clock_tmp[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ dcn3_16_soc._clock_tmp[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
}
- for (i = 0; i < clk_table->num_entries; i++)
- dcn3_16_soc.clock_limits[i] = clock_limits[i];
if (clk_table->num_entries) {
dcn3_16_soc.num_states = clk_table->num_entries;
}
}
+ memcpy(&dcn3_16_soc.clock_limits, &dcn3_16_soc._clock_tmp,
+ sizeof(dcn3_16_soc.clock_limits));
+
if (max_dispclk_mhz) {
dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
index 24ac19c83687..4372f17b55d4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
@@ -31,6 +31,11 @@
#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
#define DCN3_16_DEFAULT_DET_SIZE 192
+void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
+void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
+
void dcn31_calculate_wm_and_dlg_fp(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index e4b9fd31223c..3fab19134480 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -26,6 +26,7 @@
#include "dc.h"
#include "dc_link.h"
#include "../display_mode_lib.h"
+#include "dml/dcn30/display_mode_vba_30.h"
#include "display_mode_vba_31.h"
#include "../dml_inline_defs.h"
@@ -86,17 +87,6 @@ typedef struct {
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC);
static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
static unsigned int dscceComputeDelay(
@@ -2220,7 +2210,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
DTRACE(" return_bus_bw = %f", v->ReturnBW);
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -2996,7 +2986,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->ImmediateFlipSupported)) ? true : false;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
- dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
+ dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required);
dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
@@ -3415,7 +3405,7 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&BytePerPixY[k],
@@ -3469,94 +3459,6 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
&dummysinglestring);
}
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC)
-{
- if (SourcePixelFormat == dm_444_64) {
- *BytePerPixelDETY = 8;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 8;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 4;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_16) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 2;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 1;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 1;
- *BytePerPixelY = 4;
- *BytePerPixelC = 1;
- } else if (SourcePixelFormat == dm_420_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 2;
- *BytePerPixelY = 1;
- *BytePerPixelC = 2;
- } else if (SourcePixelFormat == dm_420_12) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 4;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- } else {
- *BytePerPixelDETY = 4.0 / 3;
- *BytePerPixelDETC = 8.0 / 3;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- }
-
- if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
- || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- } else if (SourcePixelFormat == dm_444_64) {
- *BlockHeight256BytesY = 4;
- } else if (SourcePixelFormat == dm_444_8) {
- *BlockHeight256BytesY = 16;
- } else {
- *BlockHeight256BytesY = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockHeight256BytesC = 0;
- *BlockWidth256BytesC = 0;
- } else {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- *BlockHeight256BytesC = 1;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 16;
- } else if (SourcePixelFormat == dm_420_8) {
- *BlockHeight256BytesY = 16;
- *BlockHeight256BytesC = 8;
- } else {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
- }
- return true;
-}
-
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
if (PrefetchMode == 0) {
@@ -4066,7 +3968,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/*Bandwidth Support Check*/
for (k = 0; k < v->NumberOfActivePlanes; k++) {
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
v->SourcePixelFormat[k],
v->SurfaceTiling[k],
&v->BytePerPixelY[k],
@@ -4310,6 +4212,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
if (v->ODMCombinePolicy == dm_odm_combine_policy_none
|| !(v->Output[k] == dm_dp ||
+ v->Output[k] == dm_dp2p0 ||
v->Output[k] == dm_edp)) {
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
index 53d760e169e6..66b82e4f05c6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
@@ -27,94 +27,7 @@
#include "../display_mode_vba.h"
#include "../dml_inline_defs.h"
#include "display_rq_dlg_calc_31.h"
-
-static bool CalculateBytePerPixelAnd256BBlockSizes(
- enum source_format_class SourcePixelFormat,
- enum dm_swizzle_mode SurfaceTiling,
- unsigned int *BytePerPixelY,
- unsigned int *BytePerPixelC,
- double *BytePerPixelDETY,
- double *BytePerPixelDETC,
- unsigned int *BlockHeight256BytesY,
- unsigned int *BlockHeight256BytesC,
- unsigned int *BlockWidth256BytesY,
- unsigned int *BlockWidth256BytesC)
-{
- if (SourcePixelFormat == dm_444_64) {
- *BytePerPixelDETY = 8;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 8;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 4;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_16) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 2;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_444_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 0;
- *BytePerPixelY = 1;
- *BytePerPixelC = 0;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BytePerPixelDETY = 4;
- *BytePerPixelDETC = 1;
- *BytePerPixelY = 4;
- *BytePerPixelC = 1;
- } else if (SourcePixelFormat == dm_420_8) {
- *BytePerPixelDETY = 1;
- *BytePerPixelDETC = 2;
- *BytePerPixelY = 1;
- *BytePerPixelC = 2;
- } else if (SourcePixelFormat == dm_420_12) {
- *BytePerPixelDETY = 2;
- *BytePerPixelDETC = 4;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- } else {
- *BytePerPixelDETY = 4.0 / 3;
- *BytePerPixelDETC = 8.0 / 3;
- *BytePerPixelY = 2;
- *BytePerPixelC = 4;
- }
-
- if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
- || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- } else if (SourcePixelFormat == dm_444_64) {
- *BlockHeight256BytesY = 4;
- } else if (SourcePixelFormat == dm_444_8) {
- *BlockHeight256BytesY = 16;
- } else {
- *BlockHeight256BytesY = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockHeight256BytesC = 0;
- *BlockWidth256BytesC = 0;
- } else {
- if (SurfaceTiling == dm_sw_linear) {
- *BlockHeight256BytesY = 1;
- *BlockHeight256BytesC = 1;
- } else if (SourcePixelFormat == dm_rgbe_alpha) {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 16;
- } else if (SourcePixelFormat == dm_420_8) {
- *BlockHeight256BytesY = 16;
- *BlockHeight256BytesC = 8;
- } else {
- *BlockHeight256BytesY = 8;
- *BlockHeight256BytesC = 8;
- }
- *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
- *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
- }
- return true;
-}
+#include "dml/dcn30/display_mode_vba_30.h"
static bool is_dual_plane(enum source_format_class source_format)
{
@@ -467,7 +380,7 @@ static void get_meta_and_pte_attr(
double byte_per_pixel_det_y;
double byte_per_pixel_det_c;
- CalculateBytePerPixelAnd256BBlockSizes(
+ dml30_CalculateBytePerPixelAnd256BBlockSizes(
(enum source_format_class) (source_format),
(enum dm_swizzle_mode) (tiling),
&bytes_per_element_y,
@@ -953,7 +866,6 @@ static void dml_rq_dlg_get_dlg_params(
{
const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
- const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout;
const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth;
const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps;
@@ -1003,9 +915,6 @@ static void dml_rq_dlg_get_dlg_params(
unsigned int vupdate_width;
unsigned int vready_offset;
- unsigned int dppclk_delay_subtotal;
- unsigned int dispclk_delay_subtotal;
-
unsigned int vstartup_start;
unsigned int dst_x_after_scaler;
unsigned int dst_y_after_scaler;
@@ -1055,7 +964,6 @@ static void dml_rq_dlg_get_dlg_params(
float vba__refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
float vba__refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
- int blank_lines;
memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs));
memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs));
@@ -1079,20 +987,9 @@ static void dml_rq_dlg_get_dlg_params(
min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
-
disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2));
- blank_lines = (dst->vblank_end + dst->vtotal_min - dst->vblank_start - dst->vstartup_start - 1);
- if (blank_lines < 0)
- blank_lines = 0;
- if (blank_lines != 0) {
- disp_dlg_regs->optimized_min_dst_y_next_start_us =
- ((unsigned int) blank_lines * dst->hactive) / (unsigned int) dst->pixel_rate_mhz;
- disp_dlg_regs->optimized_min_dst_y_next_start =
- (unsigned int)(((double) (dlg_vblank_start + blank_lines)) * dml_pow(2, 2));
- } else {
- // use unoptimized value
- disp_dlg_regs->optimized_min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start;
- }
+ disp_dlg_regs->optimized_min_dst_y_next_start_us = 0;
+ disp_dlg_regs->optimized_min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start;
ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank);
@@ -1136,21 +1033,6 @@ static void dml_rq_dlg_get_dlg_params(
vupdate_width = dst->vupdate_width;
vready_offset = dst->vready_offset;
- dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal;
- dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal;
-
- if (scl_enable)
- dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl;
- else
- dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only;
-
- dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor;
-
- if (dout->dsc_enable) {
- double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA
- dispclk_delay_subtotal += dsc_delay;
- }
-
vstartup_start = dst->vstartup_start;
if (interlaced) {
if (vstartup_start / 2.0 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end / 2.0)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
new file mode 100644
index 000000000000..fc4d7474c111
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -0,0 +1,7420 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#define UNIT_TEST 0
+#if !UNIT_TEST
+#include "dc.h"
+#include "dc_link.h"
+#endif
+#include "../display_mode_lib.h"
+#include "display_mode_vba_314.h"
+#include "../dml_inline_defs.h"
+
+/*
+ * NOTE:
+ * This file is gcc-parsable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
+#define BPP_INVALID 0
+#define BPP_BLENDED_PIPE 0xffffffff
+#define DCN314_MAX_DSC_IMAGE_WIDTH 5184
+#define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096
+
+// For DML-C changes that hasn't been propagated to VBA yet
+//#define __DML_VBA_ALLOW_DELTA__
+
+// Move these to ip parameters/constant
+
+// At which vstartup the DML start to try if the mode can be supported
+#define __DML_VBA_MIN_VSTARTUP__ 9
+
+// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
+#define __DML_ARB_TO_RET_DELAY__ (7 + 95)
+
+// fudge factor for min dcfclk calclation
+#define __DML_MIN_DCFCLK_FACTOR__ 1.15
+
+struct {
+ double DPPCLK;
+ double DISPCLK;
+ double PixelClock;
+ double DCFCLKDeepSleep;
+ unsigned int DPPPerPlane;
+ bool ScalerEnabled;
+ double VRatio;
+ double VRatioChroma;
+ enum scan_direction_class SourceScan;
+ unsigned int BlockWidth256BytesY;
+ unsigned int BlockHeight256BytesY;
+ unsigned int BlockWidth256BytesC;
+ unsigned int BlockHeight256BytesC;
+ unsigned int InterlaceEnable;
+ unsigned int NumberOfCursors;
+ unsigned int VBlank;
+ unsigned int HTotal;
+ unsigned int DCCEnable;
+ bool ODMCombineIsEnabled;
+ enum source_format_class SourcePixelFormat;
+ int BytePerPixelY;
+ int BytePerPixelC;
+ bool ProgressiveToInterlaceUnitInOPP;
+} Pipe;
+
+#define BPP_INVALID 0
+#define BPP_BLENDED_PIPE 0xffffffff
+
+static bool CalculateBytePerPixelAnd256BBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC);
+static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
+static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
+static unsigned int dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum output_format_class pixelFormat,
+ enum output_encoder_class Output);
+static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
+static bool CalculatePrefetchSchedule(
+ struct display_mode_lib *mode_lib,
+ double HostVMInefficiencyFactor,
+ Pipe *myPipe,
+ unsigned int DSCDelay,
+ double DPPCLKDelaySubtotalPlusCNVCFormater,
+ double DPPCLKDelaySCL,
+ double DPPCLKDelaySCLLBOnly,
+ double DPPCLKDelayCNVCCursor,
+ double DISPCLKDelaySubtotal,
+ unsigned int DPP_RECOUT_WIDTH,
+ enum output_format_class OutputFormat,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int VStartup,
+ unsigned int MaxVStartup,
+ unsigned int GPUVMPageTableLevels,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ double HostVMMinPageSize,
+ bool DynamicMetadataEnable,
+ bool DynamicMetadataVMEnabled,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int DynamicMetadataTransmittedBytes,
+ double UrgentLatency,
+ double UrgentExtraLatency,
+ double TCalc,
+ unsigned int PDEAndMetaPTEBytesFrame,
+ unsigned int MetaRowByte,
+ unsigned int PixelPTEBytesPerRow,
+ double PrefetchSourceLinesY,
+ unsigned int SwathWidthY,
+ double VInitPreFillY,
+ unsigned int MaxNumSwathY,
+ double PrefetchSourceLinesC,
+ unsigned int SwathWidthC,
+ double VInitPreFillC,
+ unsigned int MaxNumSwathC,
+ int swath_width_luma_ub,
+ int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double TWait,
+ double *DSTXAfterScaler,
+ double *DSTYAfterScaler,
+ double *DestinationLinesForPrefetch,
+ double *PrefetchBandwidth,
+ double *DestinationLinesToRequestVMInVBlank,
+ double *DestinationLinesToRequestRowInVBlank,
+ double *VRatioPrefetchY,
+ double *VRatioPrefetchC,
+ double *RequiredPrefetchPixDataBWLuma,
+ double *RequiredPrefetchPixDataBWChroma,
+ bool *NotEnoughTimeForDynamicMetadata,
+ double *Tno_bw,
+ double *prefetch_vmrow_bw,
+ double *Tdmdl_vm,
+ double *Tdmdl,
+ double *TSetup,
+ int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix);
+static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
+static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
+static void CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ double DETBufferSize,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dm_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum scan_direction_class ScanOrientation,
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma);
+static double CalculatePrefetchSourceLines(
+ struct display_mode_lib *mode_lib,
+ double VRatio,
+ double vtaps,
+ bool Interlace,
+ bool ProgressiveToInterlaceUnitInOPP,
+ unsigned int SwathHeight,
+ unsigned int ViewportYStart,
+ double *VInitPreFill,
+ unsigned int *MaxNumSwath);
+static unsigned int CalculateVMAndRowBytes(
+ struct display_mode_lib *mode_lib,
+ bool DCCEnable,
+ unsigned int BlockHeight256Bytes,
+ unsigned int BlockWidth256Bytes,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceTiling,
+ unsigned int BytePerPixel,
+ enum scan_direction_class ScanDirection,
+ unsigned int SwathWidth,
+ unsigned int ViewportHeight,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMinPageSize,
+ unsigned int HostVMMinPageSize,
+ unsigned int PTEBufferSizeInRequests,
+ unsigned int Pitch,
+ unsigned int DCCMetaPitch,
+ unsigned int *MacroTileWidth,
+ unsigned int *MetaRowByte,
+ unsigned int *PixelPTEBytesPerRow,
+ bool *PTEBufferSizeNotExceeded,
+ int *dpte_row_width_ub,
+ unsigned int *dpte_row_height,
+ unsigned int *MetaRequestWidth,
+ unsigned int *MetaRequestHeight,
+ unsigned int *meta_row_width,
+ unsigned int *meta_row_height,
+ int *vm_group_bytes,
+ unsigned int *dpte_group_bytes,
+ unsigned int *PixelPTEReqWidth,
+ unsigned int *PixelPTEReqHeight,
+ unsigned int *PTERequestSize,
+ int *DPDE0BytesFrame,
+ int *MetaPTEBytesFrame);
+static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
+static void CalculateRowBandwidth(
+ bool GPUVMEnable,
+ enum source_format_class SourcePixelFormat,
+ double VRatio,
+ double VRatioChroma,
+ bool DCCEnable,
+ double LineTime,
+ unsigned int MetaRowByteLuma,
+ unsigned int MetaRowByteChroma,
+ unsigned int meta_row_height_luma,
+ unsigned int meta_row_height_chroma,
+ unsigned int PixelPTEBytesPerRowLuma,
+ unsigned int PixelPTEBytesPerRowChroma,
+ unsigned int dpte_row_height_luma,
+ unsigned int dpte_row_height_chroma,
+ double *meta_row_bw,
+ double *dpte_row_bw);
+
+static void CalculateFlipSchedule(
+ struct display_mode_lib *mode_lib,
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool GPUVMEnable,
+ double HostVMMinPageSize,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+ double DPTEBytesPerRow,
+ double BandwidthAvailableForImmediateFlip,
+ unsigned int TotImmediateFlipBytes,
+ enum source_format_class SourcePixelFormat,
+ double LineTime,
+ double VRatio,
+ double VRatioChroma,
+ double Tno_bw,
+ bool DCCEnable,
+ unsigned int dpte_row_height,
+ unsigned int meta_row_height,
+ unsigned int dpte_row_height_chroma,
+ unsigned int meta_row_height_chroma,
+ double *DestinationLinesToRequestVMInImmediateFlip,
+ double *DestinationLinesToRequestRowInImmediateFlip,
+ double *final_flip_bw,
+ bool *ImmediateFlipSupportedForPipe);
+static double CalculateWriteBackDelay(
+ enum source_format_class WritebackPixelFormat,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackVTaps,
+ int WritebackDestinationWidth,
+ int WritebackDestinationHeight,
+ int WritebackSourceHeight,
+ unsigned int HTotal);
+
+static void CalculateVupdateAndDynamicMetadataParameters(
+ int MaxInterDCNTileRepeaters,
+ double DPPCLK,
+ double DISPCLK,
+ double DCFClkDeepSleep,
+ double PixelClock,
+ int HTotal,
+ int VBlank,
+ int DynamicMetadataTransmittedBytes,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ int InterlaceEnable,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double *TSetup,
+ double *Tdmbf,
+ double *Tdmec,
+ double *Tdmsks,
+ int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix);
+
+static void CalculateWatermarksAndDRAMSpeedChangeSupport(
+ struct display_mode_lib *mode_lib,
+ unsigned int PrefetchMode,
+ unsigned int NumberOfActivePlanes,
+ unsigned int MaxLineBufferLines,
+ unsigned int LineBufferSize,
+ unsigned int WritebackInterfaceBufferSize,
+ double DCFCLK,
+ double ReturnBW,
+ bool SynchronizedVBlank,
+ unsigned int dpte_group_bytes[],
+ unsigned int MetaChunkSize,
+ double UrgentLatency,
+ double ExtraLatency,
+ double WritebackLatency,
+ double WritebackChunkSize,
+ double SOCCLK,
+ double DRAMClockChangeLatency,
+ double SRExitTime,
+ double SREnterPlusExitTime,
+ double SRExitZ8Time,
+ double SREnterPlusExitZ8Time,
+ double DCFCLKDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int LBBitPerPixel[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ double HRatio[],
+ double HRatioChroma[],
+ unsigned int vtaps[],
+ unsigned int VTAPsChroma[],
+ double VRatio[],
+ double VRatioChroma[],
+ unsigned int HTotal[],
+ double PixelClock[],
+ unsigned int BlendingAndTiming[],
+ unsigned int DPPPerPlane[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+ double DSTXAfterScaler[],
+ double DSTYAfterScaler[],
+ bool WritebackEnable[],
+ enum source_format_class WritebackPixelFormat[],
+ double WritebackDestinationWidth[],
+ double WritebackDestinationHeight[],
+ double WritebackSourceHeight[],
+ bool UnboundedRequestEnabled,
+ unsigned int CompressedBufferSizeInkByte,
+ enum clock_change_support *DRAMClockChangeSupport,
+ double *UrgentWatermark,
+ double *WritebackUrgentWatermark,
+ double *DRAMClockChangeWatermark,
+ double *WritebackDRAMClockChangeWatermark,
+ double *StutterExitWatermark,
+ double *StutterEnterPlusExitWatermark,
+ double *Z8StutterExitWatermark,
+ double *Z8StutterEnterPlusExitWatermark,
+ double *MinActiveDRAMClockChangeLatencySupported);
+
+static void CalculateDCFCLKDeepSleep(
+ struct display_mode_lib *mode_lib,
+ unsigned int NumberOfActivePlanes,
+ int BytePerPixelY[],
+ int BytePerPixelC[],
+ double VRatio[],
+ double VRatioChroma[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerPlane[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double DPPCLK[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ int ReturnBusWidth,
+ double *DCFCLKDeepSleep);
+
+static void CalculateUrgentBurstFactor(
+ int swath_width_luma_ub,
+ int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double LineTime,
+ double UrgentLatency,
+ double CursorBufferSize,
+ unsigned int CursorWidth,
+ unsigned int CursorBPP,
+ double VRatio,
+ double VRatioC,
+ double BytePerPixelInDETY,
+ double BytePerPixelInDETC,
+ double DETBufferSizeY,
+ double DETBufferSizeC,
+ double *UrgentBurstFactorCursor,
+ double *UrgentBurstFactorLuma,
+ double *UrgentBurstFactorChroma,
+ bool *NotEnoughUrgentLatencyHiding);
+
+static void UseMinimumDCFCLK(
+ struct display_mode_lib *mode_lib,
+ int MaxPrefetchMode,
+ int ReorderingBytes);
+
+static void CalculatePixelDeliveryTimes(
+ unsigned int NumberOfActivePlanes,
+ double VRatio[],
+ double VRatioChroma[],
+ double VRatioPrefetchY[],
+ double VRatioPrefetchC[],
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ unsigned int DPPPerPlane[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double DPPCLK[],
+ int BytePerPixelC[],
+ enum scan_direction_class SourceScan[],
+ unsigned int NumberOfCursors[],
+ unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+ unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int BlockHeight256BytesC[],
+ double DisplayPipeLineDeliveryTimeLuma[],
+ double DisplayPipeLineDeliveryTimeChroma[],
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeLuma[],
+ double DisplayPipeRequestDeliveryTimeChroma[],
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ double CursorRequestDeliveryTime[],
+ double CursorRequestDeliveryTimePrefetch[]);
+
+static void CalculateMetaAndPTETimes(
+ int NumberOfActivePlanes,
+ bool GPUVMEnable,
+ int MetaChunkSize,
+ int MinMetaChunkSizeBytes,
+ int HTotal[],
+ double VRatio[],
+ double VRatioChroma[],
+ double DestinationLinesToRequestRowInVBlank[],
+ double DestinationLinesToRequestRowInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ int BytePerPixelY[],
+ int BytePerPixelC[],
+ enum scan_direction_class SourceScan[],
+ int dpte_row_height[],
+ int dpte_row_height_chroma[],
+ int meta_row_width[],
+ int meta_row_width_chroma[],
+ int meta_row_height[],
+ int meta_row_height_chroma[],
+ int meta_req_width[],
+ int meta_req_width_chroma[],
+ int meta_req_height[],
+ int meta_req_height_chroma[],
+ int dpte_group_bytes[],
+ int PTERequestSizeY[],
+ int PTERequestSizeC[],
+ int PixelPTEReqWidthY[],
+ int PixelPTEReqHeightY[],
+ int PixelPTEReqWidthC[],
+ int PixelPTEReqHeightC[],
+ int dpte_row_width_luma_ub[],
+ int dpte_row_width_chroma_ub[],
+ double DST_Y_PER_PTE_ROW_NOM_L[],
+ double DST_Y_PER_PTE_ROW_NOM_C[],
+ double DST_Y_PER_META_ROW_NOM_L[],
+ double DST_Y_PER_META_ROW_NOM_C[],
+ double TimePerMetaChunkNominal[],
+ double TimePerChromaMetaChunkNominal[],
+ double TimePerMetaChunkVBlank[],
+ double TimePerChromaMetaChunkVBlank[],
+ double TimePerMetaChunkFlip[],
+ double TimePerChromaMetaChunkFlip[],
+ double time_per_pte_group_nom_luma[],
+ double time_per_pte_group_vblank_luma[],
+ double time_per_pte_group_flip_luma[],
+ double time_per_pte_group_nom_chroma[],
+ double time_per_pte_group_vblank_chroma[],
+ double time_per_pte_group_flip_chroma[]);
+
+static void CalculateVMGroupAndRequestTimes(
+ unsigned int NumberOfActivePlanes,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int HTotal[],
+ int BytePerPixelC[],
+ double DestinationLinesToRequestVMInVBlank[],
+ double DestinationLinesToRequestVMInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ int dpte_row_width_luma_ub[],
+ int dpte_row_width_chroma_ub[],
+ int vm_group_bytes[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ int meta_pte_bytes_per_frame_ub_l[],
+ int meta_pte_bytes_per_frame_ub_c[],
+ double TimePerVMGroupVBlank[],
+ double TimePerVMGroupFlip[],
+ double TimePerVMRequestVBlank[],
+ double TimePerVMRequestFlip[]);
+
+static void CalculateStutterEfficiency(
+ struct display_mode_lib *mode_lib,
+ int CompressedBufferSizeInkByte,
+ bool UnboundedRequestEnabled,
+ int ConfigReturnBufferSizeInKByte,
+ int MetaFIFOSizeInKEntries,
+ int ZeroSizeBufferEntries,
+ int NumberOfActivePlanes,
+ int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ double COMPBUF_RESERVED_SPACE_64B,
+ double COMPBUF_RESERVED_SPACE_ZS,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizedVBlank,
+ double Z8StutterEnterPlusExitWatermark,
+ double StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ int DPPPerPlane[],
+ unsigned int DETBufferSizeY[],
+ int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ int SwathHeightY[],
+ int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ int HTotal[],
+ int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum scan_direction_class SourceScan[],
+ int BlockHeight256BytesY[],
+ int BlockWidth256BytesY[],
+ int BlockHeight256BytesC[],
+ int BlockWidth256BytesC[],
+ int DCCYMaxUncompressedBlock[],
+ int DCCCMaxUncompressedBlock[],
+ int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthPlaneLuma[],
+ double ReadBandwidthPlaneChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod);
+
+static void CalculateSwathAndDETConfiguration(
+ bool ForceSingleDPP,
+ int NumberOfActivePlanes,
+ unsigned int DETBufferSizeInKByte,
+ double MaximumSwathWidthLuma[],
+ double MaximumSwathWidthChroma[],
+ enum scan_direction_class SourceScan[],
+ enum source_format_class SourcePixelFormat[],
+ enum dm_swizzle_mode SurfaceTiling[],
+ int ViewportWidth[],
+ int ViewportHeight[],
+ int SurfaceWidthY[],
+ int SurfaceWidthC[],
+ int SurfaceHeightY[],
+ int SurfaceHeightC[],
+ int Read256BytesBlockHeightY[],
+ int Read256BytesBlockHeightC[],
+ int Read256BytesBlockWidthY[],
+ int Read256BytesBlockWidthC[],
+ enum odm_combine_mode ODMCombineEnabled[],
+ int BlendingAndTiming[],
+ int BytePerPixY[],
+ int BytePerPixC[],
+ double BytePerPixDETY[],
+ double BytePerPixDETC[],
+ int HActive[],
+ double HRatio[],
+ double HRatioChroma[],
+ int DPPPerPlane[],
+ int swath_width_luma_ub[],
+ int swath_width_chroma_ub[],
+ double SwathWidth[],
+ double SwathWidthChroma[],
+ int SwathHeightY[],
+ int SwathHeightC[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ bool ViewportSizeSupportPerPlane[],
+ bool *ViewportSizeSupport);
+static void CalculateSwathWidth(
+ bool ForceSingleDPP,
+ int NumberOfActivePlanes,
+ enum source_format_class SourcePixelFormat[],
+ enum scan_direction_class SourceScan[],
+ int ViewportWidth[],
+ int ViewportHeight[],
+ int SurfaceWidthY[],
+ int SurfaceWidthC[],
+ int SurfaceHeightY[],
+ int SurfaceHeightC[],
+ enum odm_combine_mode ODMCombineEnabled[],
+ int BytePerPixY[],
+ int BytePerPixC[],
+ int Read256BytesBlockHeightY[],
+ int Read256BytesBlockHeightC[],
+ int Read256BytesBlockWidthY[],
+ int Read256BytesBlockWidthC[],
+ int BlendingAndTiming[],
+ int HActive[],
+ double HRatio[],
+ int DPPPerPlane[],
+ double SwathWidthSingleDPPY[],
+ double SwathWidthSingleDPPC[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ int MaximumSwathHeightY[],
+ int MaximumSwathHeightC[],
+ int swath_width_luma_ub[],
+ int swath_width_chroma_ub[]);
+
+static double CalculateExtraLatency(
+ int RoundTripPingLatencyCycles,
+ int ReorderingBytes,
+ double DCFCLK,
+ int TotalNumberOfActiveDPP,
+ int PixelChunkSizeInKByte,
+ int TotalNumberOfDCCActiveDPP,
+ int MetaChunkSize,
+ double ReturnBW,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ int NumberOfActivePlanes,
+ int NumberOfDPP[],
+ int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ int HostVMMaxNonCachedPageTableLevels);
+
+static double CalculateExtraLatencyBytes(
+ int ReorderingBytes,
+ int TotalNumberOfActiveDPP,
+ int PixelChunkSizeInKByte,
+ int TotalNumberOfDCCActiveDPP,
+ int MetaChunkSize,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ int NumberOfActivePlanes,
+ int NumberOfDPP[],
+ int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ int HostVMMaxNonCachedPageTableLevels);
+
+static double CalculateUrgentLatency(
+ double UrgentLatencyPixelDataOnly,
+ double UrgentLatencyPixelMixedWithVMData,
+ double UrgentLatencyVMDataOnly,
+ bool DoUrgentLatencyAdjustment,
+ double UrgentLatencyAdjustmentFabricClockComponent,
+ double UrgentLatencyAdjustmentFabricClockReference,
+ double FabricClockSingle);
+
+static void CalculateUnboundedRequestAndCompressedBufferSize(
+ unsigned int DETBufferSizeInKByte,
+ int ConfigReturnBufferSizeInKByte,
+ enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ int TotalActiveDPP,
+ bool NoChromaPlanes,
+ int MaxNumDPP,
+ int CompressedBufferSegmentSizeInkByteFinal,
+ enum output_encoder_class *Output,
+ bool *UnboundedRequestEnabled,
+ int *CompressedBufferSizeInkByte);
+
+static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
+static unsigned int CalculateMaxVStartup(
+ unsigned int VTotal,
+ unsigned int VActive,
+ unsigned int VBlankNom,
+ unsigned int HTotal,
+ double PixelClock,
+ bool ProgressiveTointerlaceUnitinOPP,
+ bool Interlace,
+ unsigned int VBlankNomDefaultUS,
+ double WritebackDelayTime);
+
+void dml314_recalculate(struct display_mode_lib *mode_lib)
+{
+ ModeSupportAndSystemConfiguration(mode_lib);
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
+ DisplayPipeConfiguration(mode_lib);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
+#endif
+ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
+}
+
+static unsigned int dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum output_format_class pixelFormat,
+ enum output_encoder_class Output)
+{
+ // valid bpc = source bits per component in the set of {8, 10, 12}
+ // valid bpp = increments of 1/16 of a bit
+ // min = 6/7/8 in N420/N422/444, respectively
+ // max = such that compression is 1:1
+ //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
+ //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
+ //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
+
+ // fixed value
+ unsigned int rcModelSize = 8192;
+
+ // N422/N420 operate at 2 pixels per clock
+ unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
+
+ if (pixelFormat == dm_420)
+ pixelsPerClock = 2;
+ else if (pixelFormat == dm_444)
+ pixelsPerClock = 1;
+ else if (pixelFormat == dm_n422)
+ pixelsPerClock = 2;
+ // #all other modes operate at 1 pixel per clock
+ else
+ pixelsPerClock = 1;
+
+ //initial transmit delay as per PPS
+ initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
+
+ //compute ssm delay
+ if (bpc == 8)
+ D = 81;
+ else if (bpc == 10)
+ D = 89;
+ else
+ D = 113;
+
+ //divide by pixel per cycle to compute slice width as seen by DSC
+ w = sliceWidth / pixelsPerClock;
+
+ //422 mode has an additional cycle of delay
+ if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
+ s = 0;
+ else
+ s = 1;
+
+ //main calculation for the dscce
+ ix = initalXmitDelay + 45;
+ wx = (w + 2) / 3;
+ P = 3 * wx - w;
+ l0 = ix / w;
+ a = ix + P * l0;
+ ax = (a + 2) / 3 + D + 6 + 1;
+ L = (ax + wx - 1) / wx;
+ if ((ix % w) == 0 && P != 0)
+ lstall = 1;
+ else
+ lstall = 0;
+ Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
+
+ //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
+ pixels = Delay * 3 * pixelsPerClock;
+ return pixels;
+}
+
+static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
+{
+ unsigned int Delay = 0;
+
+ if (pixelFormat == dm_420) {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 0;
+ // dscc - input deserializer
+ Delay = Delay + 3;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 2;
+ // dscc - input cdc fifo
+ Delay = Delay + 12;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 13;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 7;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 3;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // sft
+ Delay = Delay + 1;
+ } else if (pixelFormat == dm_n422) {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 1;
+ // dscc - input deserializer
+ Delay = Delay + 5;
+ // dscc - input cdc fifo
+ Delay = Delay + 25;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 10;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // sft
+ Delay = Delay + 1;
+ } else {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 0;
+ // dscc - input deserializer
+ Delay = Delay + 3;
+ // dscc - input cdc fifo
+ Delay = Delay + 12;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 7;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // sft
+ Delay = Delay + 1;
+ }
+
+ return Delay;
+}
+
+static bool CalculatePrefetchSchedule(
+ struct display_mode_lib *mode_lib,
+ double HostVMInefficiencyFactor,
+ Pipe *myPipe,
+ unsigned int DSCDelay,
+ double DPPCLKDelaySubtotalPlusCNVCFormater,
+ double DPPCLKDelaySCL,
+ double DPPCLKDelaySCLLBOnly,
+ double DPPCLKDelayCNVCCursor,
+ double DISPCLKDelaySubtotal,
+ unsigned int DPP_RECOUT_WIDTH,
+ enum output_format_class OutputFormat,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int VStartup,
+ unsigned int MaxVStartup,
+ unsigned int GPUVMPageTableLevels,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ double HostVMMinPageSize,
+ bool DynamicMetadataEnable,
+ bool DynamicMetadataVMEnabled,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int DynamicMetadataTransmittedBytes,
+ double UrgentLatency,
+ double UrgentExtraLatency,
+ double TCalc,
+ unsigned int PDEAndMetaPTEBytesFrame,
+ unsigned int MetaRowByte,
+ unsigned int PixelPTEBytesPerRow,
+ double PrefetchSourceLinesY,
+ unsigned int SwathWidthY,
+ double VInitPreFillY,
+ unsigned int MaxNumSwathY,
+ double PrefetchSourceLinesC,
+ unsigned int SwathWidthC,
+ double VInitPreFillC,
+ unsigned int MaxNumSwathC,
+ int swath_width_luma_ub,
+ int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double TWait,
+ double *DSTXAfterScaler,
+ double *DSTYAfterScaler,
+ double *DestinationLinesForPrefetch,
+ double *PrefetchBandwidth,
+ double *DestinationLinesToRequestVMInVBlank,
+ double *DestinationLinesToRequestRowInVBlank,
+ double *VRatioPrefetchY,
+ double *VRatioPrefetchC,
+ double *RequiredPrefetchPixDataBWLuma,
+ double *RequiredPrefetchPixDataBWChroma,
+ bool *NotEnoughTimeForDynamicMetadata,
+ double *Tno_bw,
+ double *prefetch_vmrow_bw,
+ double *Tdmdl_vm,
+ double *Tdmdl,
+ double *TSetup,
+ int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix)
+{
+ bool MyError = false;
+ unsigned int DPPCycles, DISPCLKCycles;
+ double DSTTotalPixelsAfterScaler;
+ double LineTime;
+ double dst_y_prefetch_equ;
+ double Tsw_oto;
+ double prefetch_bw_oto;
+ double prefetch_bw_pr;
+ double Tvm_oto;
+ double Tr0_oto;
+ double Tvm_oto_lines;
+ double Tr0_oto_lines;
+ double dst_y_prefetch_oto;
+ double TimeForFetchingMetaPTE = 0;
+ double TimeForFetchingRowInVBlank = 0;
+ double LinesToRequestPrefetchPixelData = 0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double trip_to_mem;
+ double Tvm_trips;
+ double Tr0_trips;
+ double Tvm_trips_rounded;
+ double Tr0_trips_rounded;
+ double Lsw_oto;
+ double Tpre_rounded;
+ double prefetch_bw_equ;
+ double Tvm_equ;
+ double Tr0_equ;
+ double Tdmbf;
+ double Tdmec;
+ double Tdmsks;
+ double prefetch_sw_bytes;
+ double bytes_pp;
+ double dep_bytes;
+ int max_vratio_pre = 4;
+ double min_Lsw;
+ double Tsw_est1 = 0;
+ double Tsw_est3 = 0;
+ double max_Tsw = 0;
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ } else {
+ HostVMDynamicLevelsTrips = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
+#endif
+ CalculateVupdateAndDynamicMetadataParameters(
+ MaxInterDCNTileRepeaters,
+ myPipe->DPPCLK,
+ myPipe->DISPCLK,
+ myPipe->DCFCLKDeepSleep,
+ myPipe->PixelClock,
+ myPipe->HTotal,
+ myPipe->VBlank,
+ DynamicMetadataTransmittedBytes,
+ DynamicMetadataLinesBeforeActiveRequired,
+ myPipe->InterlaceEnable,
+ myPipe->ProgressiveToInterlaceUnitInOPP,
+ TSetup,
+ &Tdmbf,
+ &Tdmec,
+ &Tdmsks,
+ VUpdateOffsetPix,
+ VUpdateWidthPix,
+ VReadyOffsetPix);
+
+ LineTime = myPipe->HTotal / myPipe->PixelClock;
+ trip_to_mem = UrgentLatency;
+ Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
+#else
+ if (DynamicMetadataVMEnabled == true) {
+#endif
+ *Tdmdl = TWait + Tvm_trips + trip_to_mem;
+ } else {
+ *Tdmdl = TWait + UrgentExtraLatency;
+ }
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ if (DynamicMetadataEnable == false) {
+ *Tdmdl = 0.0;
+ }
+#endif
+
+ if (DynamicMetadataEnable == true) {
+ if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
+ *NotEnoughTimeForDynamicMetadata = true;
+ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
+ dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
+ } else {
+ *NotEnoughTimeForDynamicMetadata = false;
+ }
+ } else {
+ *NotEnoughTimeForDynamicMetadata = false;
+ }
+
+ *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
+
+ if (myPipe->ScalerEnabled)
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
+ else
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
+
+ DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
+
+ DISPCLKCycles = DISPCLKDelaySubtotal;
+
+ if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
+ return true;
+
+ *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
+ dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
+ dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
+ dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
+ dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
+ dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
+ dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
+ dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
+#endif
+
+ *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
+
+ if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
+ *DSTYAfterScaler = 1;
+ else
+ *DSTYAfterScaler = 0;
+
+ DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
+ *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
+ *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
+#endif
+
+ MyError = false;
+
+ Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
+ Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ if (!myPipe->DCCEnable) {
+ Tr0_trips = 0.0;
+ Tr0_trips_rounded = 0.0;
+ }
+#endif
+
+ if (!GPUVMEnable) {
+ Tvm_trips = 0.0;
+ Tvm_trips_rounded = 0.0;
+ }
+
+ if (GPUVMEnable) {
+ if (GPUVMPageTableLevels >= 3) {
+ *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
+ } else {
+ *Tno_bw = 0;
+ }
+ } else if (!myPipe->DCCEnable) {
+ *Tno_bw = LineTime;
+ } else {
+ *Tno_bw = LineTime / 4;
+ }
+
+ if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
+ else
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
+ /*rev 99*/
+ prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
+ max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
+ prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
+ prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
+ prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
+
+ min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
+ Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
+ Tsw_oto = Lsw_oto * LineTime;
+
+ prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML: HTotal: %d\n", myPipe->HTotal);
+ dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
+ dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
+ dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
+ dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
+ dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
+#endif
+
+ if (GPUVMEnable == true)
+ Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
+ else
+ Tvm_oto = LineTime / 4.0;
+
+ if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
+ LineTime - Tvm_oto,
+ LineTime / 4);
+ } else {
+ Tr0_oto = (LineTime - Tvm_oto) / 2.0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
+ dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+ dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
+ dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
+#endif
+
+ Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
+ Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
+ dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
+ dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
+ dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ Tpre_rounded = dst_y_prefetch_equ * LineTime;
+
+ dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
+
+ if (prefetch_sw_bytes < dep_bytes)
+ prefetch_sw_bytes = 2 * dep_bytes;
+
+ dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
+ dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
+ dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
+ dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
+ dml_print("DML: LineTime: %f\n", LineTime);
+ dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
+
+ dml_print("DML: LineTime: %f\n", LineTime);
+ dml_print("DML: VStartup: %d\n", VStartup);
+ dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
+ dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
+ dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
+ dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
+ dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
+ dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
+ dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
+ dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm);
+ dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl);
+ dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler);
+ dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler);
+
+ *PrefetchBandwidth = 0;
+ *DestinationLinesToRequestVMInVBlank = 0;
+ *DestinationLinesToRequestRowInVBlank = 0;
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ if (dst_y_prefetch_equ > 1) {
+ double PrefetchBandwidth1;
+ double PrefetchBandwidth2;
+ double PrefetchBandwidth3;
+ double PrefetchBandwidth4;
+
+ if (Tpre_rounded - *Tno_bw > 0) {
+ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+ + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
+ Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
+ } else {
+ PrefetchBandwidth1 = 0;
+ }
+
+ if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
+ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
+ }
+
+ if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
+ PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
+ else
+ PrefetchBandwidth2 = 0;
+
+ if (Tpre_rounded - Tvm_trips_rounded > 0) {
+ PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+ + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
+ Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
+ } else {
+ PrefetchBandwidth3 = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
+ dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
+ dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
+#endif
+ if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
+ PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
+ }
+
+ if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
+ PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
+ else
+ PrefetchBandwidth4 = 0;
+
+ {
+ bool Case1OK;
+ bool Case2OK;
+ bool Case3OK;
+
+ if (PrefetchBandwidth1 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
+ Case1OK = true;
+ } else {
+ Case1OK = false;
+ }
+ } else {
+ Case1OK = false;
+ }
+
+ if (PrefetchBandwidth2 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
+ Case2OK = true;
+ } else {
+ Case2OK = false;
+ }
+ } else {
+ Case2OK = false;
+ }
+
+ if (PrefetchBandwidth3 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
+ Case3OK = true;
+ } else {
+ Case3OK = false;
+ }
+ } else {
+ Case3OK = false;
+ }
+
+ if (Case1OK) {
+ prefetch_bw_equ = PrefetchBandwidth1;
+ } else if (Case2OK) {
+ prefetch_bw_equ = PrefetchBandwidth2;
+ } else if (Case3OK) {
+ prefetch_bw_equ = PrefetchBandwidth3;
+ } else {
+ prefetch_bw_equ = PrefetchBandwidth4;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
+ dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
+ dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
+ dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
+#endif
+
+ if (prefetch_bw_equ > 0) {
+ if (GPUVMEnable == true) {
+ Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
+ } else {
+ Tvm_equ = LineTime / 4;
+ }
+
+ if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ Tr0_equ = dml_max4(
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
+ Tr0_trips,
+ (LineTime - Tvm_equ) / 2,
+ LineTime / 4);
+ } else {
+ Tr0_equ = (LineTime - Tvm_equ) / 2;
+ }
+ } else {
+ Tvm_equ = 0;
+ Tr0_equ = 0;
+ dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
+ }
+ }
+
+ if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
+ *DestinationLinesForPrefetch = dst_y_prefetch_oto;
+ TimeForFetchingMetaPTE = Tvm_oto;
+ TimeForFetchingRowInVBlank = Tr0_oto;
+ *PrefetchBandwidth = prefetch_bw_oto;
+ } else {
+ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
+ TimeForFetchingMetaPTE = Tvm_equ;
+ TimeForFetchingRowInVBlank = Tr0_equ;
+ *PrefetchBandwidth = prefetch_bw_equ;
+ }
+
+ *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
+
+ *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
+ // See note above dated 5/30/2018
+ // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
+ - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
+#else
+ LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
+#endif
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
+#endif
+
+ if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
+
+ *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+ dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
+ dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
+#endif
+ if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
+ *VRatioPrefetchY = dml_max(
+ (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
+ (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
+ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+ } else {
+ MyError = true;
+ dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+ *VRatioPrefetchY = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
+#endif
+ }
+
+ *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+ dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
+ dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
+#endif
+ if ((SwathHeightC > 4) || VInitPreFillC > 3) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
+ *VRatioPrefetchC = dml_max(
+ *VRatioPrefetchC,
+ (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
+ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+ } else {
+ MyError = true;
+ dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+ *VRatioPrefetchC = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
+ dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
+#endif
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
+ dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+#endif
+
+ *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
+#endif
+
+ *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
+ / LineTime;
+ } else {
+ MyError = true;
+ dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+ dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ *RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ dml_print(
+ "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
+ (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
+ dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
+ dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
+ dml_print(
+ "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
+ (double) LinesToRequestPrefetchPixelData * LineTime);
+ dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
+ dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
+ dml_print(
+ "DML: Tslack(pre): %fus - time left over in schedule\n",
+ VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
+ - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
+ dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
+
+ } else {
+ MyError = true;
+ dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+ }
+
+ {
+ double prefetch_vm_bw;
+ double prefetch_row_bw;
+
+ if (PDEAndMetaPTEBytesFrame == 0) {
+ prefetch_vm_bw = 0;
+ } else if (*DestinationLinesToRequestVMInVBlank > 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+#endif
+ prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
+#endif
+ } else {
+ prefetch_vm_bw = 0;
+ MyError = true;
+ dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+ }
+
+ if (MetaRowByte + PixelPTEBytesPerRow == 0) {
+ prefetch_row_bw = 0;
+ } else if (*DestinationLinesToRequestRowInVBlank > 0) {
+ prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
+#endif
+ } else {
+ prefetch_row_bw = 0;
+ MyError = true;
+ dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
+ }
+
+ *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
+ }
+
+ if (MyError) {
+ *PrefetchBandwidth = 0;
+ TimeForFetchingMetaPTE = 0;
+ TimeForFetchingRowInVBlank = 0;
+ *DestinationLinesToRequestVMInVBlank = 0;
+ *DestinationLinesToRequestRowInVBlank = 0;
+ *DestinationLinesForPrefetch = 0;
+ LinesToRequestPrefetchPixelData = 0;
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ *RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ return MyError;
+}
+
+static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
+{
+ return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1);
+}
+
+static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
+{
+ return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1);
+}
+
+static void CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ double DETBufferSize,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dm_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum scan_direction_class ScanOrientation,
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma)
+{
+ int yuv420;
+ int horz_div_l;
+ int horz_div_c;
+ int vert_div_l;
+ int vert_div_c;
+
+ int swath_buf_size;
+ double detile_buf_vp_horz_limit;
+ double detile_buf_vp_vert_limit;
+
+ int MAS_vp_horz_limit;
+ int MAS_vp_vert_limit;
+ int max_vp_horz_width;
+ int max_vp_vert_height;
+ int eff_surf_width_l;
+ int eff_surf_width_c;
+ int eff_surf_height_l;
+ int eff_surf_height_c;
+
+ int full_swath_bytes_horz_wc_l;
+ int full_swath_bytes_horz_wc_c;
+ int full_swath_bytes_vert_wc_l;
+ int full_swath_bytes_vert_wc_c;
+ int req128_horz_wc_l;
+ int req128_horz_wc_c;
+ int req128_vert_wc_l;
+ int req128_vert_wc_c;
+ int segment_order_horz_contiguous_luma;
+ int segment_order_horz_contiguous_chroma;
+ int segment_order_vert_contiguous_luma;
+ int segment_order_vert_contiguous_chroma;
+
+ enum {
+ REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
+ } RequestType;
+ RequestType RequestLuma;
+ RequestType RequestChroma;
+
+ yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
+ horz_div_l = 1;
+ horz_div_c = 1;
+ vert_div_l = 1;
+ vert_div_c = 1;
+
+ if (BytePerPixelY == 1)
+ vert_div_l = 0;
+ if (BytePerPixelC == 1)
+ vert_div_c = 0;
+ if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
+ horz_div_l = 0;
+ if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
+ horz_div_c = 0;
+
+ if (BytePerPixelC == 0) {
+ swath_buf_size = DETBufferSize / 2 - 2 * 256;
+ detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
+ detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
+ } else {
+ swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
+ detile_buf_vp_horz_limit = (double) swath_buf_size
+ / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
+ + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
+ detile_buf_vp_vert_limit = (double) swath_buf_size
+ / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
+ }
+
+ if (SourcePixelFormat == dm_420_10) {
+ detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
+ detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
+ }
+
+ detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
+ detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
+
+ MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
+ MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
+ max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
+ max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
+ eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
+ eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
+ eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
+ eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
+
+ full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
+ full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
+ if (BytePerPixelC > 0) {
+ full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
+ full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
+ } else {
+ full_swath_bytes_horz_wc_c = 0;
+ full_swath_bytes_vert_wc_c = 0;
+ }
+
+ if (SourcePixelFormat == dm_420_10) {
+ full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
+ full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
+ full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
+ full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
+ }
+
+ if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 0;
+ } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 1;
+ } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 0;
+ } else {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 1;
+ }
+
+ if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 0;
+ } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 1;
+ } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 0;
+ } else {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 1;
+ }
+
+ if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
+ segment_order_horz_contiguous_luma = 0;
+ } else {
+ segment_order_horz_contiguous_luma = 1;
+ }
+ if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
+ || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
+ segment_order_vert_contiguous_luma = 0;
+ } else {
+ segment_order_vert_contiguous_luma = 1;
+ }
+ if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
+ segment_order_horz_contiguous_chroma = 0;
+ } else {
+ segment_order_horz_contiguous_chroma = 1;
+ }
+ if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
+ || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
+ segment_order_vert_contiguous_chroma = 0;
+ } else {
+ segment_order_vert_contiguous_chroma = 1;
+ }
+
+ if (DCCProgrammingAssumesScanDirectionUnknown == true) {
+ if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
+ RequestLuma = REQ_256Bytes;
+ } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
+ RequestLuma = REQ_128BytesNonContiguous;
+ } else {
+ RequestLuma = REQ_128BytesContiguous;
+ }
+ if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
+ RequestChroma = REQ_256Bytes;
+ } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
+ RequestChroma = REQ_128BytesNonContiguous;
+ } else {
+ RequestChroma = REQ_128BytesContiguous;
+ }
+ } else if (ScanOrientation != dm_vert) {
+ if (req128_horz_wc_l == 0) {
+ RequestLuma = REQ_256Bytes;
+ } else if (segment_order_horz_contiguous_luma == 0) {
+ RequestLuma = REQ_128BytesNonContiguous;
+ } else {
+ RequestLuma = REQ_128BytesContiguous;
+ }
+ if (req128_horz_wc_c == 0) {
+ RequestChroma = REQ_256Bytes;
+ } else if (segment_order_horz_contiguous_chroma == 0) {
+ RequestChroma = REQ_128BytesNonContiguous;
+ } else {
+ RequestChroma = REQ_128BytesContiguous;
+ }
+ } else {
+ if (req128_vert_wc_l == 0) {
+ RequestLuma = REQ_256Bytes;
+ } else if (segment_order_vert_contiguous_luma == 0) {
+ RequestLuma = REQ_128BytesNonContiguous;
+ } else {
+ RequestLuma = REQ_128BytesContiguous;
+ }
+ if (req128_vert_wc_c == 0) {
+ RequestChroma = REQ_256Bytes;
+ } else if (segment_order_vert_contiguous_chroma == 0) {
+ RequestChroma = REQ_128BytesNonContiguous;
+ } else {
+ RequestChroma = REQ_128BytesContiguous;
+ }
+ }
+
+ if (RequestLuma == REQ_256Bytes) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 256;
+ *IndependentBlockLuma = 0;
+ } else if (RequestLuma == REQ_128BytesContiguous) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 128;
+ *IndependentBlockLuma = 128;
+ } else {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 64;
+ *IndependentBlockLuma = 64;
+ }
+
+ if (RequestChroma == REQ_256Bytes) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 256;
+ *IndependentBlockChroma = 0;
+ } else if (RequestChroma == REQ_128BytesContiguous) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 128;
+ *IndependentBlockChroma = 128;
+ } else {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 64;
+ *IndependentBlockChroma = 64;
+ }
+
+ if (DCCEnabled != true || BytePerPixelC == 0) {
+ *MaxUncompressedBlockChroma = 0;
+ *MaxCompressedBlockChroma = 0;
+ *IndependentBlockChroma = 0;
+ }
+
+ if (DCCEnabled != true) {
+ *MaxUncompressedBlockLuma = 0;
+ *MaxCompressedBlockLuma = 0;
+ *IndependentBlockLuma = 0;
+ }
+}
+
+static double CalculatePrefetchSourceLines(
+ struct display_mode_lib *mode_lib,
+ double VRatio,
+ double vtaps,
+ bool Interlace,
+ bool ProgressiveToInterlaceUnitInOPP,
+ unsigned int SwathHeight,
+ unsigned int ViewportYStart,
+ double *VInitPreFill,
+ unsigned int *MaxNumSwath)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ unsigned int MaxPartialSwath;
+
+ if (ProgressiveToInterlaceUnitInOPP)
+ *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
+ else
+ *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
+
+ if (!v->IgnoreViewportPositioning) {
+
+ *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
+
+ if (*VInitPreFill > 1.0)
+ MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
+ else
+ MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
+ MaxPartialSwath = dml_max(1U, MaxPartialSwath);
+
+ } else {
+
+ if (ViewportYStart != 0)
+ dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
+
+ *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
+
+ if (*VInitPreFill > 1.0)
+ MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
+ else
+ MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
+ dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
+ dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
+ dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
+ dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
+ dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
+ dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
+ dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
+ dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
+#endif
+ return *MaxNumSwath * SwathHeight + MaxPartialSwath;
+}
+
+static unsigned int CalculateVMAndRowBytes(
+ struct display_mode_lib *mode_lib,
+ bool DCCEnable,
+ unsigned int BlockHeight256Bytes,
+ unsigned int BlockWidth256Bytes,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceTiling,
+ unsigned int BytePerPixel,
+ enum scan_direction_class ScanDirection,
+ unsigned int SwathWidth,
+ unsigned int ViewportHeight,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMinPageSize,
+ unsigned int HostVMMinPageSize,
+ unsigned int PTEBufferSizeInRequests,
+ unsigned int Pitch,
+ unsigned int DCCMetaPitch,
+ unsigned int *MacroTileWidth,
+ unsigned int *MetaRowByte,
+ unsigned int *PixelPTEBytesPerRow,
+ bool *PTEBufferSizeNotExceeded,
+ int *dpte_row_width_ub,
+ unsigned int *dpte_row_height,
+ unsigned int *MetaRequestWidth,
+ unsigned int *MetaRequestHeight,
+ unsigned int *meta_row_width,
+ unsigned int *meta_row_height,
+ int *vm_group_bytes,
+ unsigned int *dpte_group_bytes,
+ unsigned int *PixelPTEReqWidth,
+ unsigned int *PixelPTEReqHeight,
+ unsigned int *PTERequestSize,
+ int *DPDE0BytesFrame,
+ int *MetaPTEBytesFrame)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ unsigned int MPDEBytesFrame;
+ unsigned int DCCMetaSurfaceBytes;
+ unsigned int MacroTileSizeBytes;
+ unsigned int MacroTileHeight;
+ unsigned int ExtraDPDEBytesFrame;
+ unsigned int PDEAndMetaPTEBytesFrame;
+ unsigned int PixelPTEReqHeightPTEs = 0;
+ unsigned int HostVMDynamicLevels = 0;
+ double FractionOfPTEReturnDrop;
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ if (HostVMMinPageSize < 2048) {
+ HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+ } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+ } else {
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+ }
+ }
+
+ *MetaRequestHeight = 8 * BlockHeight256Bytes;
+ *MetaRequestWidth = 8 * BlockWidth256Bytes;
+ if (ScanDirection != dm_vert) {
+ *meta_row_height = *MetaRequestHeight;
+ *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
+ *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
+ } else {
+ *meta_row_height = *MetaRequestWidth;
+ *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
+ *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
+ }
+ DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
+ if (GPUVMEnable == true) {
+ *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
+ MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
+ } else {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ }
+
+ if (DCCEnable != true) {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ *MetaRowByte = 0;
+ }
+
+ if (SurfaceTiling == dm_sw_linear) {
+ MacroTileSizeBytes = 256;
+ MacroTileHeight = BlockHeight256Bytes;
+ } else {
+ MacroTileSizeBytes = 65536;
+ MacroTileHeight = 16 * BlockHeight256Bytes;
+ }
+ *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
+
+ if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
+ if (ScanDirection != dm_vert) {
+ *DPDE0BytesFrame = 64
+ * (dml_ceil(
+ ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
+ / (8 * 2097152),
+ 1) + 1);
+ } else {
+ *DPDE0BytesFrame = 64
+ * (dml_ceil(
+ ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
+ / (8 * 2097152),
+ 1) + 1);
+ }
+ ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
+ } else {
+ *DPDE0BytesFrame = 0;
+ ExtraDPDEBytesFrame = 0;
+ }
+
+ PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
+ dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
+ dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
+ dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+#endif
+
+ if (HostVMEnable == true) {
+ PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+#endif
+
+ if (SurfaceTiling == dm_sw_linear) {
+ PixelPTEReqHeightPTEs = 1;
+ *PixelPTEReqHeight = 1;
+ *PixelPTEReqWidth = 32768.0 / BytePerPixel;
+ *PTERequestSize = 64;
+ FractionOfPTEReturnDrop = 0;
+ } else if (MacroTileSizeBytes == 4096) {
+ PixelPTEReqHeightPTEs = 1;
+ *PixelPTEReqHeight = MacroTileHeight;
+ *PixelPTEReqWidth = 8 * *MacroTileWidth;
+ *PTERequestSize = 64;
+ if (ScanDirection != dm_vert)
+ FractionOfPTEReturnDrop = 0;
+ else
+ FractionOfPTEReturnDrop = 7 / 8;
+ } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
+ PixelPTEReqHeightPTEs = 16;
+ *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
+ *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
+ *PTERequestSize = 128;
+ FractionOfPTEReturnDrop = 0;
+ } else {
+ PixelPTEReqHeightPTEs = 1;
+ *PixelPTEReqHeight = MacroTileHeight;
+ *PixelPTEReqWidth = 8 * *MacroTileWidth;
+ *PTERequestSize = 64;
+ FractionOfPTEReturnDrop = 0;
+ }
+
+ if (SurfaceTiling == dm_sw_linear) {
+ *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else if (ScanDirection != dm_vert) {
+ *dpte_row_height = *PixelPTEReqHeight;
+ *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else {
+ *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
+ *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
+ }
+
+ if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
+ *PTEBufferSizeNotExceeded = true;
+ } else {
+ *PTEBufferSizeNotExceeded = false;
+ }
+
+ if (GPUVMEnable != true) {
+ *PixelPTEBytesPerRow = 0;
+ *PTEBufferSizeNotExceeded = true;
+ }
+
+ dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
+
+ if (HostVMEnable == true) {
+ *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
+ }
+
+ if (HostVMEnable == true) {
+ *vm_group_bytes = 512;
+ *dpte_group_bytes = 512;
+ } else if (GPUVMEnable == true) {
+ *vm_group_bytes = 2048;
+ if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
+ *dpte_group_bytes = 512;
+ } else {
+ *dpte_group_bytes = 2048;
+ }
+ } else {
+ *vm_group_bytes = 0;
+ *dpte_group_bytes = 0;
+ }
+ return PDEAndMetaPTEBytesFrame;
+}
+
+static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ unsigned int j, k;
+ double HostVMInefficiencyFactor = 1.0;
+ bool NoChromaPlanes = true;
+ int ReorderBytes;
+ double VMDataOnlyReturnBW;
+ double MaxTotalRDBandwidth = 0;
+ int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
+
+ v->WritebackDISPCLK = 0.0;
+ v->DISPCLKWithRamping = 0;
+ v->DISPCLKWithoutRamping = 0;
+ v->GlobalDPPCLK = 0.0;
+ /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
+ {
+ double IdealFabricAndSDPPortBandwidthPerState = dml_min(
+ v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
+ v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
+ double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
+
+ if (v->HostVMEnable != true) {
+ v->ReturnBW = dml_min(
+ IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
+ } else {
+ v->ReturnBW = dml_min(
+ IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
+ }
+ }
+ /* End DAL custom code */
+
+ // DISPCLK and DPPCLK Calculation
+ //
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k]) {
+ v->WritebackDISPCLK = dml_max(
+ v->WritebackDISPCLK,
+ dml314_CalculateWriteBackDISPCLK(
+ v->WritebackPixelFormat[k],
+ v->PixelClock[k],
+ v->WritebackHRatio[k],
+ v->WritebackVRatio[k],
+ v->WritebackHTaps[k],
+ v->WritebackVTaps[k],
+ v->WritebackSourceWidth[k],
+ v->WritebackDestinationWidth[k],
+ v->HTotal[k],
+ v->WritebackLineBufferSize));
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->HRatio[k] > 1) {
+ v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
+ v->MaxDCHUBToPSCLThroughput,
+ v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
+ } else {
+ v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
+ }
+
+ v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
+ * dml_max(
+ v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
+ dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
+
+ if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
+ v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
+ }
+
+ if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
+ && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
+ v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
+ v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
+ } else {
+ if (v->HRatioChroma[k] > 1) {
+ v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
+ v->MaxDCHUBToPSCLThroughput,
+ v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
+ } else {
+ v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
+ }
+ v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
+ * dml_max3(
+ v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
+ v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
+ 1.0);
+
+ if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
+ v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
+ }
+
+ v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] != k)
+ continue;
+ if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
+ v->DISPCLKWithRamping = dml_max(
+ v->DISPCLKWithRamping,
+ v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
+ * (1 + v->DISPCLKRampingMargin / 100));
+ v->DISPCLKWithoutRamping = dml_max(
+ v->DISPCLKWithoutRamping,
+ v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
+ } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
+ v->DISPCLKWithRamping = dml_max(
+ v->DISPCLKWithRamping,
+ v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
+ * (1 + v->DISPCLKRampingMargin / 100));
+ v->DISPCLKWithoutRamping = dml_max(
+ v->DISPCLKWithoutRamping,
+ v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
+ } else {
+ v->DISPCLKWithRamping = dml_max(
+ v->DISPCLKWithRamping,
+ v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
+ v->DISPCLKWithoutRamping = dml_max(
+ v->DISPCLKWithoutRamping,
+ v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
+ }
+ }
+
+ v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
+ v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
+
+ ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
+ v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
+ v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
+ v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
+ v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
+ v->DISPCLKDPPCLKVCOSpeed);
+ if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
+ v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
+ } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
+ v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
+ } else {
+ v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
+ }
+ v->DISPCLK = v->DISPCLK_calculated;
+ DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
+ }
+ v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
+ DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->DPPCLK[k] = v->DPPCLK_calculated[k];
+ }
+
+ // Urgent and B P-State/DRAM Clock Change Watermark
+ DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
+ DTRACE(" return_bus_bw = %f", v->ReturnBW);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateBytePerPixelAnd256BBlockSizes(
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ &v->BytePerPixelY[k],
+ &v->BytePerPixelC[k],
+ &v->BytePerPixelDETY[k],
+ &v->BytePerPixelDETC[k],
+ &v->BlockHeight256BytesY[k],
+ &v->BlockHeight256BytesC[k],
+ &v->BlockWidth256BytesY[k],
+ &v->BlockWidth256BytesC[k]);
+ }
+
+ CalculateSwathWidth(
+ false,
+ v->NumberOfActivePlanes,
+ v->SourcePixelFormat,
+ v->SourceScan,
+ v->ViewportWidth,
+ v->ViewportHeight,
+ v->SurfaceWidthY,
+ v->SurfaceWidthC,
+ v->SurfaceHeightY,
+ v->SurfaceHeightC,
+ v->ODMCombineEnabled,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BlockHeight256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesY,
+ v->BlockWidth256BytesC,
+ v->BlendingAndTiming,
+ v->HActive,
+ v->HRatio,
+ v->DPPPerPlane,
+ v->SwathWidthSingleDPPY,
+ v->SwathWidthSingleDPPC,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ v->dummyinteger3,
+ v->dummyinteger4,
+ v->swath_width_luma_ub,
+ v->swath_width_chroma_ub);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
+ * v->VRatio[k];
+ v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
+ * v->VRatioChroma[k];
+ DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
+ }
+
+ // DCFCLK Deep Sleep
+ CalculateDCFCLKDeepSleep(
+ mode_lib,
+ v->NumberOfActivePlanes,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->VRatio,
+ v->VRatioChroma,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ v->DPPPerPlane,
+ v->HRatio,
+ v->HRatioChroma,
+ v->PixelClock,
+ v->PSCL_THROUGHPUT_LUMA,
+ v->PSCL_THROUGHPUT_CHROMA,
+ v->DPPCLK,
+ v->ReadBandwidthPlaneLuma,
+ v->ReadBandwidthPlaneChroma,
+ v->ReturnBusWidth,
+ &v->DCFCLKDeepSleep);
+
+ // DSCCLK
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
+ v->DSCCLK_calculated[k] = 0.0;
+ } else {
+ if (v->OutputFormat[k] == dm_420)
+ v->DSCFormatFactor = 2;
+ else if (v->OutputFormat[k] == dm_444)
+ v->DSCFormatFactor = 1;
+ else if (v->OutputFormat[k] == dm_n422)
+ v->DSCFormatFactor = 2;
+ else
+ v->DSCFormatFactor = 1;
+ if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
+ v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
+ / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
+ v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
+ / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ else
+ v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
+ / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ }
+ }
+
+ // DSC Delay
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ double BPP = v->OutputBpp[k];
+
+ if (v->DSCEnabled[k] && BPP != 0) {
+ if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
+ v->DSCDelay[k] = dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ BPP,
+ dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
+ v->NumberOfDSCSlices[k],
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
+ } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
+ v->DSCDelay[k] = 2
+ * (dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ BPP,
+ dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
+ v->NumberOfDSCSlices[k] / 2.0,
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
+ } else {
+ v->DSCDelay[k] = 4
+ * (dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ BPP,
+ dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
+ v->NumberOfDSCSlices[k] / 4.0,
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
+ }
+ v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
+ } else {
+ v->DSCDelay[k] = 0;
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k)
+ for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
+ if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
+ v->DSCDelay[k] = v->DSCDelay[j];
+
+ // Prefetch
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ unsigned int PDEAndMetaPTEBytesFrameY;
+ unsigned int PixelPTEBytesPerRowY;
+ unsigned int MetaRowByteY;
+ unsigned int MetaRowByteC;
+ unsigned int PDEAndMetaPTEBytesFrameC;
+ unsigned int PixelPTEBytesPerRowC;
+ bool PTEBufferSizeNotExceededY;
+ bool PTEBufferSizeNotExceededC;
+
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
+ || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+ if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
+ v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
+ v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
+ } else {
+ v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
+ v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
+ }
+
+ PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
+ mode_lib,
+ v->DCCEnable[k],
+ v->BlockHeight256BytesC[k],
+ v->BlockWidth256BytesC[k],
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelC[k],
+ v->SourceScan[k],
+ v->SwathWidthC[k],
+ v->ViewportHeightChroma[k],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMMinPageSize,
+ v->HostVMMinPageSize,
+ v->PTEBufferSizeInRequestsForChroma,
+ v->PitchC[k],
+ v->DCCMetaPitchC[k],
+ &v->MacroTileWidthC[k],
+ &MetaRowByteC,
+ &PixelPTEBytesPerRowC,
+ &PTEBufferSizeNotExceededC,
+ &v->dpte_row_width_chroma_ub[k],
+ &v->dpte_row_height_chroma[k],
+ &v->meta_req_width_chroma[k],
+ &v->meta_req_height_chroma[k],
+ &v->meta_row_width_chroma[k],
+ &v->meta_row_height_chroma[k],
+ &v->dummyinteger1,
+ &v->dummyinteger2,
+ &v->PixelPTEReqWidthC[k],
+ &v->PixelPTEReqHeightC[k],
+ &v->PTERequestSizeC[k],
+ &v->dpde0_bytes_per_frame_ub_c[k],
+ &v->meta_pte_bytes_per_frame_ub_c[k]);
+
+ v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
+ mode_lib,
+ v->VRatioChroma[k],
+ v->VTAPsChroma[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->SwathHeightC[k],
+ v->ViewportYStartC[k],
+ &v->VInitPreFillC[k],
+ &v->MaxNumSwathC[k]);
+ } else {
+ v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
+ v->PTEBufferSizeInRequestsForChroma = 0;
+ PixelPTEBytesPerRowC = 0;
+ PDEAndMetaPTEBytesFrameC = 0;
+ MetaRowByteC = 0;
+ v->MaxNumSwathC[k] = 0;
+ v->PrefetchSourceLinesC[k] = 0;
+ }
+
+ PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
+ mode_lib,
+ v->DCCEnable[k],
+ v->BlockHeight256BytesY[k],
+ v->BlockWidth256BytesY[k],
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelY[k],
+ v->SourceScan[k],
+ v->SwathWidthY[k],
+ v->ViewportHeight[k],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMMinPageSize,
+ v->HostVMMinPageSize,
+ v->PTEBufferSizeInRequestsForLuma,
+ v->PitchY[k],
+ v->DCCMetaPitchY[k],
+ &v->MacroTileWidthY[k],
+ &MetaRowByteY,
+ &PixelPTEBytesPerRowY,
+ &PTEBufferSizeNotExceededY,
+ &v->dpte_row_width_luma_ub[k],
+ &v->dpte_row_height[k],
+ &v->meta_req_width[k],
+ &v->meta_req_height[k],
+ &v->meta_row_width[k],
+ &v->meta_row_height[k],
+ &v->vm_group_bytes[k],
+ &v->dpte_group_bytes[k],
+ &v->PixelPTEReqWidthY[k],
+ &v->PixelPTEReqHeightY[k],
+ &v->PTERequestSizeY[k],
+ &v->dpde0_bytes_per_frame_ub_l[k],
+ &v->meta_pte_bytes_per_frame_ub_l[k]);
+
+ v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
+ mode_lib,
+ v->VRatio[k],
+ v->vtaps[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->SwathHeightY[k],
+ v->ViewportYStartY[k],
+ &v->VInitPreFillY[k],
+ &v->MaxNumSwathY[k]);
+ v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
+ v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
+ v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
+
+ CalculateRowBandwidth(
+ v->GPUVMEnable,
+ v->SourcePixelFormat[k],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->DCCEnable[k],
+ v->HTotal[k] / v->PixelClock[k],
+ MetaRowByteY,
+ MetaRowByteC,
+ v->meta_row_height[k],
+ v->meta_row_height_chroma[k],
+ PixelPTEBytesPerRowY,
+ PixelPTEBytesPerRowC,
+ v->dpte_row_height[k],
+ v->dpte_row_height_chroma[k],
+ &v->meta_row_bw[k],
+ &v->dpte_row_bw[k]);
+ }
+
+ v->TotalDCCActiveDPP = 0;
+ v->TotalActiveDPP = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
+ if (v->DCCEnable[k])
+ v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
+ || v->SourcePixelFormat[k] == dm_rgbe_alpha)
+ NoChromaPlanes = false;
+ }
+
+ ReorderBytes = v->NumberOfChannels
+ * dml_max3(
+ v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
+ v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
+ v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
+
+ VMDataOnlyReturnBW = dml_min(
+ dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
+ * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
+ * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
+ dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
+ dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
+ dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
+ dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
+ dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
+ dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
+ dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
+ dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
+ dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
+#endif
+
+ if (v->GPUVMEnable && v->HostVMEnable)
+ HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
+
+ v->UrgentExtraLatency = CalculateExtraLatency(
+ v->RoundTripPingLatencyCycles,
+ ReorderBytes,
+ v->DCFCLK,
+ v->TotalActiveDPP,
+ v->PixelChunkSizeInKByte,
+ v->TotalDCCActiveDPP,
+ v->MetaChunkSize,
+ v->ReturnBW,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->NumberOfActivePlanes,
+ v->DPPPerPlane,
+ v->dpte_group_bytes,
+ HostVMInefficiencyFactor,
+ v->HostVMMinPageSize,
+ v->HostVMMaxNonCachedPageTableLevels);
+
+ v->TCalc = 24.0 / v->DCFCLKDeepSleep;
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
+ if (v->WritebackEnable[k] == true) {
+ v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
+ + CalculateWriteBackDelay(
+ v->WritebackPixelFormat[k],
+ v->WritebackHRatio[k],
+ v->WritebackVRatio[k],
+ v->WritebackVTaps[k],
+ v->WritebackDestinationWidth[k],
+ v->WritebackDestinationHeight[k],
+ v->WritebackSourceHeight[k],
+ v->HTotal[k]) / v->DISPCLK;
+ } else
+ v->WritebackDelay[v->VoltageLevel][k] = 0;
+ for (j = 0; j < v->NumberOfActivePlanes; ++j) {
+ if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
+ v->WritebackDelay[v->VoltageLevel][k] = dml_max(
+ v->WritebackDelay[v->VoltageLevel][k],
+ v->WritebackLatency
+ + CalculateWriteBackDelay(
+ v->WritebackPixelFormat[j],
+ v->WritebackHRatio[j],
+ v->WritebackVRatio[j],
+ v->WritebackVTaps[j],
+ v->WritebackDestinationWidth[j],
+ v->WritebackDestinationHeight[j],
+ v->WritebackSourceHeight[j],
+ v->HTotal[k]) / v->DISPCLK);
+ }
+ }
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k)
+ for (j = 0; j < v->NumberOfActivePlanes; ++j)
+ if (v->BlendingAndTiming[k] == j)
+ v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->MaxVStartupLines[k] =
+ CalculateMaxVStartup(
+ v->VTotal[k],
+ v->VActive[k],
+ v->VBlankNom[k],
+ v->HTotal[k],
+ v->PixelClock[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->Interlace[k],
+ v->ip.VBlankNomDefaultUS,
+ v->WritebackDelay[v->VoltageLevel][k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
+ dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
+#endif
+ }
+
+ v->MaximumMaxVStartupLines = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k)
+ v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
+
+ // VBA_DELTA
+ // We don't really care to iterate between the various prefetch modes
+ //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
+
+ v->UrgentLatency = CalculateUrgentLatency(
+ v->UrgentLatencyPixelDataOnly,
+ v->UrgentLatencyPixelMixedWithVMData,
+ v->UrgentLatencyVMDataOnly,
+ v->DoUrgentLatencyAdjustment,
+ v->UrgentLatencyAdjustmentFabricClockComponent,
+ v->UrgentLatencyAdjustmentFabricClockReference,
+ v->FabricClock);
+
+ v->FractionOfUrgentBandwidth = 0.0;
+ v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
+
+ v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
+
+ do {
+ double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
+ bool DestinationLineTimesForPrefetchLessThan2 = false;
+ bool VRatioPrefetchMoreThan4 = false;
+ double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
+
+ MaxTotalRDBandwidth = 0;
+
+ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ Pipe myPipe;
+
+ myPipe.DPPCLK = v->DPPCLK[k];
+ myPipe.DISPCLK = v->DISPCLK;
+ myPipe.PixelClock = v->PixelClock[k];
+ myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
+ myPipe.DPPPerPlane = v->DPPPerPlane[k];
+ myPipe.ScalerEnabled = v->ScalerEnabled[k];
+ myPipe.VRatio = v->VRatio[k];
+ myPipe.VRatioChroma = v->VRatioChroma[k];
+ myPipe.SourceScan = v->SourceScan[k];
+ myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
+ myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
+ myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
+ myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
+ myPipe.InterlaceEnable = v->Interlace[k];
+ myPipe.NumberOfCursors = v->NumberOfCursors[k];
+ myPipe.VBlank = v->VTotal[k] - v->VActive[k];
+ myPipe.HTotal = v->HTotal[k];
+ myPipe.DCCEnable = v->DCCEnable[k];
+ myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
+ || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
+ myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
+ myPipe.BytePerPixelY = v->BytePerPixelY[k];
+ myPipe.BytePerPixelC = v->BytePerPixelC[k];
+ myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
+ v->ErrorResult[k] = CalculatePrefetchSchedule(
+ mode_lib,
+ HostVMInefficiencyFactor,
+ &myPipe,
+ v->DSCDelay[k],
+ v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
+ v->DPPCLKDelaySCL,
+ v->DPPCLKDelaySCLLBOnly,
+ v->DPPCLKDelayCNVCCursor,
+ v->DISPCLKDelaySubtotal,
+ (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
+ v->OutputFormat[k],
+ v->MaxInterDCNTileRepeaters,
+ dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
+ v->MaxVStartupLines[k],
+ v->GPUVMMaxPageTableLevels,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->HostVMMinPageSize,
+ v->DynamicMetadataEnable[k],
+ v->DynamicMetadataVMEnabled,
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->UrgentLatency,
+ v->UrgentExtraLatency,
+ v->TCalc,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+ v->PixelPTEBytesPerRow[k],
+ v->PrefetchSourceLinesY[k],
+ v->SwathWidthY[k],
+ v->VInitPreFillY[k],
+ v->MaxNumSwathY[k],
+ v->PrefetchSourceLinesC[k],
+ v->SwathWidthC[k],
+ v->VInitPreFillC[k],
+ v->MaxNumSwathC[k],
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ v->SwathHeightY[k],
+ v->SwathHeightC[k],
+ TWait,
+ &v->DSTXAfterScaler[k],
+ &v->DSTYAfterScaler[k],
+ &v->DestinationLinesForPrefetch[k],
+ &v->PrefetchBandwidth[k],
+ &v->DestinationLinesToRequestVMInVBlank[k],
+ &v->DestinationLinesToRequestRowInVBlank[k],
+ &v->VRatioPrefetchY[k],
+ &v->VRatioPrefetchC[k],
+ &v->RequiredPrefetchPixDataBWLuma[k],
+ &v->RequiredPrefetchPixDataBWChroma[k],
+ &v->NotEnoughTimeForDynamicMetadata[k],
+ &v->Tno_bw[k],
+ &v->prefetch_vmrow_bw[k],
+ &v->Tdmdl_vm[k],
+ &v->Tdmdl[k],
+ &v->TSetup[k],
+ &v->VUpdateOffsetPix[k],
+ &v->VUpdateWidthPix[k],
+ &v->VReadyOffsetPix[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
+#endif
+ v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
+ }
+
+ v->NoEnoughUrgentLatencyHiding = false;
+ v->NoEnoughUrgentLatencyHidingPre = false;
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
+ v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
+
+ CalculateUrgentBurstFactor(
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ v->SwathHeightY[k],
+ v->SwathHeightC[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->UrgentLatency,
+ v->CursorBufferSize,
+ v->CursorWidth[k][0],
+ v->CursorBPP[k][0],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ v->DETBufferSizeY[k],
+ v->DETBufferSizeC[k],
+ &v->UrgBurstFactorCursor[k],
+ &v->UrgBurstFactorLuma[k],
+ &v->UrgBurstFactorChroma[k],
+ &v->NoUrgentLatencyHiding[k]);
+
+ CalculateUrgentBurstFactor(
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ v->SwathHeightY[k],
+ v->SwathHeightC[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->UrgentLatency,
+ v->CursorBufferSize,
+ v->CursorWidth[k][0],
+ v->CursorBPP[k][0],
+ v->VRatioPrefetchY[k],
+ v->VRatioPrefetchC[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ v->DETBufferSizeY[k],
+ v->DETBufferSizeC[k],
+ &v->UrgBurstFactorCursorPre[k],
+ &v->UrgBurstFactorLumaPre[k],
+ &v->UrgBurstFactorChromaPre[k],
+ &v->NoUrgentLatencyHidingPre[k]);
+
+ MaxTotalRDBandwidth = MaxTotalRDBandwidth
+ + dml_max3(
+ v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
+ v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
+ + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
+ + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
+ + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
+ v->DPPPerPlane[k]
+ * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
+
+ MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
+ + dml_max3(
+ v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
+ v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
+ + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
+ v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
+ + v->cursor_bw_pre[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
+
+ dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
+ dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
+
+ dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
+ dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
+ dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
+ dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
+ dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
+ dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
+ dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
+ dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
+#endif
+
+ if (v->DestinationLinesForPrefetch[k] < 2)
+ DestinationLineTimesForPrefetchLessThan2 = true;
+
+ if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
+ VRatioPrefetchMoreThan4 = true;
+
+ if (v->NoUrgentLatencyHiding[k] == true)
+ v->NoEnoughUrgentLatencyHiding = true;
+
+ if (v->NoUrgentLatencyHidingPre[k] == true)
+ v->NoEnoughUrgentLatencyHidingPre = true;
+ }
+
+ v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
+ dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW);
+ dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth);
+#endif
+
+ if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
+ && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
+ v->PrefetchModeSupported = true;
+ else {
+ v->PrefetchModeSupported = false;
+ dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
+ dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
+ dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
+ dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
+ }
+
+ // PREVIOUS_ERROR
+ // This error result check was done after the PrefetchModeSupported. So we will
+ // still try to calculate flip schedule even prefetch mode not supported
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
+ v->PrefetchModeSupported = false;
+ dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
+ }
+ }
+
+ if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
+ v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
+ - dml_max(
+ v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
+ + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
+ + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
+ v->DPPPerPlane[k]
+ * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
+ }
+
+ v->TotImmediateFlipBytes = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
+ + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateFlipSchedule(
+ mode_lib,
+ HostVMInefficiencyFactor,
+ v->UrgentExtraLatency,
+ v->UrgentLatency,
+ v->GPUVMMaxPageTableLevels,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMEnable,
+ v->HostVMMinPageSize,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+ v->PixelPTEBytesPerRow[k],
+ v->BandwidthAvailableForImmediateFlip,
+ v->TotImmediateFlipBytes,
+ v->SourcePixelFormat[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->Tno_bw[k],
+ v->DCCEnable[k],
+ v->dpte_row_height[k],
+ v->meta_row_height[k],
+ v->dpte_row_height_chroma[k],
+ v->meta_row_height_chroma[k],
+ &v->DestinationLinesToRequestVMInImmediateFlip[k],
+ &v->DestinationLinesToRequestRowInImmediateFlip[k],
+ &v->final_flip_bw[k],
+ &v->ImmediateFlipSupportedForPipe[k]);
+ }
+
+ v->total_dcn_read_bw_with_flip = 0.0;
+ v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
+ + dml_max3(
+ v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
+ v->DPPPerPlane[k] * v->final_flip_bw[k]
+ + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
+ + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
+ + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
+ v->DPPPerPlane[k]
+ * (v->final_flip_bw[k]
+ + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
+ v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
+ + dml_max3(
+ v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
+ v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
+ + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
+ v->DPPPerPlane[k]
+ * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
+ + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
+ }
+ v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
+
+ v->ImmediateFlipSupported = true;
+ if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
+#endif
+ v->ImmediateFlipSupported = false;
+ v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->ImmediateFlipSupportedForPipe[k] == false) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
+#endif
+ v->ImmediateFlipSupported = false;
+ }
+ }
+ } else {
+ v->ImmediateFlipSupported = false;
+ }
+
+ v->PrefetchAndImmediateFlipSupported =
+ (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
+ && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
+ v->ImmediateFlipSupported)) ? true : false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
+ dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
+ dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
+ dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
+ dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
+ dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
+#endif
+ dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
+
+ v->VStartupLines = v->VStartupLines + 1;
+ } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
+ ASSERT(v->PrefetchAndImmediateFlipSupported);
+
+ // Unbounded Request Enabled
+ CalculateUnboundedRequestAndCompressedBufferSize(
+ v->DETBufferSizeInKByte[0],
+ v->ConfigReturnBufferSizeInKByte,
+ v->UseUnboundedRequesting,
+ v->TotalActiveDPP,
+ NoChromaPlanes,
+ v->MaxNumDPP,
+ v->CompressedBufferSegmentSizeInkByte,
+ v->Output,
+ &v->UnboundedRequestEnabled,
+ &v->CompressedBufferSizeInkByte);
+
+ //Watermarks and NB P-State/DRAM Clock Change Support
+ {
+ enum clock_change_support DRAMClockChangeSupport; // dummy
+
+ CalculateWatermarksAndDRAMSpeedChangeSupport(
+ mode_lib,
+ PrefetchMode,
+ v->NumberOfActivePlanes,
+ v->MaxLineBufferLines,
+ v->LineBufferSize,
+ v->WritebackInterfaceBufferSize,
+ v->DCFCLK,
+ v->ReturnBW,
+ v->SynchronizedVBlank,
+ v->dpte_group_bytes,
+ v->MetaChunkSize,
+ v->UrgentLatency,
+ v->UrgentExtraLatency,
+ v->WritebackLatency,
+ v->WritebackChunkSize,
+ v->SOCCLK,
+ v->DRAMClockChangeLatency,
+ v->SRExitTime,
+ v->SREnterPlusExitTime,
+ v->SRExitZ8Time,
+ v->SREnterPlusExitZ8Time,
+ v->DCFCLKDeepSleep,
+ v->DETBufferSizeY,
+ v->DETBufferSizeC,
+ v->SwathHeightY,
+ v->SwathHeightC,
+ v->LBBitPerPixel,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ v->HRatio,
+ v->HRatioChroma,
+ v->vtaps,
+ v->VTAPsChroma,
+ v->VRatio,
+ v->VRatioChroma,
+ v->HTotal,
+ v->PixelClock,
+ v->BlendingAndTiming,
+ v->DPPPerPlane,
+ v->BytePerPixelDETY,
+ v->BytePerPixelDETC,
+ v->DSTXAfterScaler,
+ v->DSTYAfterScaler,
+ v->WritebackEnable,
+ v->WritebackPixelFormat,
+ v->WritebackDestinationWidth,
+ v->WritebackDestinationHeight,
+ v->WritebackSourceHeight,
+ v->UnboundedRequestEnabled,
+ v->CompressedBufferSizeInkByte,
+ &DRAMClockChangeSupport,
+ &v->UrgentWatermark,
+ &v->WritebackUrgentWatermark,
+ &v->DRAMClockChangeWatermark,
+ &v->WritebackDRAMClockChangeWatermark,
+ &v->StutterExitWatermark,
+ &v->StutterEnterPlusExitWatermark,
+ &v->Z8StutterExitWatermark,
+ &v->Z8StutterEnterPlusExitWatermark,
+ &v->MinActiveDRAMClockChangeLatencySupported);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k] == true) {
+ v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
+ 0,
+ v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
+ } else {
+ v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
+ }
+ }
+ }
+
+ //Display Pipeline Delivery Time in Prefetch, Groups
+ CalculatePixelDeliveryTimes(
+ v->NumberOfActivePlanes,
+ v->VRatio,
+ v->VRatioChroma,
+ v->VRatioPrefetchY,
+ v->VRatioPrefetchC,
+ v->swath_width_luma_ub,
+ v->swath_width_chroma_ub,
+ v->DPPPerPlane,
+ v->HRatio,
+ v->HRatioChroma,
+ v->PixelClock,
+ v->PSCL_THROUGHPUT_LUMA,
+ v->PSCL_THROUGHPUT_CHROMA,
+ v->DPPCLK,
+ v->BytePerPixelC,
+ v->SourceScan,
+ v->NumberOfCursors,
+ v->CursorWidth,
+ v->CursorBPP,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesC,
+ v->BlockHeight256BytesC,
+ v->DisplayPipeLineDeliveryTimeLuma,
+ v->DisplayPipeLineDeliveryTimeChroma,
+ v->DisplayPipeLineDeliveryTimeLumaPrefetch,
+ v->DisplayPipeLineDeliveryTimeChromaPrefetch,
+ v->DisplayPipeRequestDeliveryTimeLuma,
+ v->DisplayPipeRequestDeliveryTimeChroma,
+ v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
+ v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
+ v->CursorRequestDeliveryTime,
+ v->CursorRequestDeliveryTimePrefetch);
+
+ CalculateMetaAndPTETimes(
+ v->NumberOfActivePlanes,
+ v->GPUVMEnable,
+ v->MetaChunkSize,
+ v->MinMetaChunkSizeBytes,
+ v->HTotal,
+ v->VRatio,
+ v->VRatioChroma,
+ v->DestinationLinesToRequestRowInVBlank,
+ v->DestinationLinesToRequestRowInImmediateFlip,
+ v->DCCEnable,
+ v->PixelClock,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->SourceScan,
+ v->dpte_row_height,
+ v->dpte_row_height_chroma,
+ v->meta_row_width,
+ v->meta_row_width_chroma,
+ v->meta_row_height,
+ v->meta_row_height_chroma,
+ v->meta_req_width,
+ v->meta_req_width_chroma,
+ v->meta_req_height,
+ v->meta_req_height_chroma,
+ v->dpte_group_bytes,
+ v->PTERequestSizeY,
+ v->PTERequestSizeC,
+ v->PixelPTEReqWidthY,
+ v->PixelPTEReqHeightY,
+ v->PixelPTEReqWidthC,
+ v->PixelPTEReqHeightC,
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+ v->DST_Y_PER_PTE_ROW_NOM_L,
+ v->DST_Y_PER_PTE_ROW_NOM_C,
+ v->DST_Y_PER_META_ROW_NOM_L,
+ v->DST_Y_PER_META_ROW_NOM_C,
+ v->TimePerMetaChunkNominal,
+ v->TimePerChromaMetaChunkNominal,
+ v->TimePerMetaChunkVBlank,
+ v->TimePerChromaMetaChunkVBlank,
+ v->TimePerMetaChunkFlip,
+ v->TimePerChromaMetaChunkFlip,
+ v->time_per_pte_group_nom_luma,
+ v->time_per_pte_group_vblank_luma,
+ v->time_per_pte_group_flip_luma,
+ v->time_per_pte_group_nom_chroma,
+ v->time_per_pte_group_vblank_chroma,
+ v->time_per_pte_group_flip_chroma);
+
+ CalculateVMGroupAndRequestTimes(
+ v->NumberOfActivePlanes,
+ v->GPUVMEnable,
+ v->GPUVMMaxPageTableLevels,
+ v->HTotal,
+ v->BytePerPixelC,
+ v->DestinationLinesToRequestVMInVBlank,
+ v->DestinationLinesToRequestVMInImmediateFlip,
+ v->DCCEnable,
+ v->PixelClock,
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+ v->vm_group_bytes,
+ v->dpde0_bytes_per_frame_ub_l,
+ v->dpde0_bytes_per_frame_ub_c,
+ v->meta_pte_bytes_per_frame_ub_l,
+ v->meta_pte_bytes_per_frame_ub_c,
+ v->TimePerVMGroupVBlank,
+ v->TimePerVMGroupFlip,
+ v->TimePerVMRequestVBlank,
+ v->TimePerVMRequestFlip);
+
+ // Min TTUVBlank
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (PrefetchMode == 0) {
+ v->AllowDRAMClockChangeDuringVBlank[k] = true;
+ v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
+ v->MinTTUVBlank[k] = dml_max(
+ v->DRAMClockChangeWatermark,
+ dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
+ } else if (PrefetchMode == 1) {
+ v->AllowDRAMClockChangeDuringVBlank[k] = false;
+ v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
+ v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
+ } else {
+ v->AllowDRAMClockChangeDuringVBlank[k] = false;
+ v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
+ v->MinTTUVBlank[k] = v->UrgentWatermark;
+ }
+ if (!v->DynamicMetadataEnable[k])
+ v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
+ }
+
+ // DCC Configuration
+ v->ActiveDPPs = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
+ v->SourcePixelFormat[k],
+ v->SurfaceWidthY[k],
+ v->SurfaceWidthC[k],
+ v->SurfaceHeightY[k],
+ v->SurfaceHeightC[k],
+ v->DETBufferSizeInKByte[0] * 1024,
+ v->BlockHeight256BytesY[k],
+ v->BlockHeight256BytesC[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelY[k],
+ v->BytePerPixelC[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ v->SourceScan[k],
+ &v->DCCYMaxUncompressedBlock[k],
+ &v->DCCCMaxUncompressedBlock[k],
+ &v->DCCYMaxCompressedBlock[k],
+ &v->DCCCMaxCompressedBlock[k],
+ &v->DCCYIndependentBlock[k],
+ &v->DCCCIndependentBlock[k]);
+ }
+
+ // VStartup Adjustment
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ bool isInterlaceTiming;
+ double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
+#endif
+
+ v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
+ dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
+#endif
+
+ v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
+ if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
+ v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
+ }
+
+ isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
+ v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
+ if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) {
+ v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0);
+ } else {
+ v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
+ }
+ v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / (double)v->HTotal[k] / v->PixelClock[k], 1.0) / 4.0;
+ if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
+ <= (isInterlaceTiming ?
+ dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
+ (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
+ v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
+ } else {
+ v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
+ dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
+ dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
+ dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
+ dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
+ dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
+ dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
+ dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
+ dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
+#endif
+ }
+
+ {
+ //Maximum Bandwidth Used
+ double TotalWRBandwidth = 0;
+ double MaxPerPlaneVActiveWRBandwidth = 0;
+ double WRBandwidth = 0;
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
+ WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
+ } else if (v->WritebackEnable[k] == true) {
+ WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
+ }
+ TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
+ MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
+ }
+
+ v->TotalDataReadBandwidth = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
+ }
+ }
+ // Stutter Efficiency
+ CalculateStutterEfficiency(
+ mode_lib,
+ v->CompressedBufferSizeInkByte,
+ v->UnboundedRequestEnabled,
+ v->ConfigReturnBufferSizeInKByte,
+ v->MetaFIFOSizeInKEntries,
+ v->ZeroSizeBufferEntries,
+ v->NumberOfActivePlanes,
+ v->ROBBufferSizeInKByte,
+ v->TotalDataReadBandwidth,
+ v->DCFCLK,
+ v->ReturnBW,
+ v->COMPBUF_RESERVED_SPACE_64B,
+ v->COMPBUF_RESERVED_SPACE_ZS,
+ v->SRExitTime,
+ v->SRExitZ8Time,
+ v->SynchronizedVBlank,
+ v->StutterEnterPlusExitWatermark,
+ v->Z8StutterEnterPlusExitWatermark,
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->Interlace,
+ v->MinTTUVBlank,
+ v->DPPPerPlane,
+ v->DETBufferSizeY,
+ v->BytePerPixelY,
+ v->BytePerPixelDETY,
+ v->SwathWidthY,
+ v->SwathHeightY,
+ v->SwathHeightC,
+ v->DCCRateLuma,
+ v->DCCRateChroma,
+ v->DCCFractionOfZeroSizeRequestsLuma,
+ v->DCCFractionOfZeroSizeRequestsChroma,
+ v->HTotal,
+ v->VTotal,
+ v->PixelClock,
+ v->VRatio,
+ v->SourceScan,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesC,
+ v->DCCYMaxUncompressedBlock,
+ v->DCCCMaxUncompressedBlock,
+ v->VActive,
+ v->DCCEnable,
+ v->WritebackEnable,
+ v->ReadBandwidthPlaneLuma,
+ v->ReadBandwidthPlaneChroma,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ &v->StutterEfficiencyNotIncludingVBlank,
+ &v->StutterEfficiency,
+ &v->NumberOfStutterBurstsPerFrame,
+ &v->Z8StutterEfficiencyNotIncludingVBlank,
+ &v->Z8StutterEfficiency,
+ &v->Z8NumberOfStutterBurstsPerFrame,
+ &v->StutterPeriod);
+}
+
+static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ // Display Pipe Configuration
+ double BytePerPixDETY[DC__NUM_DPP__MAX];
+ double BytePerPixDETC[DC__NUM_DPP__MAX];
+ int BytePerPixY[DC__NUM_DPP__MAX];
+ int BytePerPixC[DC__NUM_DPP__MAX];
+ int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
+ int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
+ int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
+ int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
+ double dummy1[DC__NUM_DPP__MAX];
+ double dummy2[DC__NUM_DPP__MAX];
+ double dummy3[DC__NUM_DPP__MAX];
+ double dummy4[DC__NUM_DPP__MAX];
+ int dummy5[DC__NUM_DPP__MAX];
+ int dummy6[DC__NUM_DPP__MAX];
+ bool dummy7[DC__NUM_DPP__MAX];
+ bool dummysinglestring;
+
+ unsigned int k;
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+
+ CalculateBytePerPixelAnd256BBlockSizes(
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ &BytePerPixY[k],
+ &BytePerPixC[k],
+ &BytePerPixDETY[k],
+ &BytePerPixDETC[k],
+ &Read256BytesBlockHeightY[k],
+ &Read256BytesBlockHeightC[k],
+ &Read256BytesBlockWidthY[k],
+ &Read256BytesBlockWidthC[k]);
+ }
+
+ CalculateSwathAndDETConfiguration(
+ false,
+ v->NumberOfActivePlanes,
+ v->DETBufferSizeInKByte[0],
+ dummy1,
+ dummy2,
+ v->SourceScan,
+ v->SourcePixelFormat,
+ v->SurfaceTiling,
+ v->ViewportWidth,
+ v->ViewportHeight,
+ v->SurfaceWidthY,
+ v->SurfaceWidthC,
+ v->SurfaceHeightY,
+ v->SurfaceHeightC,
+ Read256BytesBlockHeightY,
+ Read256BytesBlockHeightC,
+ Read256BytesBlockWidthY,
+ Read256BytesBlockWidthC,
+ v->ODMCombineEnabled,
+ v->BlendingAndTiming,
+ BytePerPixY,
+ BytePerPixC,
+ BytePerPixDETY,
+ BytePerPixDETC,
+ v->HActive,
+ v->HRatio,
+ v->HRatioChroma,
+ v->DPPPerPlane,
+ dummy5,
+ dummy6,
+ dummy3,
+ dummy4,
+ v->SwathHeightY,
+ v->SwathHeightC,
+ v->DETBufferSizeY,
+ v->DETBufferSizeC,
+ dummy7,
+ &dummysinglestring);
+}
+
+static bool CalculateBytePerPixelAnd256BBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC)
+{
+ if (SourcePixelFormat == dm_444_64) {
+ *BytePerPixelDETY = 8;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 8;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_16) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 1;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 1;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 2;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 2;
+ } else if (SourcePixelFormat == dm_420_12) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 4;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ } else {
+ *BytePerPixelDETY = 4.0 / 3;
+ *BytePerPixelDETC = 8.0 / 3;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ }
+
+ if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
+ || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
+ if (SurfaceTiling == dm_sw_linear) {
+ *BlockHeight256BytesY = 1;
+ } else if (SourcePixelFormat == dm_444_64) {
+ *BlockHeight256BytesY = 4;
+ } else if (SourcePixelFormat == dm_444_8) {
+ *BlockHeight256BytesY = 16;
+ } else {
+ *BlockHeight256BytesY = 8;
+ }
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockHeight256BytesC = 0;
+ *BlockWidth256BytesC = 0;
+ } else {
+ if (SurfaceTiling == dm_sw_linear) {
+ *BlockHeight256BytesY = 1;
+ *BlockHeight256BytesC = 1;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 16;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BlockHeight256BytesY = 16;
+ *BlockHeight256BytesC = 8;
+ } else {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 8;
+ }
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
+ }
+ return true;
+}
+
+static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
+{
+ if (PrefetchMode == 0) {
+ return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
+ } else if (PrefetchMode == 1) {
+ return dml_max(SREnterPlusExitTime, UrgentLatency);
+ } else {
+ return UrgentLatency;
+ }
+}
+
+double dml314_CalculateWriteBackDISPCLK(
+ enum source_format_class WritebackPixelFormat,
+ double PixelClock,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackHTaps,
+ unsigned int WritebackVTaps,
+ long WritebackSourceWidth,
+ long WritebackDestinationWidth,
+ unsigned int HTotal,
+ unsigned int WritebackLineBufferSize)
+{
+ double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
+
+ DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
+ DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
+ DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
+ return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
+}
+
+static double CalculateWriteBackDelay(
+ enum source_format_class WritebackPixelFormat,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackVTaps,
+ int WritebackDestinationWidth,
+ int WritebackDestinationHeight,
+ int WritebackSourceHeight,
+ unsigned int HTotal)
+{
+ double CalculateWriteBackDelay;
+ double Line_length;
+ double Output_lines_last_notclamped;
+ double WritebackVInit;
+
+ WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
+ Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
+ Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
+ if (Output_lines_last_notclamped < 0) {
+ CalculateWriteBackDelay = 0;
+ } else {
+ CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
+ }
+ return CalculateWriteBackDelay;
+}
+
+static void CalculateVupdateAndDynamicMetadataParameters(
+ int MaxInterDCNTileRepeaters,
+ double DPPCLK,
+ double DISPCLK,
+ double DCFClkDeepSleep,
+ double PixelClock,
+ int HTotal,
+ int VBlank,
+ int DynamicMetadataTransmittedBytes,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ int InterlaceEnable,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double *TSetup,
+ double *Tdmbf,
+ double *Tdmec,
+ double *Tdmsks,
+ int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix)
+{
+ double TotalRepeaterDelayTime;
+
+ TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
+ *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
+ *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
+ *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
+ *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
+ *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
+ *Tdmec = HTotal / PixelClock;
+ if (DynamicMetadataLinesBeforeActiveRequired == 0) {
+ *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
+ } else {
+ *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
+ }
+ if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
+ *Tdmsks = *Tdmsks / 2;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix);
+ dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix);
+ dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
+#endif
+}
+
+static void CalculateRowBandwidth(
+ bool GPUVMEnable,
+ enum source_format_class SourcePixelFormat,
+ double VRatio,
+ double VRatioChroma,
+ bool DCCEnable,
+ double LineTime,
+ unsigned int MetaRowByteLuma,
+ unsigned int MetaRowByteChroma,
+ unsigned int meta_row_height_luma,
+ unsigned int meta_row_height_chroma,
+ unsigned int PixelPTEBytesPerRowLuma,
+ unsigned int PixelPTEBytesPerRowChroma,
+ unsigned int dpte_row_height_luma,
+ unsigned int dpte_row_height_chroma,
+ double *meta_row_bw,
+ double *dpte_row_bw)
+{
+ if (DCCEnable != true) {
+ *meta_row_bw = 0;
+ } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
+ *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
+ } else {
+ *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
+ }
+
+ if (GPUVMEnable != true) {
+ *dpte_row_bw = 0;
+ } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
+ *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
+ + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
+ } else {
+ *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
+ }
+}
+
+static void CalculateFlipSchedule(
+ struct display_mode_lib *mode_lib,
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool GPUVMEnable,
+ double HostVMMinPageSize,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+ double DPTEBytesPerRow,
+ double BandwidthAvailableForImmediateFlip,
+ unsigned int TotImmediateFlipBytes,
+ enum source_format_class SourcePixelFormat,
+ double LineTime,
+ double VRatio,
+ double VRatioChroma,
+ double Tno_bw,
+ bool DCCEnable,
+ unsigned int dpte_row_height,
+ unsigned int meta_row_height,
+ unsigned int dpte_row_height_chroma,
+ unsigned int meta_row_height_chroma,
+ double *DestinationLinesToRequestVMInImmediateFlip,
+ double *DestinationLinesToRequestRowInImmediateFlip,
+ double *final_flip_bw,
+ bool *ImmediateFlipSupportedForPipe)
+{
+ double min_row_time = 0.0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double TimeForFetchingMetaPTEImmediateFlip;
+ double TimeForFetchingRowInVBlankImmediateFlip;
+ double ImmediateFlipBW;
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ } else {
+ HostVMDynamicLevelsTrips = 0;
+ }
+
+ if (GPUVMEnable == true || DCCEnable == true) {
+ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
+ }
+
+ if (GPUVMEnable == true) {
+ TimeForFetchingMetaPTEImmediateFlip = dml_max3(
+ Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
+ UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
+ LineTime / 4.0);
+ } else {
+ TimeForFetchingMetaPTEImmediateFlip = 0;
+ }
+
+ *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
+ if ((GPUVMEnable == true || DCCEnable == true)) {
+ TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
+ UrgentLatency * (HostVMDynamicLevelsTrips + 1),
+ LineTime / 4);
+ } else {
+ TimeForFetchingRowInVBlankImmediateFlip = 0;
+ }
+
+ *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
+
+ if (GPUVMEnable == true) {
+ *final_flip_bw = dml_max(
+ PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
+ } else if ((GPUVMEnable == true || DCCEnable == true)) {
+ *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
+ } else {
+ *final_flip_bw = 0;
+ }
+
+ if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
+ if (GPUVMEnable == true && DCCEnable != true) {
+ min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
+ } else if (GPUVMEnable != true && DCCEnable == true) {
+ min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
+ } else {
+ min_row_time = dml_min4(
+ dpte_row_height * LineTime / VRatio,
+ meta_row_height * LineTime / VRatio,
+ dpte_row_height_chroma * LineTime / VRatioChroma,
+ meta_row_height_chroma * LineTime / VRatioChroma);
+ }
+ } else {
+ if (GPUVMEnable == true && DCCEnable != true) {
+ min_row_time = dpte_row_height * LineTime / VRatio;
+ } else if (GPUVMEnable != true && DCCEnable == true) {
+ min_row_time = meta_row_height * LineTime / VRatio;
+ } else {
+ min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
+ }
+ }
+
+ if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
+ || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
+ *ImmediateFlipSupportedForPipe = false;
+ } else {
+ *ImmediateFlipSupportedForPipe = true;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
+ dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
+ dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
+ dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
+ dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
+ dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
+#endif
+
+}
+
+static double TruncToValidBPP(
+ double LinkBitRate,
+ int Lanes,
+ int HTotal,
+ int HActive,
+ double PixelClock,
+ double DesiredBPP,
+ bool DSCEnable,
+ enum output_encoder_class Output,
+ enum output_format_class Format,
+ unsigned int DSCInputBitPerComponent,
+ int DSCSlices,
+ int AudioRate,
+ int AudioLayout,
+ enum odm_combine_mode ODMCombine)
+{
+ double MaxLinkBPP;
+ int MinDSCBPP;
+ double MaxDSCBPP;
+ int NonDSCBPP0;
+ int NonDSCBPP1;
+ int NonDSCBPP2;
+
+ if (Format == dm_420) {
+ NonDSCBPP0 = 12;
+ NonDSCBPP1 = 15;
+ NonDSCBPP2 = 18;
+ MinDSCBPP = 6;
+ MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
+ } else if (Format == dm_444) {
+ NonDSCBPP0 = 24;
+ NonDSCBPP1 = 30;
+ NonDSCBPP2 = 36;
+ MinDSCBPP = 8;
+ MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
+ } else {
+
+ NonDSCBPP0 = 16;
+ NonDSCBPP1 = 20;
+ NonDSCBPP2 = 24;
+
+ if (Format == dm_n422) {
+ MinDSCBPP = 7;
+ MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
+ } else {
+ MinDSCBPP = 8;
+ MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
+ }
+ }
+
+ if (DSCEnable && Output == dm_dp) {
+ MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
+ } else {
+ MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
+ }
+
+ if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
+ MaxLinkBPP = 16;
+ } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
+ MaxLinkBPP = 32;
+ }
+
+ if (DesiredBPP == 0) {
+ if (DSCEnable) {
+ if (MaxLinkBPP < MinDSCBPP) {
+ return BPP_INVALID;
+ } else if (MaxLinkBPP >= MaxDSCBPP) {
+ return MaxDSCBPP;
+ } else {
+ return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
+ }
+ } else {
+ if (MaxLinkBPP >= NonDSCBPP2) {
+ return NonDSCBPP2;
+ } else if (MaxLinkBPP >= NonDSCBPP1) {
+ return NonDSCBPP1;
+ } else if (MaxLinkBPP >= NonDSCBPP0) {
+ return 16.0;
+ } else {
+ return BPP_INVALID;
+ }
+ }
+ } else {
+ if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
+ || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
+ return BPP_INVALID;
+ } else {
+ return DesiredBPP;
+ }
+ }
+ return BPP_INVALID;
+}
+
+static noinline void CalculatePrefetchSchedulePerPlane(
+ struct display_mode_lib *mode_lib,
+ double HostVMInefficiencyFactor,
+ int i,
+ unsigned int j,
+ unsigned int k)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ Pipe myPipe;
+
+ myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
+ myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
+ myPipe.PixelClock = v->PixelClock[k];
+ myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
+ myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
+ myPipe.ScalerEnabled = v->ScalerEnabled[k];
+ myPipe.VRatio = mode_lib->vba.VRatio[k];
+ myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
+
+ myPipe.SourceScan = v->SourceScan[k];
+ myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
+ myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
+ myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
+ myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
+ myPipe.InterlaceEnable = v->Interlace[k];
+ myPipe.NumberOfCursors = v->NumberOfCursors[k];
+ myPipe.VBlank = v->VTotal[k] - v->VActive[k];
+ myPipe.HTotal = v->HTotal[k];
+ myPipe.DCCEnable = v->DCCEnable[k];
+ myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
+ || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
+ myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
+ myPipe.BytePerPixelY = v->BytePerPixelY[k];
+ myPipe.BytePerPixelC = v->BytePerPixelC[k];
+ myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
+ v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
+ mode_lib,
+ HostVMInefficiencyFactor,
+ &myPipe,
+ v->DSCDelayPerState[i][k],
+ v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
+ v->DPPCLKDelaySCL,
+ v->DPPCLKDelaySCLLBOnly,
+ v->DPPCLKDelayCNVCCursor,
+ v->DISPCLKDelaySubtotal,
+ v->SwathWidthYThisState[k] / v->HRatio[k],
+ v->OutputFormat[k],
+ v->MaxInterDCNTileRepeaters,
+ dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
+ v->MaximumVStartup[i][j][k],
+ v->GPUVMMaxPageTableLevels,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->HostVMMinPageSize,
+ v->DynamicMetadataEnable[k],
+ v->DynamicMetadataVMEnabled,
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->UrgLatency[i],
+ v->ExtraLatency,
+ v->TimeCalc,
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k],
+ v->MetaRowBytes[i][j][k],
+ v->DPTEBytesPerRow[i][j][k],
+ v->PrefetchLinesY[i][j][k],
+ v->SwathWidthYThisState[k],
+ v->PrefillY[k],
+ v->MaxNumSwY[k],
+ v->PrefetchLinesC[i][j][k],
+ v->SwathWidthCThisState[k],
+ v->PrefillC[k],
+ v->MaxNumSwC[k],
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k],
+ v->TWait,
+ &v->DSTXAfterScaler[k],
+ &v->DSTYAfterScaler[k],
+ &v->LineTimesForPrefetch[k],
+ &v->PrefetchBW[k],
+ &v->LinesForMetaPTE[k],
+ &v->LinesForMetaAndDPTERow[k],
+ &v->VRatioPreY[i][j][k],
+ &v->VRatioPreC[i][j][k],
+ &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
+ &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
+ &v->NoTimeForDynamicMetadata[i][j][k],
+ &v->Tno_bw[k],
+ &v->prefetch_vmrow_bw[k],
+ &v->dummy7[k],
+ &v->dummy8[k],
+ &v->dummy13[k],
+ &v->VUpdateOffsetPix[k],
+ &v->VUpdateWidthPix[k],
+ &v->VReadyOffsetPix[k]);
+}
+
+void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+
+ int i, j;
+ unsigned int k, m;
+ int ReorderingBytes;
+ int MinPrefetchMode = 0, MaxPrefetchMode = 2;
+ bool NoChroma = true;
+ bool EnoughWritebackUnits = true;
+ bool P2IWith420 = false;
+ bool DSCOnlyIfNecessaryWithBPP = false;
+ bool DSC422NativeNotSupported = false;
+ double MaxTotalVActiveRDBandwidth;
+ bool ViewportExceedsSurface = false;
+ bool FMTBufferExceeded = false;
+
+ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
+
+ CalculateMinAndMaxPrefetchMode(
+ mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
+ &MinPrefetchMode, &MaxPrefetchMode);
+
+ /*Scale Ratio, taps Support Check*/
+
+ v->ScaleRatioAndTapsSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->ScalerEnabled[k] == false
+ && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
+ && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
+ && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
+ && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
+ || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
+ v->ScaleRatioAndTapsSupport = false;
+ } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
+ || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
+ || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
+ || v->VRatio[k] > v->vtaps[k]
+ || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
+ && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
+ && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
+ && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
+ || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
+ || v->HRatioChroma[k] > v->MaxHSCLRatio
+ || v->VRatioChroma[k] > v->MaxVSCLRatio
+ || v->HRatioChroma[k] > v->HTAPsChroma[k]
+ || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
+ v->ScaleRatioAndTapsSupport = false;
+ }
+ }
+ /*Source Format, Pixel Format and Scan Support Check*/
+
+ v->SourceFormatPixelAndScanSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
+ || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
+ || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
+ v->SourceFormatPixelAndScanSupport = false;
+ }
+ }
+ /*Bandwidth Support Check*/
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculateBytePerPixelAnd256BBlockSizes(
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ &v->BytePerPixelY[k],
+ &v->BytePerPixelC[k],
+ &v->BytePerPixelInDETY[k],
+ &v->BytePerPixelInDETC[k],
+ &v->Read256BlockHeightY[k],
+ &v->Read256BlockHeightC[k],
+ &v->Read256BlockWidthY[k],
+ &v->Read256BlockWidthC[k]);
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->SourceScan[k] != dm_vert) {
+ v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
+ v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
+ } else {
+ v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
+ v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
+ v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
+ v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
+ } else if (v->WritebackEnable[k] == true) {
+ v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
+ } else {
+ v->WriteBandwidth[k] = 0.0;
+ }
+ }
+
+ /*Writeback Latency support check*/
+
+ v->WritebackLatencySupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
+ v->WritebackLatencySupport = false;
+ }
+ }
+
+ /*Writeback Mode Support Check*/
+
+ v->TotalNumberOfActiveWriteback = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true) {
+ v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
+ }
+ }
+
+ if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
+ EnoughWritebackUnits = false;
+ }
+
+ /*Writeback Scale Ratio and Taps Support Check*/
+
+ v->WritebackScaleRatioAndTapsSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true) {
+ if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
+ || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
+ || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
+ || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
+ || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
+ || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
+ || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
+ v->WritebackScaleRatioAndTapsSupport = false;
+ }
+ if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
+ v->WritebackScaleRatioAndTapsSupport = false;
+ }
+ }
+ }
+ /*Maximum DISPCLK/DPPCLK Support check*/
+
+ v->WritebackRequiredDISPCLK = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->WritebackEnable[k] == true) {
+ v->WritebackRequiredDISPCLK = dml_max(
+ v->WritebackRequiredDISPCLK,
+ dml314_CalculateWriteBackDISPCLK(
+ v->WritebackPixelFormat[k],
+ v->PixelClock[k],
+ v->WritebackHRatio[k],
+ v->WritebackVRatio[k],
+ v->WritebackHTaps[k],
+ v->WritebackVTaps[k],
+ v->WritebackSourceWidth[k],
+ v->WritebackDestinationWidth[k],
+ v->HTotal[k],
+ v->WritebackLineBufferSize));
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->HRatio[k] > 1.0) {
+ v->PSCL_FACTOR[k] = dml_min(
+ v->MaxDCHUBToPSCLThroughput,
+ v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
+ } else {
+ v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
+ }
+ if (v->BytePerPixelC[k] == 0.0) {
+ v->PSCL_FACTOR_CHROMA[k] = 0.0;
+ v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
+ * dml_max3(
+ v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
+ v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
+ 1.0);
+ if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
+ v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
+ }
+ } else {
+ if (v->HRatioChroma[k] > 1.0) {
+ v->PSCL_FACTOR_CHROMA[k] = dml_min(
+ v->MaxDCHUBToPSCLThroughput,
+ v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
+ } else {
+ v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
+ }
+ v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
+ * dml_max5(
+ v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
+ v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
+ v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
+ v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
+ 1.0);
+ if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
+ && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
+ v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
+ }
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ int MaximumSwathWidthSupportLuma;
+ int MaximumSwathWidthSupportChroma;
+
+ if (v->SurfaceTiling[k] == dm_sw_linear) {
+ MaximumSwathWidthSupportLuma = 8192.0;
+ } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
+ MaximumSwathWidthSupportLuma = 2880.0;
+ } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+ MaximumSwathWidthSupportLuma = 3840.0;
+ } else {
+ MaximumSwathWidthSupportLuma = 5760.0;
+ }
+
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
+ MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
+ } else {
+ MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
+ }
+ v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
+ / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
+ if (v->BytePerPixelC[k] == 0.0) {
+ v->MaximumSwathWidthInLineBufferChroma = 0;
+ } else {
+ v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
+ / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
+ }
+ v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
+ v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
+ }
+
+ CalculateSwathAndDETConfiguration(
+ true,
+ v->NumberOfActivePlanes,
+ v->DETBufferSizeInKByte[0],
+ v->MaximumSwathWidthLuma,
+ v->MaximumSwathWidthChroma,
+ v->SourceScan,
+ v->SourcePixelFormat,
+ v->SurfaceTiling,
+ v->ViewportWidth,
+ v->ViewportHeight,
+ v->SurfaceWidthY,
+ v->SurfaceWidthC,
+ v->SurfaceHeightY,
+ v->SurfaceHeightC,
+ v->Read256BlockHeightY,
+ v->Read256BlockHeightC,
+ v->Read256BlockWidthY,
+ v->Read256BlockWidthC,
+ v->odm_combine_dummy,
+ v->BlendingAndTiming,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
+ v->HActive,
+ v->HRatio,
+ v->HRatioChroma,
+ v->NoOfDPPThisState,
+ v->swath_width_luma_ub_this_state,
+ v->swath_width_chroma_ub_this_state,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->SingleDPPViewportSizeSupportPerPlane,
+ &v->ViewportSizeSupport[0][0]);
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
+ v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
+ v->RequiredDISPCLK[i][j] = 0.0;
+ v->DISPCLK_DPPCLK_Support[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ * (1.0 + v->DISPCLKRampingMargin / 100.0);
+ if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
+ && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
+ && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
+ v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
+ * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+ v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ * (1 + v->DISPCLKRampingMargin / 100.0);
+ if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
+ && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
+ && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
+ v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
+ * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+ v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ * (1 + v->DISPCLKRampingMargin / 100.0);
+ if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
+ && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
+ && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
+ v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
+ * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+
+ if (v->ODMCombinePolicy == dm_odm_combine_policy_none
+ || !(v->Output[k] == dm_dp ||
+ v->Output[k] == dm_dp2p0 ||
+ v->Output[k] == dm_edp)) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
+
+ if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
+ FMTBufferExceeded = true;
+ } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
+ } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
+ || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
+ } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
+ } else {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
+ }
+ if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH
+ && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
+ if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
+ } else {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
+ }
+ }
+ if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH
+ && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
+ if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
+
+ if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
+ FMTBufferExceeded = true;
+ } else {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
+ }
+ }
+ if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
+ v->MPCCombine[i][j][k] = false;
+ v->NoOfDPP[i][j][k] = 4;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
+ } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ v->MPCCombine[i][j][k] = false;
+ v->NoOfDPP[i][j][k] = 2;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
+ } else if ((v->WhenToDoMPCCombine == dm_mpc_never
+ || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
+ v->MPCCombine[i][j][k] = false;
+ v->NoOfDPP[i][j][k] = 1;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ } else {
+ v->MPCCombine[i][j][k] = true;
+ v->NoOfDPP[i][j][k] = 2;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
+ }
+ v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
+ if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ > v->MaxDppclkRoundedDownToDFSGranularity)
+ || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
+ v->DISPCLK_DPPCLK_Support[i][j] = false;
+ }
+ }
+ v->TotalNumberOfActiveDPP[i][j] = 0;
+ v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
+ if (v->NoOfDPP[i][j][k] == 1)
+ v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
+ || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
+ NoChroma = false;
+ }
+
+ // UPTO
+ if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
+ && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
+ while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
+ double BWOfNonSplitPlaneOfMaximumBandwidth;
+ unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
+
+ BWOfNonSplitPlaneOfMaximumBandwidth = 0;
+ NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
+ && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
+ BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
+ NumberOfNonSplitPlaneOfMaximumBandwidth = k;
+ }
+ }
+ v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
+ v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
+ v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
+ v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
+ * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
+ v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
+ v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
+ }
+ }
+ if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
+ v->RequiredDISPCLK[i][j] = 0.0;
+ v->DISPCLK_DPPCLK_Support[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
+ v->MPCCombine[i][j][k] = true;
+ v->NoOfDPP[i][j][k] = 2;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
+ * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
+ } else {
+ v->MPCCombine[i][j][k] = false;
+ v->NoOfDPP[i][j][k] = 1;
+ v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
+ * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+ if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
+ && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
+ v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ * (1.0 + v->DISPCLKRampingMargin / 100.0);
+ } else {
+ v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ }
+ v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
+ if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
+ > v->MaxDppclkRoundedDownToDFSGranularity)
+ || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
+ v->DISPCLK_DPPCLK_Support[i][j] = false;
+ }
+ }
+ v->TotalNumberOfActiveDPP[i][j] = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
+ }
+ }
+ v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
+ if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
+ v->DISPCLK_DPPCLK_Support[i][j] = false;
+ }
+ }
+ }
+
+ /*Total Available Pipes Support Check*/
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
+ v->TotalAvailablePipesSupport[i][j] = true;
+ } else {
+ v->TotalAvailablePipesSupport[i][j] = false;
+ }
+ }
+ }
+ /*Display IO and DSC Support Check*/
+
+ v->NonsupportedDSCInputBPC = false;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
+ || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
+ v->NonsupportedDSCInputBPC = true;
+ }
+ }
+
+ /*Number Of DSC Slices*/
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
+ if (v->PixelClockBackEnd[k] > 3200) {
+ v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
+ } else if (v->PixelClockBackEnd[k] > 1360) {
+ v->NumberOfDSCSlices[k] = 8;
+ } else if (v->PixelClockBackEnd[k] > 680) {
+ v->NumberOfDSCSlices[k] = 4;
+ } else if (v->PixelClockBackEnd[k] > 340) {
+ v->NumberOfDSCSlices[k] = 2;
+ } else {
+ v->NumberOfDSCSlices[k] = 1;
+ }
+ } else {
+ v->NumberOfDSCSlices[k] = 0;
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->RequiresDSC[i][k] = false;
+ v->RequiresFEC[i][k] = false;
+ if (v->BlendingAndTiming[k] == k) {
+ if (v->Output[k] == dm_hdmi) {
+ v->RequiresDSC[i][k] = false;
+ v->RequiresFEC[i][k] = false;
+ v->OutputBppPerState[i][k] = TruncToValidBPP(
+ dml_min(600.0, v->PHYCLKPerState[i]) * 10,
+ 3,
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ false,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
+ if (v->DSCEnable[k] == true) {
+ v->RequiresDSC[i][k] = true;
+ v->LinkDSCEnable = true;
+ if (v->Output[k] == dm_dp) {
+ v->RequiresFEC[i][k] = true;
+ } else {
+ v->RequiresFEC[i][k] = false;
+ }
+ } else {
+ v->RequiresDSC[i][k] = false;
+ v->LinkDSCEnable = false;
+ v->RequiresFEC[i][k] = false;
+ }
+
+ v->Outbpp = BPP_INVALID;
+ if (v->PHYCLKPerState[i] >= 270.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 2700,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
+ }
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 5400,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
+ }
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 8100,
+ v->OutputLinkDPLanes[k],
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ // TODO: Need some other way to handle this nonsense
+ // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
+ }
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
+ v->Outbpp = TruncToValidBPP(
+ (1.0 - v->Downspreading / 100.0) * 10000,
+ 4,
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
+ }
+ if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
+ v->Outbpp = TruncToValidBPP(
+ 12000,
+ 4,
+ v->HTotal[k],
+ v->HActive[k],
+ v->PixelClockBackEnd[k],
+ v->ForcedOutputLinkBPP[k],
+ v->LinkDSCEnable,
+ v->Output[k],
+ v->OutputFormat[k],
+ v->DSCInputBitPerComponent[k],
+ v->NumberOfDSCSlices[k],
+ v->AudioSampleRate[k],
+ v->AudioSampleLayout[k],
+ v->ODMCombineEnablePerState[i][k]);
+ v->OutputBppPerState[i][k] = v->Outbpp;
+ //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
+ }
+ }
+ } else {
+ v->OutputBppPerState[i][k] = 0;
+ }
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ v->LinkCapacitySupport[i] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->BlendingAndTiming[k] == k
+ && (v->Output[k] == dm_dp ||
+ v->Output[k] == dm_edp ||
+ v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
+ v->LinkCapacitySupport[i] = false;
+ }
+ }
+ }
+
+ // UPTO 2172
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k
+ && (v->Output[k] == dm_dp ||
+ v->Output[k] == dm_edp ||
+ v->Output[k] == dm_hdmi)) {
+ if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
+ P2IWith420 = true;
+ }
+ if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
+ && !v->DSC422NativeSupport) {
+ DSC422NativeNotSupported = true;
+ }
+ }
+ }
+
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ v->ODMCombine4To1SupportCheckOK[i] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
+ && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
+ || v->Output[k] == dm_hdmi)) {
+ v->ODMCombine4To1SupportCheckOK[i] = false;
+ }
+ }
+ }
+
+ /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ v->NotEnoughDSCUnits[i] = false;
+ v->TotalDSCUnitsRequired = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->RequiresDSC[i][k] == true) {
+ if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
+ v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
+ } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
+ } else {
+ v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
+ }
+ }
+ }
+ if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
+ v->NotEnoughDSCUnits[i] = true;
+ }
+ }
+ /*DSC Delay per state*/
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->OutputBppPerState[i][k] == BPP_INVALID) {
+ v->BPP = 0.0;
+ } else {
+ v->BPP = v->OutputBppPerState[i][k];
+ }
+ if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
+ if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
+ v->DSCDelayPerState[i][k] = dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ v->BPP,
+ dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
+ v->NumberOfDSCSlices[k],
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
+ } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ v->DSCDelayPerState[i][k] = 2.0
+ * (dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ v->BPP,
+ dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
+ v->NumberOfDSCSlices[k] / 2,
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
+ } else {
+ v->DSCDelayPerState[i][k] = 4.0
+ * (dscceComputeDelay(
+ v->DSCInputBitPerComponent[k],
+ v->BPP,
+ dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
+ v->NumberOfDSCSlices[k] / 4,
+ v->OutputFormat[k],
+ v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
+ }
+ v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
+ } else {
+ v->DSCDelayPerState[i][k] = 0.0;
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ for (m = 0; m < v->NumberOfActivePlanes; m++) {
+ if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
+ v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
+ }
+ }
+ }
+ }
+
+ //Calculate Swath, DET Configuration, DCFCLKDeepSleep
+ //
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
+ v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
+ v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
+ }
+
+ CalculateSwathAndDETConfiguration(
+ false,
+ v->NumberOfActivePlanes,
+ v->DETBufferSizeInKByte[0],
+ v->MaximumSwathWidthLuma,
+ v->MaximumSwathWidthChroma,
+ v->SourceScan,
+ v->SourcePixelFormat,
+ v->SurfaceTiling,
+ v->ViewportWidth,
+ v->ViewportHeight,
+ v->SurfaceWidthY,
+ v->SurfaceWidthC,
+ v->SurfaceHeightY,
+ v->SurfaceHeightC,
+ v->Read256BlockHeightY,
+ v->Read256BlockHeightC,
+ v->Read256BlockWidthY,
+ v->Read256BlockWidthC,
+ v->ODMCombineEnableThisState,
+ v->BlendingAndTiming,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
+ v->HActive,
+ v->HRatio,
+ v->HRatioChroma,
+ v->NoOfDPPThisState,
+ v->swath_width_luma_ub_this_state,
+ v->swath_width_chroma_ub_this_state,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->dummystring,
+ &v->ViewportSizeSupport[i][j]);
+
+ CalculateDCFCLKDeepSleep(
+ mode_lib,
+ v->NumberOfActivePlanes,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->VRatio,
+ v->VRatioChroma,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+ v->NoOfDPPThisState,
+ v->HRatio,
+ v->HRatioChroma,
+ v->PixelClock,
+ v->PSCL_FACTOR,
+ v->PSCL_FACTOR_CHROMA,
+ v->RequiredDPPCLKThisState,
+ v->ReadBandwidthLuma,
+ v->ReadBandwidthChroma,
+ v->ReturnBusWidth,
+ &v->ProjectedDCFCLKDeepSleep[i][j]);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
+ v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
+ v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
+ v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
+ v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
+ v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
+ v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
+ v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
+ }
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
+ }
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
+ v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
+ v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
+ v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
+ v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
+ v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
+ v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
+ v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
+ }
+
+ v->TotalNumberOfDCCActiveDPP[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->DCCEnable[k] == true) {
+ v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
+ || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+
+ if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
+ && v->SourceScan[k] != dm_vert) {
+ v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
+ / 2;
+ v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
+ } else {
+ v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
+ v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
+ }
+
+ v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
+ mode_lib,
+ v->DCCEnable[k],
+ v->Read256BlockHeightC[k],
+ v->Read256BlockWidthC[k],
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelC[k],
+ v->SourceScan[k],
+ v->SwathWidthCThisState[k],
+ v->ViewportHeightChroma[k],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMMinPageSize,
+ v->HostVMMinPageSize,
+ v->PTEBufferSizeInRequestsForChroma,
+ v->PitchC[k],
+ 0.0,
+ &v->MacroTileWidthC[k],
+ &v->MetaRowBytesC,
+ &v->DPTEBytesPerRowC,
+ &v->PTEBufferSizeNotExceededC[i][j][k],
+ &v->dummyinteger7,
+ &v->dpte_row_height_chroma[k],
+ &v->dummyinteger28,
+ &v->dummyinteger26,
+ &v->dummyinteger23,
+ &v->meta_row_height_chroma[k],
+ &v->dummyinteger8,
+ &v->dummyinteger9,
+ &v->dummyinteger19,
+ &v->dummyinteger20,
+ &v->dummyinteger17,
+ &v->dummyinteger10,
+ &v->dummyinteger11);
+
+ v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
+ mode_lib,
+ v->VRatioChroma[k],
+ v->VTAPsChroma[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->SwathHeightCThisState[k],
+ v->ViewportYStartC[k],
+ &v->PrefillC[k],
+ &v->MaxNumSwC[k]);
+ } else {
+ v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
+ v->PTEBufferSizeInRequestsForChroma = 0;
+ v->PDEAndMetaPTEBytesPerFrameC = 0.0;
+ v->MetaRowBytesC = 0.0;
+ v->DPTEBytesPerRowC = 0.0;
+ v->PrefetchLinesC[i][j][k] = 0.0;
+ v->PTEBufferSizeNotExceededC[i][j][k] = true;
+ }
+ v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
+ mode_lib,
+ v->DCCEnable[k],
+ v->Read256BlockHeightY[k],
+ v->Read256BlockWidthY[k],
+ v->SourcePixelFormat[k],
+ v->SurfaceTiling[k],
+ v->BytePerPixelY[k],
+ v->SourceScan[k],
+ v->SwathWidthYThisState[k],
+ v->ViewportHeight[k],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMMinPageSize,
+ v->HostVMMinPageSize,
+ v->PTEBufferSizeInRequestsForLuma,
+ v->PitchY[k],
+ v->DCCMetaPitchY[k],
+ &v->MacroTileWidthY[k],
+ &v->MetaRowBytesY,
+ &v->DPTEBytesPerRowY,
+ &v->PTEBufferSizeNotExceededY[i][j][k],
+ &v->dummyinteger7,
+ &v->dpte_row_height[k],
+ &v->dummyinteger29,
+ &v->dummyinteger27,
+ &v->dummyinteger24,
+ &v->meta_row_height[k],
+ &v->dummyinteger25,
+ &v->dpte_group_bytes[k],
+ &v->dummyinteger21,
+ &v->dummyinteger22,
+ &v->dummyinteger18,
+ &v->dummyinteger5,
+ &v->dummyinteger6);
+ v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
+ mode_lib,
+ v->VRatio[k],
+ v->vtaps[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->SwathHeightYThisState[k],
+ v->ViewportYStartY[k],
+ &v->PrefillY[k],
+ &v->MaxNumSwY[k]);
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
+ v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
+ v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
+
+ CalculateRowBandwidth(
+ v->GPUVMEnable,
+ v->SourcePixelFormat[k],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->DCCEnable[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->MetaRowBytesY,
+ v->MetaRowBytesC,
+ v->meta_row_height[k],
+ v->meta_row_height_chroma[k],
+ v->DPTEBytesPerRowY,
+ v->DPTEBytesPerRowC,
+ v->dpte_row_height[k],
+ v->dpte_row_height_chroma[k],
+ &v->meta_row_bandwidth[i][j][k],
+ &v->dpte_row_bandwidth[i][j][k]);
+ }
+ /*
+ * DCCMetaBufferSizeSupport(i, j) = True
+ * For k = 0 To NumberOfActivePlanes - 1
+ * If MetaRowBytes(i, j, k) > 24064 Then
+ * DCCMetaBufferSizeSupport(i, j) = False
+ * End If
+ * Next k
+ */
+ v->DCCMetaBufferSizeSupport[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->MetaRowBytes[i][j][k] > 24064)
+ v->DCCMetaBufferSizeSupport[i][j] = false;
+ }
+ v->UrgLatency[i] = CalculateUrgentLatency(
+ v->UrgentLatencyPixelDataOnly,
+ v->UrgentLatencyPixelMixedWithVMData,
+ v->UrgentLatencyVMDataOnly,
+ v->DoUrgentLatencyAdjustment,
+ v->UrgentLatencyAdjustmentFabricClockComponent,
+ v->UrgentLatencyAdjustmentFabricClockReference,
+ v->FabricClockPerState[i]);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateUrgentBurstFactor(
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->UrgLatency[i],
+ v->CursorBufferSize,
+ v->CursorWidth[k][0],
+ v->CursorBPP[k][0],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->BytePerPixelInDETY[k],
+ v->BytePerPixelInDETC[k],
+ v->DETBufferSizeYThisState[k],
+ v->DETBufferSizeCThisState[k],
+ &v->UrgentBurstFactorCursor[k],
+ &v->UrgentBurstFactorLuma[k],
+ &v->UrgentBurstFactorChroma[k],
+ &NotUrgentLatencyHiding[k]);
+ }
+
+ v->NotEnoughUrgentLatencyHidingA[i][j] = false;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (NotUrgentLatencyHiding[k]) {
+ v->NotEnoughUrgentLatencyHidingA[i][j] = true;
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
+ + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
+ v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
+ }
+
+ v->TotalVActivePixelBandwidth[i][j] = 0;
+ v->TotalVActiveCursorBandwidth[i][j] = 0;
+ v->TotalMetaRowBandwidth[i][j] = 0;
+ v->TotalDPTERowBandwidth[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
+ v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
+ v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
+ v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
+ }
+ }
+ }
+
+ //Calculate Return BW
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->BlendingAndTiming[k] == k) {
+ if (v->WritebackEnable[k] == true) {
+ v->WritebackDelayTime[k] = v->WritebackLatency
+ + CalculateWriteBackDelay(
+ v->WritebackPixelFormat[k],
+ v->WritebackHRatio[k],
+ v->WritebackVRatio[k],
+ v->WritebackVTaps[k],
+ v->WritebackDestinationWidth[k],
+ v->WritebackDestinationHeight[k],
+ v->WritebackSourceHeight[k],
+ v->HTotal[k]) / v->RequiredDISPCLK[i][j];
+ } else {
+ v->WritebackDelayTime[k] = 0.0;
+ }
+ for (m = 0; m < v->NumberOfActivePlanes; m++) {
+ if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
+ v->WritebackDelayTime[k] = dml_max(
+ v->WritebackDelayTime[k],
+ v->WritebackLatency
+ + CalculateWriteBackDelay(
+ v->WritebackPixelFormat[m],
+ v->WritebackHRatio[m],
+ v->WritebackVRatio[m],
+ v->WritebackVTaps[m],
+ v->WritebackDestinationWidth[m],
+ v->WritebackDestinationHeight[m],
+ v->WritebackSourceHeight[m],
+ v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
+ }
+ }
+ }
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ for (m = 0; m < v->NumberOfActivePlanes; m++) {
+ if (v->BlendingAndTiming[k] == m) {
+ v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
+ }
+ }
+ }
+ v->MaxMaxVStartup[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->MaximumVStartup[i][j][k] =
+ CalculateMaxVStartup(
+ v->VTotal[k],
+ v->VActive[k],
+ v->VBlankNom[k],
+ v->HTotal[k],
+ v->PixelClock[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ v->Interlace[k],
+ v->ip.VBlankNomDefaultUS,
+ v->WritebackDelayTime[k]);
+ v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
+ }
+ }
+ }
+
+ ReorderingBytes = v->NumberOfChannels
+ * dml_max3(
+ v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
+ v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
+ v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
+ }
+ }
+
+ if (v->UseMinimumRequiredDCFCLK == true)
+ UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ double IdealFabricAndSDPPortBandwidthPerState = dml_min(
+ v->ReturnBusWidth * v->DCFCLKState[i][j],
+ v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
+ double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
+ double PixelDataOnlyReturnBWPerState = dml_min(
+ IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
+ double PixelMixedWithVMDataReturnBWPerState = dml_min(
+ IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
+
+ if (v->HostVMEnable != true) {
+ v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
+ } else {
+ v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
+ }
+ }
+ }
+
+ //Re-ordering Buffer Support Check
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
+ > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
+ v->ROBSupport[i][j] = true;
+ } else {
+ v->ROBSupport[i][j] = false;
+ }
+ }
+ }
+
+ //Vertical Active BW support check
+
+ MaxTotalVActiveRDBandwidth = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
+ }
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
+ dml_min(
+ v->ReturnBusWidth * v->DCFCLKState[i][j],
+ v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
+ * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
+ v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
+ * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
+
+ if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
+ v->TotalVerticalActiveBandwidthSupport[i][j] = true;
+ } else {
+ v->TotalVerticalActiveBandwidthSupport[i][j] = false;
+ }
+ }
+ }
+
+ v->UrgentLatency = CalculateUrgentLatency(
+ v->UrgentLatencyPixelDataOnly,
+ v->UrgentLatencyPixelMixedWithVMData,
+ v->UrgentLatencyVMDataOnly,
+ v->DoUrgentLatencyAdjustment,
+ v->UrgentLatencyAdjustmentFabricClockComponent,
+ v->UrgentLatencyAdjustmentFabricClockReference,
+ v->FabricClock);
+ //Prefetch Check
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ double VMDataOnlyReturnBWPerState;
+ double HostVMInefficiencyFactor = 1;
+ int NextPrefetchModeState = MinPrefetchMode;
+ bool UnboundedRequestEnabledThisState = false;
+ int CompressedBufferSizeInkByteThisState = 0;
+ double dummy;
+
+ v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
+
+ v->BandwidthWithoutPrefetchSupported[i][j] = true;
+ if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
+ + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
+ v->BandwidthWithoutPrefetchSupported[i][j] = false;
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
+ v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
+ v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
+ v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
+ v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
+ v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
+ v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
+ v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
+ v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
+ }
+
+ VMDataOnlyReturnBWPerState = dml_min(
+ dml_min(
+ v->ReturnBusWidth * v->DCFCLKState[i][j],
+ v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
+ * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
+ v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
+ * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
+ if (v->GPUVMEnable && v->HostVMEnable)
+ HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
+
+ v->ExtraLatency = CalculateExtraLatency(
+ v->RoundTripPingLatencyCycles,
+ ReorderingBytes,
+ v->DCFCLKState[i][j],
+ v->TotalNumberOfActiveDPP[i][j],
+ v->PixelChunkSizeInKByte,
+ v->TotalNumberOfDCCActiveDPP[i][j],
+ v->MetaChunkSize,
+ v->ReturnBWPerState[i][j],
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->NumberOfActivePlanes,
+ v->NoOfDPPThisState,
+ v->dpte_group_bytes,
+ HostVMInefficiencyFactor,
+ v->HostVMMinPageSize,
+ v->HostVMMaxNonCachedPageTableLevels);
+
+ v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
+ do {
+ v->PrefetchModePerState[i][j] = NextPrefetchModeState;
+ v->MaxVStartup = v->NextMaxVStartup;
+
+ v->TWait = CalculateTWait(
+ v->PrefetchModePerState[i][j],
+ v->DRAMClockChangeLatency,
+ v->UrgLatency[i],
+ v->SREnterPlusExitTime);
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculatePrefetchSchedulePerPlane(mode_lib,
+ HostVMInefficiencyFactor,
+ i, j, k);
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculateUrgentBurstFactor(
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->UrgentLatency,
+ v->CursorBufferSize,
+ v->CursorWidth[k][0],
+ v->CursorBPP[k][0],
+ v->VRatioPreY[i][j][k],
+ v->VRatioPreC[i][j][k],
+ v->BytePerPixelInDETY[k],
+ v->BytePerPixelInDETC[k],
+ v->DETBufferSizeYThisState[k],
+ v->DETBufferSizeCThisState[k],
+ &v->UrgentBurstFactorCursorPre[k],
+ &v->UrgentBurstFactorLumaPre[k],
+ &v->UrgentBurstFactorChroma[k],
+ &v->NotUrgentLatencyHidingPre[k]);
+ }
+
+ v->MaximumReadBandwidthWithPrefetch = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
+ / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
+
+ v->MaximumReadBandwidthWithPrefetch =
+ v->MaximumReadBandwidthWithPrefetch
+ + dml_max3(
+ v->VActivePixelBandwidth[i][j][k]
+ + v->VActiveCursorBandwidth[i][j][k]
+ + v->NoOfDPP[i][j][k]
+ * (v->meta_row_bandwidth[i][j][k]
+ + v->dpte_row_bandwidth[i][j][k]),
+ v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
+ v->NoOfDPP[i][j][k]
+ * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
+ * v->UrgentBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
+ * v->UrgentBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
+ }
+
+ v->NotEnoughUrgentLatencyHidingPre = false;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->NotUrgentLatencyHidingPre[k] == true) {
+ v->NotEnoughUrgentLatencyHidingPre = true;
+ }
+ }
+
+ v->PrefetchSupported[i][j] = true;
+ if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
+ || v->NotEnoughUrgentLatencyHidingPre == 1) {
+ v->PrefetchSupported[i][j] = false;
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
+ || v->NoTimeForPrefetch[i][j][k] == true) {
+ v->PrefetchSupported[i][j] = false;
+ }
+ }
+
+ v->DynamicMetadataSupported[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
+ v->DynamicMetadataSupported[i][j] = false;
+ }
+ }
+
+ v->VRatioInPrefetchSupported[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
+ v->VRatioInPrefetchSupported[i][j] = false;
+ }
+ }
+ v->AnyLinesForVMOrRowTooLarge = false;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
+ v->AnyLinesForVMOrRowTooLarge = true;
+ }
+ }
+
+ v->NextPrefetchMode = v->NextPrefetchMode + 1;
+
+ if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
+ v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
+ - dml_max(
+ v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
+ v->NoOfDPP[i][j][k]
+ * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
+ * v->UrgentBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
+ * v->UrgentBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
+ }
+ v->TotImmediateFlipBytes = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
+ + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
+ + v->DPTEBytesPerRow[i][j][k];
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculateFlipSchedule(
+ mode_lib,
+ HostVMInefficiencyFactor,
+ v->ExtraLatency,
+ v->UrgLatency[i],
+ v->GPUVMMaxPageTableLevels,
+ v->HostVMEnable,
+ v->HostVMMaxNonCachedPageTableLevels,
+ v->GPUVMEnable,
+ v->HostVMMinPageSize,
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k],
+ v->MetaRowBytes[i][j][k],
+ v->DPTEBytesPerRow[i][j][k],
+ v->BandwidthAvailableForImmediateFlip,
+ v->TotImmediateFlipBytes,
+ v->SourcePixelFormat[k],
+ v->HTotal[k] / v->PixelClock[k],
+ v->VRatio[k],
+ v->VRatioChroma[k],
+ v->Tno_bw[k],
+ v->DCCEnable[k],
+ v->dpte_row_height[k],
+ v->meta_row_height[k],
+ v->dpte_row_height_chroma[k],
+ v->meta_row_height_chroma[k],
+ &v->DestinationLinesToRequestVMInImmediateFlip[k],
+ &v->DestinationLinesToRequestRowInImmediateFlip[k],
+ &v->final_flip_bw[k],
+ &v->ImmediateFlipSupportedForPipe[k]);
+ }
+ v->total_dcn_read_bw_with_flip = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
+ + dml_max3(
+ v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
+ v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
+ + v->VActiveCursorBandwidth[i][j][k],
+ v->NoOfDPP[i][j][k]
+ * (v->final_flip_bw[k]
+ + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
+ * v->UrgentBurstFactorLumaPre[k]
+ + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
+ * v->UrgentBurstFactorChromaPre[k])
+ + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
+ }
+ v->ImmediateFlipSupportedForState[i][j] = true;
+ if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
+ v->ImmediateFlipSupportedForState[i][j] = false;
+ }
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->ImmediateFlipSupportedForPipe[k] == false) {
+ v->ImmediateFlipSupportedForState[i][j] = false;
+ }
+ }
+ } else {
+ v->ImmediateFlipSupportedForState[i][j] = false;
+ }
+
+ if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
+ v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
+ NextPrefetchModeState = NextPrefetchModeState + 1;
+ } else {
+ v->NextMaxVStartup = v->NextMaxVStartup - 1;
+ }
+ v->NextPrefetchMode = v->NextPrefetchMode + 1;
+ } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
+ && ((v->HostVMEnable == false &&
+ v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
+ || v->ImmediateFlipSupportedForState[i][j] == true))
+ || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
+
+ CalculateUnboundedRequestAndCompressedBufferSize(
+ v->DETBufferSizeInKByte[0],
+ v->ConfigReturnBufferSizeInKByte,
+ v->UseUnboundedRequesting,
+ v->TotalNumberOfActiveDPP[i][j],
+ NoChroma,
+ v->MaxNumDPP,
+ v->CompressedBufferSegmentSizeInkByte,
+ v->Output,
+ &UnboundedRequestEnabledThisState,
+ &CompressedBufferSizeInkByteThisState);
+
+ CalculateWatermarksAndDRAMSpeedChangeSupport(
+ mode_lib,
+ v->PrefetchModePerState[i][j],
+ v->NumberOfActivePlanes,
+ v->MaxLineBufferLines,
+ v->LineBufferSize,
+ v->WritebackInterfaceBufferSize,
+ v->DCFCLKState[i][j],
+ v->ReturnBWPerState[i][j],
+ v->SynchronizedVBlank,
+ v->dpte_group_bytes,
+ v->MetaChunkSize,
+ v->UrgLatency[i],
+ v->ExtraLatency,
+ v->WritebackLatency,
+ v->WritebackChunkSize,
+ v->SOCCLKPerState[i],
+ v->DRAMClockChangeLatency,
+ v->SRExitTime,
+ v->SREnterPlusExitTime,
+ v->SRExitZ8Time,
+ v->SREnterPlusExitZ8Time,
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+ v->LBBitPerPixel,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+ v->HRatio,
+ v->HRatioChroma,
+ v->vtaps,
+ v->VTAPsChroma,
+ v->VRatio,
+ v->VRatioChroma,
+ v->HTotal,
+ v->PixelClock,
+ v->BlendingAndTiming,
+ v->NoOfDPPThisState,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
+ v->DSTXAfterScaler,
+ v->DSTYAfterScaler,
+ v->WritebackEnable,
+ v->WritebackPixelFormat,
+ v->WritebackDestinationWidth,
+ v->WritebackDestinationHeight,
+ v->WritebackSourceHeight,
+ UnboundedRequestEnabledThisState,
+ CompressedBufferSizeInkByteThisState,
+ &v->DRAMClockChangeSupport[i][j],
+ &v->UrgentWatermark,
+ &v->WritebackUrgentWatermark,
+ &v->DRAMClockChangeWatermark,
+ &v->WritebackDRAMClockChangeWatermark,
+ &dummy,
+ &dummy,
+ &dummy,
+ &dummy,
+ &v->MinActiveDRAMClockChangeLatencySupported);
+ }
+ }
+
+ /*PTE Buffer Size Check*/
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ v->PTEBufferSizeNotExceeded[i][j] = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
+ v->PTEBufferSizeNotExceeded[i][j] = false;
+ }
+ }
+ }
+ }
+
+ /*Cursor Support Check*/
+ v->CursorSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->CursorWidth[k][0] > 0.0) {
+ if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
+ v->CursorSupport = false;
+ }
+ }
+ }
+
+ /*Valid Pitch Check*/
+ v->PitchSupport = true;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
+ if (v->DCCEnable[k] == true) {
+ v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
+ } else {
+ v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
+ }
+ if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
+ && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
+ && v->SourcePixelFormat[k] != dm_mono_8) {
+ v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
+ if (v->DCCEnable[k] == true) {
+ v->AlignedDCCMetaPitchC[k] = dml_ceil(
+ dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
+ 64.0 * v->Read256BlockWidthC[k]);
+ } else {
+ v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
+ }
+ } else {
+ v->AlignedCPitch[k] = v->PitchC[k];
+ v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
+ }
+ if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
+ || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
+ v->PitchSupport = false;
+ }
+ }
+
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
+ ViewportExceedsSurface = true;
+ if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
+ && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
+ && v->SourcePixelFormat[k] != dm_rgbe) {
+ if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
+ || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
+ ViewportExceedsSurface = true;
+ }
+ }
+ }
+ }
+
+ /*Mode Support, Voltage State and SOC Configuration*/
+ for (i = v->soc.num_states - 1; i >= 0; i--) {
+ for (j = 0; j < 2; j++) {
+ if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
+ && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
+ && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
+ && v->DTBCLKRequiredMoreThanSupported[i] == false
+ && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
+ && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
+ && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
+ && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
+ && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
+ && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
+ && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
+ && ((v->HostVMEnable == false
+ && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
+ || v->ImmediateFlipSupportedForState[i][j] == true)
+ && FMTBufferExceeded == false) {
+ v->ModeSupport[i][j] = true;
+ } else {
+ v->ModeSupport[i][j] = false;
+ }
+ }
+ }
+
+ {
+ unsigned int MaximumMPCCombine = 0;
+
+ for (i = v->soc.num_states; i >= 0; i--) {
+ if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
+ v->VoltageLevel = i;
+ v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
+ if (v->ModeSupport[i][0] == true) {
+ MaximumMPCCombine = 0;
+ } else {
+ MaximumMPCCombine = 1;
+ }
+ }
+ }
+ v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
+ v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
+ v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
+ }
+ v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
+ v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
+ v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
+ v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
+ v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
+ v->maxMpcComb = MaximumMPCCombine;
+ }
+}
+
+static void CalculateWatermarksAndDRAMSpeedChangeSupport(
+ struct display_mode_lib *mode_lib,
+ unsigned int PrefetchMode,
+ unsigned int NumberOfActivePlanes,
+ unsigned int MaxLineBufferLines,
+ unsigned int LineBufferSize,
+ unsigned int WritebackInterfaceBufferSize,
+ double DCFCLK,
+ double ReturnBW,
+ bool SynchronizedVBlank,
+ unsigned int dpte_group_bytes[],
+ unsigned int MetaChunkSize,
+ double UrgentLatency,
+ double ExtraLatency,
+ double WritebackLatency,
+ double WritebackChunkSize,
+ double SOCCLK,
+ double DRAMClockChangeLatency,
+ double SRExitTime,
+ double SREnterPlusExitTime,
+ double SRExitZ8Time,
+ double SREnterPlusExitZ8Time,
+ double DCFCLKDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int LBBitPerPixel[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ double HRatio[],
+ double HRatioChroma[],
+ unsigned int vtaps[],
+ unsigned int VTAPsChroma[],
+ double VRatio[],
+ double VRatioChroma[],
+ unsigned int HTotal[],
+ double PixelClock[],
+ unsigned int BlendingAndTiming[],
+ unsigned int DPPPerPlane[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+ double DSTXAfterScaler[],
+ double DSTYAfterScaler[],
+ bool WritebackEnable[],
+ enum source_format_class WritebackPixelFormat[],
+ double WritebackDestinationWidth[],
+ double WritebackDestinationHeight[],
+ double WritebackSourceHeight[],
+ bool UnboundedRequestEnabled,
+ unsigned int CompressedBufferSizeInkByte,
+ enum clock_change_support *DRAMClockChangeSupport,
+ double *UrgentWatermark,
+ double *WritebackUrgentWatermark,
+ double *DRAMClockChangeWatermark,
+ double *WritebackDRAMClockChangeWatermark,
+ double *StutterExitWatermark,
+ double *StutterEnterPlusExitWatermark,
+ double *Z8StutterExitWatermark,
+ double *Z8StutterEnterPlusExitWatermark,
+ double *MinActiveDRAMClockChangeLatencySupported)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ double EffectiveLBLatencyHidingY;
+ double EffectiveLBLatencyHidingC;
+ double LinesInDETY[DC__NUM_DPP__MAX];
+ double LinesInDETC;
+ unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETCRoundedDownToSwath;
+ double FullDETBufferingTimeY;
+ double FullDETBufferingTimeC;
+ double ActiveDRAMClockChangeLatencyMarginY;
+ double ActiveDRAMClockChangeLatencyMarginC;
+ double WritebackDRAMClockChangeLatencyMargin;
+ double PlaneWithMinActiveDRAMClockChangeMargin;
+ double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
+ double WritebackDRAMClockChangeLatencyHiding;
+ double TotalPixelBW = 0.0;
+ int k, j;
+
+ *UrgentWatermark = UrgentLatency + ExtraLatency;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
+ dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
+ dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark);
+#endif
+
+ *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency);
+ dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark);
+#endif
+
+ v->TotalActiveWriteback = 0;
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (WritebackEnable[k] == true) {
+ v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
+ }
+ }
+
+ if (v->TotalActiveWriteback <= 1) {
+ *WritebackUrgentWatermark = WritebackLatency;
+ } else {
+ *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ }
+
+ if (v->TotalActiveWriteback <= 1) {
+ *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
+ } else {
+ *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ }
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ TotalPixelBW = TotalPixelBW
+ + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k])
+ / (HTotal[k] / PixelClock[k]);
+ }
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ double EffectiveDETBufferSizeY = DETBufferSizeY[k];
+
+ v->LBLatencyHidingSourceLinesY = dml_min(
+ (double) MaxLineBufferLines,
+ dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
+
+ v->LBLatencyHidingSourceLinesC = dml_min(
+ (double) MaxLineBufferLines,
+ dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
+
+ EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
+
+ EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
+
+ if (UnboundedRequestEnabled) {
+ EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
+ + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
+ }
+
+ LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
+ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
+ if (BytePerPixelDETC[k] > 0) {
+ LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
+ LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
+ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
+ } else {
+ LinesInDETC = 0;
+ FullDETBufferingTimeC = 999999;
+ }
+
+ ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
+ - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
+
+ if (NumberOfActivePlanes > 1) {
+ ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
+ - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
+ }
+
+ if (BytePerPixelDETC[k] > 0) {
+ ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
+ - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
+
+ if (NumberOfActivePlanes > 1) {
+ ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
+ - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
+ }
+ v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
+ } else {
+ v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
+ }
+
+ if (WritebackEnable[k] == true) {
+ WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024
+ / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
+ if (WritebackPixelFormat[k] == dm_444_64) {
+ WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
+ }
+ WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
+ v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
+ }
+ }
+
+ v->MinActiveDRAMClockChangeMargin = 999999;
+ PlaneWithMinActiveDRAMClockChangeMargin = 0;
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
+ v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
+ if (BlendingAndTiming[k] == k) {
+ PlaneWithMinActiveDRAMClockChangeMargin = k;
+ } else {
+ for (j = 0; j < NumberOfActivePlanes; ++j) {
+ if (BlendingAndTiming[k] == j) {
+ PlaneWithMinActiveDRAMClockChangeMargin = j;
+ }
+ }
+ }
+ }
+ }
+
+ *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
+
+ SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
+ && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
+ SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
+ }
+ }
+
+ v->TotalNumberOfActiveOTG = 0;
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (BlendingAndTiming[k] == k) {
+ v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
+ }
+ }
+
+ if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
+ } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
+ || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
+ } else {
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ }
+
+ *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
+ *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
+ dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
+#endif
+}
+
+static void CalculateDCFCLKDeepSleep(
+ struct display_mode_lib *mode_lib,
+ unsigned int NumberOfActivePlanes,
+ int BytePerPixelY[],
+ int BytePerPixelC[],
+ double VRatio[],
+ double VRatioChroma[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerPlane[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double DPPCLK[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ int ReturnBusWidth,
+ double *DCFCLKDeepSleep)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ double DisplayPipeLineDeliveryTimeLuma;
+ double DisplayPipeLineDeliveryTimeChroma;
+ double ReadBandwidth = 0.0;
+ int k;
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+
+ if (VRatio[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
+ }
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChroma = 0;
+ } else {
+ if (VRatioChroma[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
+ }
+ }
+
+ if (BytePerPixelC[k] > 0) {
+ v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
+ __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
+ } else {
+ v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
+ }
+ v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
+
+ }
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+ }
+
+ *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
+ }
+}
+
+static void CalculateUrgentBurstFactor(
+ int swath_width_luma_ub,
+ int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double LineTime,
+ double UrgentLatency,
+ double CursorBufferSize,
+ unsigned int CursorWidth,
+ unsigned int CursorBPP,
+ double VRatio,
+ double VRatioC,
+ double BytePerPixelInDETY,
+ double BytePerPixelInDETC,
+ double DETBufferSizeY,
+ double DETBufferSizeC,
+ double *UrgentBurstFactorCursor,
+ double *UrgentBurstFactorLuma,
+ double *UrgentBurstFactorChroma,
+ bool *NotEnoughUrgentLatencyHiding)
+{
+ double LinesInDETLuma;
+ double LinesInDETChroma;
+ unsigned int LinesInCursorBuffer;
+ double CursorBufferSizeInTime;
+ double DETBufferSizeInTimeLuma;
+ double DETBufferSizeInTimeChroma;
+
+ *NotEnoughUrgentLatencyHiding = 0;
+
+ if (CursorWidth > 0) {
+ LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
+ if (VRatio > 0) {
+ CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
+ if (CursorBufferSizeInTime - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorCursor = 0;
+ } else {
+ *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorCursor = 1;
+ }
+ }
+
+ LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
+ if (VRatio > 0) {
+ DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
+ if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorLuma = 0;
+ } else {
+ *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorLuma = 1;
+ }
+
+ if (BytePerPixelInDETC > 0) {
+ LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
+ if (VRatio > 0) {
+ DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
+ if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorChroma = 0;
+ } else {
+ *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorChroma = 1;
+ }
+ }
+}
+
+static void CalculatePixelDeliveryTimes(
+ unsigned int NumberOfActivePlanes,
+ double VRatio[],
+ double VRatioChroma[],
+ double VRatioPrefetchY[],
+ double VRatioPrefetchC[],
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ unsigned int DPPPerPlane[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double DPPCLK[],
+ int BytePerPixelC[],
+ enum scan_direction_class SourceScan[],
+ unsigned int NumberOfCursors[],
+ unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+ unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int BlockHeight256BytesC[],
+ double DisplayPipeLineDeliveryTimeLuma[],
+ double DisplayPipeLineDeliveryTimeChroma[],
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeLuma[],
+ double DisplayPipeRequestDeliveryTimeChroma[],
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ double CursorRequestDeliveryTime[],
+ double CursorRequestDeliveryTimePrefetch[])
+{
+ double req_per_swath_ub;
+ int k;
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (VRatio[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
+ }
+
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChroma[k] = 0;
+ } else {
+ if (VRatioChroma[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
+ }
+ }
+
+ if (VRatioPrefetchY[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
+ }
+
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
+ } else {
+ if (VRatioPrefetchC[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
+ }
+ }
+ }
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (SourceScan[k] != dm_vert) {
+ req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
+ } else {
+ req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
+ }
+ DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
+ DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeRequestDeliveryTimeChroma[k] = 0;
+ DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
+ } else {
+ if (SourceScan[k] != dm_vert) {
+ req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
+ } else {
+ req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
+ }
+ DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
+ DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
+ dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
+ dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
+ dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
+#endif
+ }
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ int cursor_req_per_width;
+
+ cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
+ if (NumberOfCursors[k] > 0) {
+ if (VRatio[k] <= 1) {
+ CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
+ } else {
+ CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
+ }
+ if (VRatioPrefetchY[k] <= 1) {
+ CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
+ } else {
+ CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
+ }
+ } else {
+ CursorRequestDeliveryTime[k] = 0;
+ CursorRequestDeliveryTimePrefetch[k] = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
+ dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
+ dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
+#endif
+ }
+}
+
+static void CalculateMetaAndPTETimes(
+ int NumberOfActivePlanes,
+ bool GPUVMEnable,
+ int MetaChunkSize,
+ int MinMetaChunkSizeBytes,
+ int HTotal[],
+ double VRatio[],
+ double VRatioChroma[],
+ double DestinationLinesToRequestRowInVBlank[],
+ double DestinationLinesToRequestRowInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ int BytePerPixelY[],
+ int BytePerPixelC[],
+ enum scan_direction_class SourceScan[],
+ int dpte_row_height[],
+ int dpte_row_height_chroma[],
+ int meta_row_width[],
+ int meta_row_width_chroma[],
+ int meta_row_height[],
+ int meta_row_height_chroma[],
+ int meta_req_width[],
+ int meta_req_width_chroma[],
+ int meta_req_height[],
+ int meta_req_height_chroma[],
+ int dpte_group_bytes[],
+ int PTERequestSizeY[],
+ int PTERequestSizeC[],
+ int PixelPTEReqWidthY[],
+ int PixelPTEReqHeightY[],
+ int PixelPTEReqWidthC[],
+ int PixelPTEReqHeightC[],
+ int dpte_row_width_luma_ub[],
+ int dpte_row_width_chroma_ub[],
+ double DST_Y_PER_PTE_ROW_NOM_L[],
+ double DST_Y_PER_PTE_ROW_NOM_C[],
+ double DST_Y_PER_META_ROW_NOM_L[],
+ double DST_Y_PER_META_ROW_NOM_C[],
+ double TimePerMetaChunkNominal[],
+ double TimePerChromaMetaChunkNominal[],
+ double TimePerMetaChunkVBlank[],
+ double TimePerChromaMetaChunkVBlank[],
+ double TimePerMetaChunkFlip[],
+ double TimePerChromaMetaChunkFlip[],
+ double time_per_pte_group_nom_luma[],
+ double time_per_pte_group_vblank_luma[],
+ double time_per_pte_group_flip_luma[],
+ double time_per_pte_group_nom_chroma[],
+ double time_per_pte_group_vblank_chroma[],
+ double time_per_pte_group_flip_chroma[])
+{
+ unsigned int meta_chunk_width;
+ unsigned int min_meta_chunk_width;
+ unsigned int meta_chunk_per_row_int;
+ unsigned int meta_row_remainder;
+ unsigned int meta_chunk_threshold;
+ unsigned int meta_chunks_per_row_ub;
+ unsigned int meta_chunk_width_chroma;
+ unsigned int min_meta_chunk_width_chroma;
+ unsigned int meta_chunk_per_row_int_chroma;
+ unsigned int meta_row_remainder_chroma;
+ unsigned int meta_chunk_threshold_chroma;
+ unsigned int meta_chunks_per_row_ub_chroma;
+ unsigned int dpte_group_width_luma;
+ unsigned int dpte_groups_per_row_luma_ub;
+ unsigned int dpte_group_width_chroma;
+ unsigned int dpte_groups_per_row_chroma_ub;
+ int k;
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0) {
+ DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
+ } else {
+ DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
+ }
+ DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0) {
+ DST_Y_PER_META_ROW_NOM_C[k] = 0;
+ } else {
+ DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
+ }
+ }
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (DCCEnable[k] == true) {
+ meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
+ min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
+ meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
+ meta_row_remainder = meta_row_width[k] % meta_chunk_width;
+ if (SourceScan[k] != dm_vert) {
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
+ } else {
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
+ }
+ if (meta_row_remainder <= meta_chunk_threshold) {
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+ } else {
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+ }
+ TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ if (BytePerPixelC[k] == 0) {
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ } else {
+ meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
+ min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
+ meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
+ meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
+ if (SourceScan[k] != dm_vert) {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
+ } else {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
+ }
+ if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
+ } else {
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
+ }
+ TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ }
+ } else {
+ TimePerMetaChunkNominal[k] = 0;
+ TimePerMetaChunkVBlank[k] = 0;
+ TimePerMetaChunkFlip[k] = 0;
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ }
+ }
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (GPUVMEnable == true) {
+ if (SourceScan[k] != dm_vert) {
+ dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
+ } else {
+ dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
+ }
+ dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
+ time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ if (BytePerPixelC[k] == 0) {
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ } else {
+ if (SourceScan[k] != dm_vert) {
+ dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
+ } else {
+ dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
+ }
+ dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
+ time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ }
+ } else {
+ time_per_pte_group_nom_luma[k] = 0;
+ time_per_pte_group_vblank_luma[k] = 0;
+ time_per_pte_group_flip_luma[k] = 0;
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ }
+ }
+}
+
+static void CalculateVMGroupAndRequestTimes(
+ unsigned int NumberOfActivePlanes,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int HTotal[],
+ int BytePerPixelC[],
+ double DestinationLinesToRequestVMInVBlank[],
+ double DestinationLinesToRequestVMInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ int dpte_row_width_luma_ub[],
+ int dpte_row_width_chroma_ub[],
+ int vm_group_bytes[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ int meta_pte_bytes_per_frame_ub_l[],
+ int meta_pte_bytes_per_frame_ub_c[],
+ double TimePerVMGroupVBlank[],
+ double TimePerVMGroupFlip[],
+ double TimePerVMRequestVBlank[],
+ double TimePerVMRequestFlip[])
+{
+ int num_group_per_lower_vm_stage;
+ int num_req_per_lower_vm_stage;
+ int k;
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
+ if (DCCEnable[k] == false) {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
+ + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
+ } else {
+ num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
+ }
+ } else {
+ if (GPUVMMaxPageTableLevels == 1) {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
+ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
+ } else {
+ num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
+ }
+ } else {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
+ + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
+ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
+ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
+ } else {
+ num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
+ + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
+ }
+ }
+ }
+
+ if (DCCEnable[k] == false) {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
+ }
+ } else {
+ if (GPUVMMaxPageTableLevels == 1) {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
+ }
+ } else {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
+ + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
+ }
+ }
+ }
+
+ TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+ TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+ TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+ TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+
+ if (GPUVMMaxPageTableLevels > 2) {
+ TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
+ TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
+ TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
+ TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
+ }
+
+ } else {
+ TimePerVMGroupVBlank[k] = 0;
+ TimePerVMGroupFlip[k] = 0;
+ TimePerVMRequestVBlank[k] = 0;
+ TimePerVMRequestFlip[k] = 0;
+ }
+ }
+}
+
+static void CalculateStutterEfficiency(
+ struct display_mode_lib *mode_lib,
+ int CompressedBufferSizeInkByte,
+ bool UnboundedRequestEnabled,
+ int ConfigReturnBufferSizeInKByte,
+ int MetaFIFOSizeInKEntries,
+ int ZeroSizeBufferEntries,
+ int NumberOfActivePlanes,
+ int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ double COMPBUF_RESERVED_SPACE_64B,
+ double COMPBUF_RESERVED_SPACE_ZS,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizedVBlank,
+ double Z8StutterEnterPlusExitWatermark,
+ double StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ int DPPPerPlane[],
+ unsigned int DETBufferSizeY[],
+ int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ int SwathHeightY[],
+ int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ int HTotal[],
+ int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum scan_direction_class SourceScan[],
+ int BlockHeight256BytesY[],
+ int BlockWidth256BytesY[],
+ int BlockHeight256BytesC[],
+ int BlockWidth256BytesC[],
+ int DCCYMaxUncompressedBlock[],
+ int DCCCMaxUncompressedBlock[],
+ int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthPlaneLuma[],
+ double ReadBandwidthPlaneChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+
+ double DETBufferingTimeY;
+ double SwathWidthYCriticalPlane = 0;
+ double VActiveTimeCriticalPlane = 0;
+ double FrameTimeCriticalPlane = 0;
+ int BytePerPixelYCriticalPlane = 0;
+ double LinesToFinishSwathTransferStutterCriticalPlane = 0;
+ double MinTTUVBlankCriticalPlane = 0;
+ double TotalCompressedReadBandwidth;
+ double TotalRowReadBandwidth;
+ double AverageDCCCompressionRate;
+ double EffectiveCompressedBufferSize;
+ double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
+ double StutterBurstTime;
+ int TotalActiveWriteback;
+ double LinesInDETY;
+ double LinesInDETYRoundedDownToSwath;
+ double MaximumEffectiveCompressionLuma;
+ double MaximumEffectiveCompressionChroma;
+ double TotalZeroSizeRequestReadBandwidth;
+ double TotalZeroSizeCompressedReadBandwidth;
+ double AverageDCCZeroSizeFraction;
+ double AverageZeroSizeCompressionRate;
+ int TotalNumberOfActiveOTG = 0;
+ double LastStutterPeriod = 0.0;
+ double LastZ8StutterPeriod = 0.0;
+ int k;
+
+ TotalZeroSizeRequestReadBandwidth = 0;
+ TotalZeroSizeCompressedReadBandwidth = 0;
+ TotalRowReadBandwidth = 0;
+ TotalCompressedReadBandwidth = 0;
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (DCCEnable[k] == true) {
+ if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
+ || DCCYMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionLuma = 2;
+ } else {
+ MaximumEffectiveCompressionLuma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
+ if (ReadBandwidthPlaneChroma[k] > 0) {
+ if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
+ || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionChroma = 2;
+ } else {
+ MaximumEffectiveCompressionChroma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
+ + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
+ }
+ } else {
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
+ }
+ TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
+ }
+
+ AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
+ AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
+ dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
+ dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
+#endif
+
+ if (AverageDCCZeroSizeFraction == 1) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
+ } else if (AverageDCCZeroSizeFraction > 0) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = dml_min(
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
+ + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
+ (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+ dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print(
+ "DML::%s: min 2 = %f\n",
+ __func__,
+ MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
+ dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+ } else {
+ EffectiveCompressedBufferSize = dml_min(
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
+ dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
+ dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+#endif
+
+ *StutterPeriod = 0;
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
+ / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
+ DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
+ dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
+ dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
+ dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
+ dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
+ dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
+ dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
+ dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
+#endif
+
+ if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
+ bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
+
+ *StutterPeriod = DETBufferingTimeY;
+ FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
+ VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
+ BytePerPixelYCriticalPlane = BytePerPixelY[k];
+ SwathWidthYCriticalPlane = SwathWidthY[k];
+ LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
+ MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+ dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
+ dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
+ dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
+ dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
+ dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
+ dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
+#endif
+ }
+ }
+
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
+ dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+ dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+ dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
+ dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+#endif
+
+ StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
+ + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
+ + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
+ dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
+ dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+#endif
+ StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
+
+ dml_print(
+ "DML::%s: Time to finish residue swath=%f\n",
+ __func__,
+ LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
+
+ TotalActiveWriteback = 0;
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (WritebackEnable[k]) {
+ TotalActiveWriteback = TotalActiveWriteback + 1;
+ }
+ }
+
+ if (TotalActiveWriteback == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
+ dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
+ dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+#endif
+ *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
+ *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
+ *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
+ *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
+ } else {
+ *StutterEfficiencyNotIncludingVBlank = 0.;
+ *Z8StutterEfficiencyNotIncludingVBlank = 0.;
+ *NumberOfStutterBurstsPerFrame = 0;
+ *Z8NumberOfStutterBurstsPerFrame = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
+ TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
+ }
+ }
+
+ if (*StutterEfficiencyNotIncludingVBlank > 0) {
+ LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+
+ if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
+ *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
+ / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
+ } else {
+ *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *StutterEfficiency = 0;
+ }
+
+ if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
+ LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+ if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
+ *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
+ / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
+ } else {
+ *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *Z8StutterEfficiency = 0.;
+ }
+
+ dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+ dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
+ dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+}
+
+static void CalculateSwathAndDETConfiguration(
+ bool ForceSingleDPP,
+ int NumberOfActivePlanes,
+ unsigned int DETBufferSizeInKByte,
+ double MaximumSwathWidthLuma[],
+ double MaximumSwathWidthChroma[],
+ enum scan_direction_class SourceScan[],
+ enum source_format_class SourcePixelFormat[],
+ enum dm_swizzle_mode SurfaceTiling[],
+ int ViewportWidth[],
+ int ViewportHeight[],
+ int SurfaceWidthY[],
+ int SurfaceWidthC[],
+ int SurfaceHeightY[],
+ int SurfaceHeightC[],
+ int Read256BytesBlockHeightY[],
+ int Read256BytesBlockHeightC[],
+ int Read256BytesBlockWidthY[],
+ int Read256BytesBlockWidthC[],
+ enum odm_combine_mode ODMCombineEnabled[],
+ int BlendingAndTiming[],
+ int BytePerPixY[],
+ int BytePerPixC[],
+ double BytePerPixDETY[],
+ double BytePerPixDETC[],
+ int HActive[],
+ double HRatio[],
+ double HRatioChroma[],
+ int DPPPerPlane[],
+ int swath_width_luma_ub[],
+ int swath_width_chroma_ub[],
+ double SwathWidth[],
+ double SwathWidthChroma[],
+ int SwathHeightY[],
+ int SwathHeightC[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ bool ViewportSizeSupportPerPlane[],
+ bool *ViewportSizeSupport)
+{
+ int MaximumSwathHeightY[DC__NUM_DPP__MAX];
+ int MaximumSwathHeightC[DC__NUM_DPP__MAX];
+ int MinimumSwathHeightY;
+ int MinimumSwathHeightC;
+ int RoundedUpMaxSwathSizeBytesY;
+ int RoundedUpMaxSwathSizeBytesC;
+ int RoundedUpMinSwathSizeBytesY;
+ int RoundedUpMinSwathSizeBytesC;
+ int RoundedUpSwathSizeBytesY;
+ int RoundedUpSwathSizeBytesC;
+ double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
+ double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
+ int k;
+
+ CalculateSwathWidth(
+ ForceSingleDPP,
+ NumberOfActivePlanes,
+ SourcePixelFormat,
+ SourceScan,
+ ViewportWidth,
+ ViewportHeight,
+ SurfaceWidthY,
+ SurfaceWidthC,
+ SurfaceHeightY,
+ SurfaceHeightC,
+ ODMCombineEnabled,
+ BytePerPixY,
+ BytePerPixC,
+ Read256BytesBlockHeightY,
+ Read256BytesBlockHeightC,
+ Read256BytesBlockWidthY,
+ Read256BytesBlockWidthC,
+ BlendingAndTiming,
+ HActive,
+ HRatio,
+ DPPPerPlane,
+ SwathWidthSingleDPP,
+ SwathWidthSingleDPPChroma,
+ SwathWidth,
+ SwathWidthChroma,
+ MaximumSwathHeightY,
+ MaximumSwathHeightC,
+ swath_width_luma_ub,
+ swath_width_chroma_ub);
+
+ *ViewportSizeSupport = true;
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
+ || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
+ if (SurfaceTiling[k] == dm_sw_linear
+ || (SourcePixelFormat[k] == dm_444_64
+ && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
+ && SourceScan[k] != dm_vert)) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k];
+ } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k];
+ } else {
+ MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
+ }
+ MinimumSwathHeightC = MaximumSwathHeightC[k];
+ } else {
+ if (SurfaceTiling[k] == dm_sw_linear) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k];
+ MinimumSwathHeightC = MaximumSwathHeightC[k];
+ } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
+ MinimumSwathHeightC = MaximumSwathHeightC[k];
+ } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
+ MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
+ } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
+ MinimumSwathHeightY = MaximumSwathHeightY[k];
+ MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
+ } else {
+ MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
+ MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
+ }
+ }
+
+ RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
+ RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
+ if (SourcePixelFormat[k] == dm_420_10) {
+ RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
+ RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
+ }
+ RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
+ RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
+ if (SourcePixelFormat[k] == dm_420_10) {
+ RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
+ RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
+ }
+
+ if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
+ } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
+ && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
+ SwathHeightY[k] = MinimumSwathHeightY;
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
+ } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
+ && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MinimumSwathHeightC;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
+ RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
+ } else {
+ SwathHeightY[k] = MinimumSwathHeightY;
+ SwathHeightC[k] = MinimumSwathHeightC;
+ RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
+ RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
+ }
+ {
+ double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
+
+ if (SwathHeightC[k] == 0) {
+ DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
+ DETBufferSizeC[k] = 0;
+ } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
+ DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
+ DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
+ } else {
+ DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
+ DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
+ }
+
+ if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
+ || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
+ *ViewportSizeSupport = false;
+ ViewportSizeSupportPerPlane[k] = false;
+ } else {
+ ViewportSizeSupportPerPlane[k] = true;
+ }
+ }
+ }
+}
+
+static void CalculateSwathWidth(
+ bool ForceSingleDPP,
+ int NumberOfActivePlanes,
+ enum source_format_class SourcePixelFormat[],
+ enum scan_direction_class SourceScan[],
+ int ViewportWidth[],
+ int ViewportHeight[],
+ int SurfaceWidthY[],
+ int SurfaceWidthC[],
+ int SurfaceHeightY[],
+ int SurfaceHeightC[],
+ enum odm_combine_mode ODMCombineEnabled[],
+ int BytePerPixY[],
+ int BytePerPixC[],
+ int Read256BytesBlockHeightY[],
+ int Read256BytesBlockHeightC[],
+ int Read256BytesBlockWidthY[],
+ int Read256BytesBlockWidthC[],
+ int BlendingAndTiming[],
+ int HActive[],
+ double HRatio[],
+ int DPPPerPlane[],
+ double SwathWidthSingleDPPY[],
+ double SwathWidthSingleDPPC[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ int MaximumSwathHeightY[],
+ int MaximumSwathHeightC[],
+ int swath_width_luma_ub[],
+ int swath_width_chroma_ub[])
+{
+ enum odm_combine_mode MainPlaneODMCombine;
+ int j, k;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
+#endif
+
+ for (k = 0; k < NumberOfActivePlanes; ++k) {
+ if (SourceScan[k] != dm_vert) {
+ SwathWidthSingleDPPY[k] = ViewportWidth[k];
+ } else {
+ SwathWidthSingleDPPY[k] = ViewportHeight[k];
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
+ dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
+#endif
+
+ MainPlaneODMCombine = ODMCombineEnabled[k];
+ for (j = 0; j < NumberOfActivePlanes; ++j) {
+ if (BlendingAndTiming[k] == j) {
+ MainPlaneODMCombine = ODMCombineEnabled[j];
+ }
+ }
+
+ if (MainPlaneODMCombine == dm_odm_combine_mode_4to1)
+ SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
+ else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1)
+ SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
+ else if (DPPPerPlane[k] == 2)
+ SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
+ else
+ SwathWidthY[k] = SwathWidthSingleDPPY[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
+ dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
+#endif
+
+ if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
+ SwathWidthC[k] = SwathWidthY[k] / 2;
+ SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
+ } else {
+ SwathWidthC[k] = SwathWidthY[k];
+ SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
+ }
+
+ if (ForceSingleDPP == true) {
+ SwathWidthY[k] = SwathWidthSingleDPPY[k];
+ SwathWidthC[k] = SwathWidthSingleDPPC[k];
+ }
+ {
+ int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
+ int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
+ int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+ int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
+#endif
+
+ if (SourceScan[k] != dm_vert) {
+ MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
+ swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
+ if (BytePerPixC[k] > 0) {
+ swath_width_chroma_ub[k] = dml_min(
+ surface_width_ub_c,
+ (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ } else {
+ MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
+ swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
+ if (BytePerPixC[k] > 0) {
+ swath_width_chroma_ub[k] = dml_min(
+ surface_height_ub_c,
+ (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ }
+ }
+ }
+}
+
+static double CalculateExtraLatency(
+ int RoundTripPingLatencyCycles,
+ int ReorderingBytes,
+ double DCFCLK,
+ int TotalNumberOfActiveDPP,
+ int PixelChunkSizeInKByte,
+ int TotalNumberOfDCCActiveDPP,
+ int MetaChunkSize,
+ double ReturnBW,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ int NumberOfActivePlanes,
+ int NumberOfDPP[],
+ int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ int HostVMMaxNonCachedPageTableLevels)
+{
+ double ExtraLatencyBytes;
+ double ExtraLatency;
+
+ ExtraLatencyBytes = CalculateExtraLatencyBytes(
+ ReorderingBytes,
+ TotalNumberOfActiveDPP,
+ PixelChunkSizeInKByte,
+ TotalNumberOfDCCActiveDPP,
+ MetaChunkSize,
+ GPUVMEnable,
+ HostVMEnable,
+ NumberOfActivePlanes,
+ NumberOfDPP,
+ dpte_group_bytes,
+ HostVMInefficiencyFactor,
+ HostVMMinPageSize,
+ HostVMMaxNonCachedPageTableLevels);
+
+ ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
+ dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
+ dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
+ dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
+ dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
+#endif
+
+ return ExtraLatency;
+}
+
+static double CalculateExtraLatencyBytes(
+ int ReorderingBytes,
+ int TotalNumberOfActiveDPP,
+ int PixelChunkSizeInKByte,
+ int TotalNumberOfDCCActiveDPP,
+ int MetaChunkSize,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ int NumberOfActivePlanes,
+ int NumberOfDPP[],
+ int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ int HostVMMaxNonCachedPageTableLevels)
+{
+ double ret;
+ int HostVMDynamicLevels = 0, k;
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ if (HostVMMinPageSize < 2048)
+ HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+ else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+ else
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+ else
+ HostVMDynamicLevels = 0;
+
+ ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
+
+ if (GPUVMEnable == true)
+ for (k = 0; k < NumberOfActivePlanes; ++k)
+ ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
+ }
+ return ret;
+}
+
+static double CalculateUrgentLatency(
+ double UrgentLatencyPixelDataOnly,
+ double UrgentLatencyPixelMixedWithVMData,
+ double UrgentLatencyVMDataOnly,
+ bool DoUrgentLatencyAdjustment,
+ double UrgentLatencyAdjustmentFabricClockComponent,
+ double UrgentLatencyAdjustmentFabricClockReference,
+ double FabricClock)
+{
+ double ret;
+
+ ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
+ if (DoUrgentLatencyAdjustment == true)
+ ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
+ return ret;
+}
+
+static void UseMinimumDCFCLK(
+ struct display_mode_lib *mode_lib,
+ int MaxPrefetchMode,
+ int ReorderingBytes)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ int dummy1, i, j, k;
+ double NormalEfficiency, dummy2, dummy3;
+ double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
+
+ NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
+ double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
+ double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
+ double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
+ double MinimumTWait;
+ double NonDPTEBandwidth;
+ double DPTEBandwidth;
+ double DCFCLKRequiredForAverageBandwidth;
+ double ExtraLatencyBytes;
+ double ExtraLatencyCycles;
+ double DCFCLKRequiredForPeakBandwidth;
+ int NoOfDPPState[DC__NUM_DPP__MAX];
+ double MinimumTvmPlus2Tr0;
+
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
+ + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
+ }
+
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
+ NoOfDPPState[k] = v->NoOfDPP[i][j][k];
+
+ MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
+ NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
+ DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
+ DCFCLKRequiredForAverageBandwidth = dml_max3(
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
+ / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
+ (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
+
+ ExtraLatencyBytes = CalculateExtraLatencyBytes(
+ ReorderingBytes,
+ v->TotalNumberOfActiveDPP[i][j],
+ v->PixelChunkSizeInKByte,
+ v->TotalNumberOfDCCActiveDPP[i][j],
+ v->MetaChunkSize,
+ v->GPUVMEnable,
+ v->HostVMEnable,
+ v->NumberOfActivePlanes,
+ NoOfDPPState,
+ v->dpte_group_bytes,
+ 1,
+ v->HostVMMinPageSize,
+ v->HostVMMaxNonCachedPageTableLevels);
+ ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ double DCFCLKCyclesRequiredInPrefetch;
+ double ExpectedPrefetchBWAcceleration;
+ double PrefetchTime;
+
+ PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
+ + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
+ DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
+ + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
+ + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
+ + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
+ PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
+ ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
+ / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
+ DynamicMetadataVMExtraLatency[k] =
+ (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
+ v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+ PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
+ - v->UrgLatency[i]
+ * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
+ * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
+ - DynamicMetadataVMExtraLatency[k];
+
+ if (PrefetchTime > 0) {
+ double ExpectedVRatioPrefetch;
+
+ ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
+ / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
+ * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
+ if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
+ + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
+ }
+ } else {
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
+ }
+ if (v->DynamicMetadataEnable[k] == true) {
+ double TSetupPipe;
+ double TdmbfPipe;
+ double TdmsksPipe;
+ double TdmecPipe;
+ double AllowedTimeForUrgentExtraLatency;
+
+ CalculateVupdateAndDynamicMetadataParameters(
+ v->MaxInterDCNTileRepeaters,
+ v->RequiredDPPCLK[i][j][k],
+ v->RequiredDISPCLK[i][j],
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->PixelClock[k],
+ v->HTotal[k],
+ v->VTotal[k] - v->VActive[k],
+ v->DynamicMetadataTransmittedBytes[k],
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
+ v->Interlace[k],
+ v->ProgressiveToInterlaceUnitInOPP,
+ &TSetupPipe,
+ &TdmbfPipe,
+ &TdmecPipe,
+ &TdmsksPipe,
+ &dummy1,
+ &dummy2,
+ &dummy3);
+ AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
+ - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
+ if (AllowedTimeForUrgentExtraLatency > 0) {
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
+ DCFCLKRequiredForPeakBandwidthPerPlane[k],
+ ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
+ } else {
+ DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
+ }
+ }
+ }
+ DCFCLKRequiredForPeakBandwidth = 0;
+ for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
+ DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
+
+ MinimumTvmPlus2Tr0 = v->UrgLatency[i]
+ * (v->GPUVMEnable == true ?
+ (v->HostVMEnable == true ?
+ (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
+ 0);
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ double MaximumTvmPlus2Tr0PlusTsw;
+
+ MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
+ if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
+ DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
+ } else {
+ DCFCLKRequiredForPeakBandwidth = dml_max3(
+ DCFCLKRequiredForPeakBandwidth,
+ 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
+ (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
+ }
+ }
+ v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
+ }
+ }
+}
+
+static void CalculateUnboundedRequestAndCompressedBufferSize(
+ unsigned int DETBufferSizeInKByte,
+ int ConfigReturnBufferSizeInKByte,
+ enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ int TotalActiveDPP,
+ bool NoChromaPlanes,
+ int MaxNumDPP,
+ int CompressedBufferSegmentSizeInkByteFinal,
+ enum output_encoder_class *Output,
+ bool *UnboundedRequestEnabled,
+ int *CompressedBufferSizeInkByte)
+{
+ double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
+
+ *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
+ *CompressedBufferSizeInkByte = (
+ *UnboundedRequestEnabled == true ?
+ ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
+ ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
+ *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
+ dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
+ dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
+ dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
+#endif
+}
+
+static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
+{
+ bool ret_val = false;
+
+ ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
+ if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
+ ret_val = false;
+ return ret_val;
+}
+
+static unsigned int CalculateMaxVStartup(
+ unsigned int VTotal,
+ unsigned int VActive,
+ unsigned int VBlankNom,
+ unsigned int HTotal,
+ double PixelClock,
+ bool ProgressiveTointerlaceUnitinOPP,
+ bool Interlace,
+ unsigned int VBlankNomDefaultUS,
+ double WritebackDelayTime)
+{
+ unsigned int MaxVStartup = 0;
+ unsigned int vblank_size = 0;
+ double line_time_us = HTotal / PixelClock;
+ unsigned int vblank_actual = VTotal - VActive;
+ unsigned int vblank_nom_default_in_line = dml_floor(VBlankNomDefaultUS / line_time_us, 1.0);
+ unsigned int vblank_nom_input = dml_min(VBlankNom, vblank_nom_default_in_line);
+ unsigned int vblank_avail = vblank_nom_input == 0 ? vblank_nom_default_in_line : vblank_nom_input;
+
+ vblank_size = (unsigned int) dml_min(vblank_actual, vblank_avail);
+ if (Interlace && !ProgressiveTointerlaceUnitinOPP)
+ MaxVStartup = dml_floor(vblank_size / 2.0, 1.0);
+ else
+ MaxVStartup = vblank_size - dml_max(1.0, dml_ceil(WritebackDelayTime / line_time_us, 1.0));
+ if (MaxVStartup > 1023)
+ MaxVStartup = 1023;
+ return MaxVStartup;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h
new file mode 100644
index 000000000000..a8199ab7d26a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML314_DISPLAY_MODE_VBA_H__
+#define __DML314_DISPLAY_MODE_VBA_H__
+
+void dml314_recalculate(struct display_mode_lib *mode_lib);
+void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib);
+double dml314_CalculateWriteBackDISPCLK(
+ enum source_format_class WritebackPixelFormat,
+ double PixelClock,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackHTaps,
+ unsigned int WritebackVTaps,
+ long WritebackSourceWidth,
+ long WritebackDestinationWidth,
+ unsigned int HTotal,
+ unsigned int WritebackLineBufferSize);
+
+#endif /* __DML314_DISPLAY_MODE_VBA_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
new file mode 100644
index 000000000000..61ee9ba063a7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
@@ -0,0 +1,1733 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "../display_mode_lib.h"
+#include "../display_mode_vba.h"
+#include "../dml_inline_defs.h"
+#include "display_rq_dlg_calc_314.h"
+
+static bool CalculateBytePerPixelAnd256BBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC)
+{
+ if (SourcePixelFormat == dm_444_64) {
+ *BytePerPixelDETY = 8;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 8;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_16) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 1;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 1;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 2;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 2;
+ } else if (SourcePixelFormat == dm_420_12) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 4;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ } else {
+ *BytePerPixelDETY = 4.0 / 3;
+ *BytePerPixelDETC = 8.0 / 3;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ }
+
+ if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
+ || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
+ if (SurfaceTiling == dm_sw_linear)
+ *BlockHeight256BytesY = 1;
+ else if (SourcePixelFormat == dm_444_64)
+ *BlockHeight256BytesY = 4;
+ else if (SourcePixelFormat == dm_444_8)
+ *BlockHeight256BytesY = 16;
+ else
+ *BlockHeight256BytesY = 8;
+
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockHeight256BytesC = 0;
+ *BlockWidth256BytesC = 0;
+ } else {
+ if (SurfaceTiling == dm_sw_linear) {
+ *BlockHeight256BytesY = 1;
+ *BlockHeight256BytesC = 1;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 16;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BlockHeight256BytesY = 16;
+ *BlockHeight256BytesC = 8;
+ } else {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 8;
+ }
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
+ }
+ return true;
+}
+
+static bool is_dual_plane(enum source_format_class source_format)
+{
+ bool ret_val = 0;
+
+ if ((source_format == dm_420_12) || (source_format == dm_420_8) || (source_format == dm_420_10) || (source_format == dm_rgbe_alpha))
+ ret_val = 1;
+
+ return ret_val;
+}
+
+static double get_refcyc_per_delivery(
+ struct display_mode_lib *mode_lib,
+ double refclk_freq_in_mhz,
+ double pclk_freq_in_mhz,
+ unsigned int odm_combine,
+ unsigned int recout_width,
+ unsigned int hactive,
+ double vratio,
+ double hscale_pixel_rate,
+ unsigned int delivery_width,
+ unsigned int req_per_swath_ub)
+{
+ double refcyc_per_delivery = 0.0;
+
+ if (vratio <= 1.0) {
+ if (odm_combine)
+ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) ((unsigned int) odm_combine * 2)
+ * dml_min((double) recout_width, (double) hactive / ((unsigned int) odm_combine * 2)) / pclk_freq_in_mhz / (double) req_per_swath_ub;
+ else
+ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width / pclk_freq_in_mhz / (double) req_per_swath_ub;
+ } else {
+ refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width / (double) hscale_pixel_rate / (double) req_per_swath_ub;
+ }
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width);
+ dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio);
+ dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub);
+ dml_print("DML_DLG: %s: hscale_pixel_rate = %3.2f\n", __func__, hscale_pixel_rate);
+ dml_print("DML_DLG: %s: delivery_width = %d\n", __func__, delivery_width);
+ dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery);
+#endif
+
+ return refcyc_per_delivery;
+
+}
+
+static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size)
+{
+ if (tile_size == dm_256k_tile)
+ return (256 * 1024);
+ else if (tile_size == dm_64k_tile)
+ return (64 * 1024);
+ else
+ return (4 * 1024);
+}
+
+static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib, display_data_rq_regs_st *rq_regs, const display_data_rq_sizing_params_st *rq_sizing)
+{
+ print__data_rq_sizing_params_st(mode_lib, rq_sizing);
+
+ rq_regs->chunk_size = dml_log2(rq_sizing->chunk_bytes) - 10;
+
+ if (rq_sizing->min_chunk_bytes == 0)
+ rq_regs->min_chunk_size = 0;
+ else
+ rq_regs->min_chunk_size = dml_log2(rq_sizing->min_chunk_bytes) - 8 + 1;
+
+ rq_regs->meta_chunk_size = dml_log2(rq_sizing->meta_chunk_bytes) - 10;
+ if (rq_sizing->min_meta_chunk_bytes == 0)
+ rq_regs->min_meta_chunk_size = 0;
+ else
+ rq_regs->min_meta_chunk_size = dml_log2(rq_sizing->min_meta_chunk_bytes) - 6 + 1;
+
+ rq_regs->dpte_group_size = dml_log2(rq_sizing->dpte_group_bytes) - 6;
+ rq_regs->mpte_group_size = dml_log2(rq_sizing->mpte_group_bytes) - 6;
+}
+
+static void extract_rq_regs(struct display_mode_lib *mode_lib, display_rq_regs_st *rq_regs, const display_rq_params_st *rq_param)
+{
+ unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
+ unsigned int detile_buf_plane1_addr = 0;
+
+ extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), &rq_param->sizing.rq_l);
+
+ rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_l.dpte_row_height), 1) - 3;
+
+ if (rq_param->yuv420) {
+ extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), &rq_param->sizing.rq_c);
+ rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_c.dpte_row_height), 1) - 3;
+ }
+
+ rq_regs->rq_regs_l.swath_height = dml_log2(rq_param->dlg.rq_l.swath_height);
+ rq_regs->rq_regs_c.swath_height = dml_log2(rq_param->dlg.rq_c.swath_height);
+
+ // FIXME: take the max between luma, chroma chunk size?
+ // okay for now, as we are setting chunk_bytes to 8kb anyways
+ if (rq_param->sizing.rq_l.chunk_bytes >= 32 * 1024 || (rq_param->yuv420 && rq_param->sizing.rq_c.chunk_bytes >= 32 * 1024)) { //32kb
+ rq_regs->drq_expansion_mode = 0;
+ } else {
+ rq_regs->drq_expansion_mode = 2;
+ }
+ rq_regs->prq_expansion_mode = 1;
+ rq_regs->mrq_expansion_mode = 1;
+ rq_regs->crq_expansion_mode = 1;
+
+ // Note: detile_buf_plane1_addr is in unit of 1KB
+ if (rq_param->yuv420) {
+ if ((double) rq_param->misc.rq_l.stored_swath_bytes / (double) rq_param->misc.rq_c.stored_swath_bytes <= 1.5) {
+ detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr);
+#endif
+ } else {
+ detile_buf_plane1_addr = dml_round_to_multiple((unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0; // 2/3 to luma
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d (1/3 chroma)\n", __func__, detile_buf_plane1_addr);
+#endif
+ }
+ }
+ rq_regs->plane1_base_address = detile_buf_plane1_addr;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %0d\n", __func__, detile_buf_size_in_bytes);
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d\n", __func__, detile_buf_plane1_addr);
+ dml_print("DML_DLG: %s: plane1_base_address = %0d\n", __func__, rq_regs->plane1_base_address);
+ dml_print("DML_DLG: %s: rq_l.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_l.stored_swath_bytes);
+ dml_print("DML_DLG: %s: rq_c.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_c.stored_swath_bytes);
+ dml_print("DML_DLG: %s: rq_l.swath_height = %0d\n", __func__, rq_param->dlg.rq_l.swath_height);
+ dml_print("DML_DLG: %s: rq_c.swath_height = %0d\n", __func__, rq_param->dlg.rq_c.swath_height);
+#endif
+}
+
+static void handle_det_buf_split(struct display_mode_lib *mode_lib, display_rq_params_st *rq_param, const display_pipe_source_params_st *pipe_src_param)
+{
+ unsigned int total_swath_bytes = 0;
+ unsigned int swath_bytes_l = 0;
+ unsigned int swath_bytes_c = 0;
+ unsigned int full_swath_bytes_packed_l = 0;
+ unsigned int full_swath_bytes_packed_c = 0;
+ bool req128_l = 0;
+ bool req128_c = 0;
+ bool surf_linear = (pipe_src_param->sw_mode == dm_sw_linear);
+ bool surf_vert = (pipe_src_param->source_scan == dm_vert);
+ unsigned int log2_swath_height_l = 0;
+ unsigned int log2_swath_height_c = 0;
+ unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
+
+ full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes;
+ full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", __func__, full_swath_bytes_packed_l);
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", __func__, full_swath_bytes_packed_c);
+#endif
+
+ if (rq_param->yuv420_10bpc) {
+ full_swath_bytes_packed_l = dml_round_to_multiple(rq_param->misc.rq_l.full_swath_bytes * 2.0 / 3.0, 256, 1) + 256;
+ full_swath_bytes_packed_c = dml_round_to_multiple(rq_param->misc.rq_c.full_swath_bytes * 2.0 / 3.0, 256, 1) + 256;
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d (3-2 packing)\n", __func__, full_swath_bytes_packed_l);
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d (3-2 packing)\n", __func__, full_swath_bytes_packed_c);
+#endif
+ }
+
+ if (rq_param->yuv420)
+ total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c;
+ else
+ total_swath_bytes = 2 * full_swath_bytes_packed_l;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: total_swath_bytes = %0d\n", __func__, total_swath_bytes);
+ dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %0d\n", __func__, detile_buf_size_in_bytes);
+#endif
+
+ if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request
+ req128_l = 0;
+ req128_c = 0;
+ swath_bytes_l = full_swath_bytes_packed_l;
+ swath_bytes_c = full_swath_bytes_packed_c;
+ } else if (!rq_param->yuv420) {
+ req128_l = 1;
+ req128_c = 0;
+ swath_bytes_c = full_swath_bytes_packed_c;
+ swath_bytes_l = full_swath_bytes_packed_l / 2;
+ } else if ((double) full_swath_bytes_packed_l / (double) full_swath_bytes_packed_c < 1.5) {
+ req128_l = 0;
+ req128_c = 1;
+ swath_bytes_l = full_swath_bytes_packed_l;
+ swath_bytes_c = full_swath_bytes_packed_c / 2;
+
+ total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c;
+
+ if (total_swath_bytes > detile_buf_size_in_bytes) {
+ req128_l = 1;
+ swath_bytes_l = full_swath_bytes_packed_l / 2;
+ }
+ } else {
+ req128_l = 1;
+ req128_c = 0;
+ swath_bytes_l = full_swath_bytes_packed_l / 2;
+ swath_bytes_c = full_swath_bytes_packed_c;
+
+ total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c;
+
+ if (total_swath_bytes > detile_buf_size_in_bytes) {
+ req128_c = 1;
+ swath_bytes_c = full_swath_bytes_packed_c / 2;
+ }
+ }
+
+ if (rq_param->yuv420)
+ total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c;
+ else
+ total_swath_bytes = 2 * swath_bytes_l;
+
+ rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l;
+ rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: total_swath_bytes = %0d\n", __func__, total_swath_bytes);
+ dml_print("DML_DLG: %s: rq_l.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_l.stored_swath_bytes);
+ dml_print("DML_DLG: %s: rq_c.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_c.stored_swath_bytes);
+#endif
+ if (surf_linear) {
+ log2_swath_height_l = 0;
+ log2_swath_height_c = 0;
+ } else {
+ unsigned int swath_height_l;
+ unsigned int swath_height_c;
+
+ if (!surf_vert) {
+ swath_height_l = rq_param->misc.rq_l.blk256_height;
+ swath_height_c = rq_param->misc.rq_c.blk256_height;
+ } else {
+ swath_height_l = rq_param->misc.rq_l.blk256_width;
+ swath_height_c = rq_param->misc.rq_c.blk256_width;
+ }
+
+ if (swath_height_l > 0)
+ log2_swath_height_l = dml_log2(swath_height_l);
+
+ if (req128_l && log2_swath_height_l > 0)
+ log2_swath_height_l -= 1;
+
+ if (swath_height_c > 0)
+ log2_swath_height_c = dml_log2(swath_height_c);
+
+ if (req128_c && log2_swath_height_c > 0)
+ log2_swath_height_c -= 1;
+ }
+
+ rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
+ rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l);
+ dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c);
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", __func__, full_swath_bytes_packed_l);
+ dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", __func__, full_swath_bytes_packed_c);
+ dml_print("DML_DLG: %s: swath_height luma = %0d\n", __func__, rq_param->dlg.rq_l.swath_height);
+ dml_print("DML_DLG: %s: swath_height chroma = %0d\n", __func__, rq_param->dlg.rq_c.swath_height);
+#endif
+}
+
+static void get_meta_and_pte_attr(
+ struct display_mode_lib *mode_lib,
+ display_data_rq_dlg_params_st *rq_dlg_param,
+ display_data_rq_misc_params_st *rq_misc_param,
+ display_data_rq_sizing_params_st *rq_sizing_param,
+ unsigned int vp_width,
+ unsigned int vp_height,
+ unsigned int data_pitch,
+ unsigned int meta_pitch,
+ unsigned int source_format,
+ unsigned int tiling,
+ unsigned int macro_tile_size,
+ unsigned int source_scan,
+ unsigned int hostvm_enable,
+ unsigned int is_chroma,
+ unsigned int surface_height)
+{
+ bool surf_linear = (tiling == dm_sw_linear);
+ bool surf_vert = (source_scan == dm_vert);
+
+ unsigned int bytes_per_element;
+ unsigned int bytes_per_element_y;
+ unsigned int bytes_per_element_c;
+
+ unsigned int blk256_width = 0;
+ unsigned int blk256_height = 0;
+
+ unsigned int blk256_width_y = 0;
+ unsigned int blk256_height_y = 0;
+ unsigned int blk256_width_c = 0;
+ unsigned int blk256_height_c = 0;
+ unsigned int log2_bytes_per_element;
+ unsigned int log2_blk256_width;
+ unsigned int log2_blk256_height;
+ unsigned int blk_bytes;
+ unsigned int log2_blk_bytes;
+ unsigned int log2_blk_height;
+ unsigned int log2_blk_width;
+ unsigned int log2_meta_req_bytes;
+ unsigned int log2_meta_req_height;
+ unsigned int log2_meta_req_width;
+ unsigned int meta_req_width;
+ unsigned int meta_req_height;
+ unsigned int log2_meta_row_height;
+ unsigned int meta_row_width_ub;
+ unsigned int log2_meta_chunk_bytes;
+ unsigned int log2_meta_chunk_height;
+
+ //full sized meta chunk width in unit of data elements
+ unsigned int log2_meta_chunk_width;
+ unsigned int log2_min_meta_chunk_bytes;
+ unsigned int min_meta_chunk_width;
+ unsigned int meta_chunk_width;
+ unsigned int meta_chunk_per_row_int;
+ unsigned int meta_row_remainder;
+ unsigned int meta_chunk_threshold;
+ unsigned int meta_blk_height;
+ unsigned int meta_surface_bytes;
+ unsigned int vmpg_bytes;
+ unsigned int meta_pte_req_per_frame_ub;
+ unsigned int meta_pte_bytes_per_frame_ub;
+ const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.gpuvm_min_page_size_bytes);
+ const bool dual_plane_en = is_dual_plane((enum source_format_class) (source_format));
+ const unsigned int dpte_buf_in_pte_reqs =
+ dual_plane_en ? (is_chroma ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma) : (mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma
+ + mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
+
+ unsigned int log2_vmpg_height = 0;
+ unsigned int log2_vmpg_width = 0;
+ unsigned int log2_dpte_req_height_ptes = 0;
+ unsigned int log2_dpte_req_height = 0;
+ unsigned int log2_dpte_req_width = 0;
+ unsigned int log2_dpte_row_height_linear = 0;
+ unsigned int log2_dpte_row_height = 0;
+ unsigned int log2_dpte_group_width = 0;
+ unsigned int dpte_row_width_ub = 0;
+ unsigned int dpte_req_height = 0;
+ unsigned int dpte_req_width = 0;
+ unsigned int dpte_group_width = 0;
+ unsigned int log2_dpte_group_bytes = 0;
+ unsigned int log2_dpte_group_length = 0;
+ double byte_per_pixel_det_y;
+ double byte_per_pixel_det_c;
+
+ CalculateBytePerPixelAnd256BBlockSizes(
+ (enum source_format_class) (source_format),
+ (enum dm_swizzle_mode) (tiling),
+ &bytes_per_element_y,
+ &bytes_per_element_c,
+ &byte_per_pixel_det_y,
+ &byte_per_pixel_det_c,
+ &blk256_height_y,
+ &blk256_height_c,
+ &blk256_width_y,
+ &blk256_width_c);
+
+ if (!is_chroma) {
+ blk256_width = blk256_width_y;
+ blk256_height = blk256_height_y;
+ bytes_per_element = bytes_per_element_y;
+ } else {
+ blk256_width = blk256_width_c;
+ blk256_height = blk256_height_c;
+ bytes_per_element = bytes_per_element_c;
+ }
+
+ log2_bytes_per_element = dml_log2(bytes_per_element);
+
+ dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear);
+ dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert);
+ dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width);
+ dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height);
+
+ log2_blk256_width = dml_log2((double) blk256_width);
+ log2_blk256_height = dml_log2((double) blk256_height);
+ blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
+ log2_blk_bytes = dml_log2((double) blk_bytes);
+ log2_blk_height = 0;
+ log2_blk_width = 0;
+
+ // remember log rule
+ // "+" in log is multiply
+ // "-" in log is divide
+ // "/2" is like square root
+ // blk is vertical biased
+ if (tiling != dm_sw_linear)
+ log2_blk_height = log2_blk256_height + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1);
+ else
+ log2_blk_height = 0; // blk height of 1
+
+ log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height;
+
+ if (!surf_vert) {
+ unsigned int temp;
+
+ temp = dml_round_to_multiple(vp_width - 1, blk256_width, 1) + blk256_width;
+ if (data_pitch < blk256_width) {
+ dml_print("WARNING: DML_DLG: %s: swath_size calculation ignoring data_pitch=%u < blk256_width=%u\n", __func__, data_pitch, blk256_width);
+ } else {
+ if (temp > data_pitch) {
+ if (data_pitch >= vp_width)
+ temp = data_pitch;
+ else
+ dml_print("WARNING: DML_DLG: %s: swath_size calculation ignoring data_pitch=%u < vp_width=%u\n", __func__, data_pitch, vp_width);
+ }
+ }
+ rq_dlg_param->swath_width_ub = temp;
+ rq_dlg_param->req_per_swath_ub = temp >> log2_blk256_width;
+ } else {
+ unsigned int temp;
+
+ temp = dml_round_to_multiple(vp_height - 1, blk256_height, 1) + blk256_height;
+ if (surface_height < blk256_height) {
+ dml_print("WARNING: DML_DLG: %s swath_size calculation ignored surface_height=%u < blk256_height=%u\n", __func__, surface_height, blk256_height);
+ } else {
+ if (temp > surface_height) {
+ if (surface_height >= vp_height)
+ temp = surface_height;
+ else
+ dml_print("WARNING: DML_DLG: %s swath_size calculation ignored surface_height=%u < vp_height=%u\n", __func__, surface_height, vp_height);
+ }
+ }
+ rq_dlg_param->swath_width_ub = temp;
+ rq_dlg_param->req_per_swath_ub = temp >> log2_blk256_height;
+ }
+
+ if (!surf_vert)
+ rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height * bytes_per_element;
+ else
+ rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width * bytes_per_element;
+
+ rq_misc_param->blk256_height = blk256_height;
+ rq_misc_param->blk256_width = blk256_width;
+
+ // -------
+ // meta
+ // -------
+ log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element
+
+ // each 64b meta request for dcn is 8x8 meta elements and
+ // a meta element covers one 256b block of the data surface.
+ log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256
+ log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height;
+ meta_req_width = 1 << log2_meta_req_width;
+ meta_req_height = 1 << log2_meta_req_height;
+ log2_meta_row_height = 0;
+ meta_row_width_ub = 0;
+
+ // the dimensions of a meta row are meta_row_width x meta_row_height in elements.
+ // calculate upper bound of the meta_row_width
+ if (!surf_vert) {
+ log2_meta_row_height = log2_meta_req_height;
+ meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) + meta_req_width;
+ rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width;
+ } else {
+ log2_meta_row_height = log2_meta_req_width;
+ meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) + meta_req_height;
+ rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height;
+ }
+ rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64;
+
+ rq_dlg_param->meta_row_height = 1 << log2_meta_row_height;
+
+ log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes);
+ log2_meta_chunk_height = log2_meta_row_height;
+
+ //full sized meta chunk width in unit of data elements
+ log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element - log2_meta_chunk_height;
+ log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes);
+ min_meta_chunk_width = 1 << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element - log2_meta_chunk_height);
+ meta_chunk_width = 1 << log2_meta_chunk_width;
+ meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width);
+ meta_row_remainder = meta_row_width_ub % meta_chunk_width;
+ meta_chunk_threshold = 0;
+ meta_blk_height = blk256_height * 64;
+ meta_surface_bytes = meta_pitch * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + meta_blk_height) * bytes_per_element / 256;
+ vmpg_bytes = mode_lib->soc.gpuvm_min_page_size_bytes;
+ meta_pte_req_per_frame_ub = (dml_round_to_multiple(meta_surface_bytes - vmpg_bytes, 8 * vmpg_bytes, 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes);
+ meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request
+ rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub;
+
+ dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height);
+ dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes);
+ dml_print("DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", __func__, meta_pte_req_per_frame_ub);
+ dml_print("DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", __func__, meta_pte_bytes_per_frame_ub);
+
+ if (!surf_vert)
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width;
+ else
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height;
+
+ if (meta_row_remainder <= meta_chunk_threshold)
+ rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+ else
+ rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+
+ // ------
+ // dpte
+ // ------
+ if (surf_linear)
+ log2_vmpg_height = 0; // one line high
+ else
+ log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height;
+
+ log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height;
+
+ // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4.
+ if (surf_linear) { //one 64B PTE request returns 8 PTEs
+ log2_dpte_req_height_ptes = 0;
+ log2_dpte_req_width = log2_vmpg_width + 3;
+ log2_dpte_req_height = 0;
+ } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size
+ //one 64B req gives 8x1 PTEs for 4KB tile
+ log2_dpte_req_height_ptes = 0;
+ log2_dpte_req_width = log2_blk_width + 3;
+ log2_dpte_req_height = log2_blk_height + 0;
+ } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB
+ //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB
+ log2_dpte_req_height_ptes = 4;
+ log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width
+ log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height
+ } else { //64KB page size and must 64KB tile block
+ //one 64B req gives 8x1 PTEs for 64KB tile
+ log2_dpte_req_height_ptes = 0;
+ log2_dpte_req_width = log2_blk_width + 3;
+ log2_dpte_req_height = log2_blk_height + 0;
+ }
+
+ // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height
+ // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent
+ // That depends on the pte shape (i.e. 8x1, 4x2, 2x4)
+ //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes;
+ //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes;
+ dpte_req_height = 1 << log2_dpte_req_height;
+ dpte_req_width = 1 << log2_dpte_req_width;
+
+ // calculate pitch dpte row buffer can hold
+ // round the result down to a power of two.
+ if (surf_linear) {
+ unsigned int dpte_row_height;
+
+ log2_dpte_row_height_linear = dml_floor(dml_log2(dpte_buf_in_pte_reqs * dpte_req_width / data_pitch), 1);
+
+ dml_print("DML_DLG: %s: is_chroma = %d\n", __func__, is_chroma);
+ dml_print("DML_DLG: %s: dpte_buf_in_pte_reqs = %d\n", __func__, dpte_buf_in_pte_reqs);
+ dml_print("DML_DLG: %s: log2_dpte_row_height_linear = %d\n", __func__, log2_dpte_row_height_linear);
+
+ ASSERT(log2_dpte_row_height_linear >= 3);
+
+ if (log2_dpte_row_height_linear > 7)
+ log2_dpte_row_height_linear = 7;
+
+ log2_dpte_row_height = log2_dpte_row_height_linear;
+ // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary.
+ // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering.
+ dpte_row_height = 1 << log2_dpte_row_height;
+ dpte_row_width_ub = dml_round_to_multiple(data_pitch * dpte_row_height - 1, dpte_req_width, 1) + dpte_req_width;
+ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
+ } else {
+ // the upper bound of the dpte_row_width without dependency on viewport position follows.
+ // for tiled mode, row height is the same as req height and row store up to vp size upper bound
+ if (!surf_vert) {
+ log2_dpte_row_height = log2_dpte_req_height;
+ dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) + dpte_req_width;
+ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
+ } else {
+ log2_dpte_row_height = (log2_blk_width < log2_dpte_req_width) ? log2_blk_width : log2_dpte_req_width;
+ dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) + dpte_req_height;
+ rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height;
+ }
+ }
+ if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB
+ rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request
+ else
+ rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request
+
+ rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height;
+
+ // the dpte_group_bytes is reduced for the specific case of vertical
+ // access of a tile surface that has dpte request of 8x1 ptes.
+ if (hostvm_enable)
+ rq_sizing_param->dpte_group_bytes = 512;
+ else {
+ if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ rq_sizing_param->dpte_group_bytes = 512;
+ else
+ rq_sizing_param->dpte_group_bytes = 2048;
+ }
+
+ //since pte request size is 64byte, the number of data pte requests per full sized group is as follows.
+ log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes);
+ log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests
+
+ // full sized data pte group width in elements
+ if (!surf_vert)
+ log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width;
+ else
+ log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height;
+
+ //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B
+ if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB
+ log2_dpte_group_width = log2_dpte_group_width - 1;
+
+ dpte_group_width = 1 << log2_dpte_group_width;
+
+ // since dpte groups are only aligned to dpte_req_width and not dpte_group_width,
+ // the upper bound for the dpte groups per row is as follows.
+ rq_dlg_param->dpte_groups_per_row_ub = dml_ceil((double) dpte_row_width_ub / dpte_group_width, 1);
+}
+
+static void get_surf_rq_param(
+ struct display_mode_lib *mode_lib,
+ display_data_rq_sizing_params_st *rq_sizing_param,
+ display_data_rq_dlg_params_st *rq_dlg_param,
+ display_data_rq_misc_params_st *rq_misc_param,
+ const display_pipe_params_st *pipe_param,
+ bool is_chroma,
+ bool is_alpha)
+{
+ bool mode_422 = 0;
+ unsigned int vp_width = 0;
+ unsigned int vp_height = 0;
+ unsigned int data_pitch = 0;
+ unsigned int meta_pitch = 0;
+ unsigned int surface_height = 0;
+ unsigned int ppe = mode_422 ? 2 : 1;
+
+ // FIXME check if ppe apply for both luma and chroma in 422 case
+ if (is_chroma | is_alpha) {
+ vp_width = pipe_param->src.viewport_width_c / ppe;
+ vp_height = pipe_param->src.viewport_height_c;
+ data_pitch = pipe_param->src.data_pitch_c;
+ meta_pitch = pipe_param->src.meta_pitch_c;
+ surface_height = pipe_param->src.surface_height_y / 2.0;
+ } else {
+ vp_width = pipe_param->src.viewport_width / ppe;
+ vp_height = pipe_param->src.viewport_height;
+ data_pitch = pipe_param->src.data_pitch;
+ meta_pitch = pipe_param->src.meta_pitch;
+ surface_height = pipe_param->src.surface_height_y;
+ }
+
+ if (pipe_param->dest.odm_combine) {
+ unsigned int access_dir;
+ unsigned int full_src_vp_width;
+ unsigned int hactive_odm;
+ unsigned int src_hactive_odm;
+
+ access_dir = (pipe_param->src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
+ hactive_odm = pipe_param->dest.hactive / ((unsigned int) pipe_param->dest.odm_combine * 2);
+ if (is_chroma) {
+ full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio_c * pipe_param->dest.full_recout_width;
+ src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio_c * hactive_odm;
+ } else {
+ full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio * pipe_param->dest.full_recout_width;
+ src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio * hactive_odm;
+ }
+
+ if (access_dir == 0) {
+ vp_width = dml_min(full_src_vp_width, src_hactive_odm);
+ dml_print("DML_DLG: %s: vp_width = %d\n", __func__, vp_width);
+ } else {
+ vp_height = dml_min(full_src_vp_width, src_hactive_odm);
+ dml_print("DML_DLG: %s: vp_height = %d\n", __func__, vp_height);
+
+ }
+ dml_print("DML_DLG: %s: full_src_vp_width = %d\n", __func__, full_src_vp_width);
+ dml_print("DML_DLG: %s: hactive_odm = %d\n", __func__, hactive_odm);
+ dml_print("DML_DLG: %s: src_hactive_odm = %d\n", __func__, src_hactive_odm);
+ }
+
+ rq_sizing_param->chunk_bytes = 8192;
+
+ if (is_alpha)
+ rq_sizing_param->chunk_bytes = 4096;
+
+ if (rq_sizing_param->chunk_bytes == 64 * 1024)
+ rq_sizing_param->min_chunk_bytes = 0;
+ else
+ rq_sizing_param->min_chunk_bytes = 1024;
+
+ rq_sizing_param->meta_chunk_bytes = 2048;
+ rq_sizing_param->min_meta_chunk_bytes = 256;
+
+ if (pipe_param->src.hostvm)
+ rq_sizing_param->mpte_group_bytes = 512;
+ else
+ rq_sizing_param->mpte_group_bytes = 2048;
+
+ get_meta_and_pte_attr(
+ mode_lib,
+ rq_dlg_param,
+ rq_misc_param,
+ rq_sizing_param,
+ vp_width,
+ vp_height,
+ data_pitch,
+ meta_pitch,
+ pipe_param->src.source_format,
+ pipe_param->src.sw_mode,
+ pipe_param->src.macro_tile_size,
+ pipe_param->src.source_scan,
+ pipe_param->src.hostvm,
+ is_chroma,
+ surface_height);
+}
+
+static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib, display_rq_params_st *rq_param, const display_pipe_params_st *pipe_param)
+{
+ // get param for luma surface
+ rq_param->yuv420 = pipe_param->src.source_format == dm_420_8 || pipe_param->src.source_format == dm_420_10 || pipe_param->src.source_format == dm_rgbe_alpha
+ || pipe_param->src.source_format == dm_420_12;
+
+ rq_param->yuv420_10bpc = pipe_param->src.source_format == dm_420_10;
+
+ rq_param->rgbe_alpha = (pipe_param->src.source_format == dm_rgbe_alpha) ? 1 : 0;
+
+ get_surf_rq_param(mode_lib, &(rq_param->sizing.rq_l), &(rq_param->dlg.rq_l), &(rq_param->misc.rq_l), pipe_param, 0, 0);
+
+ if (is_dual_plane((enum source_format_class) (pipe_param->src.source_format))) {
+ // get param for chroma surface
+ get_surf_rq_param(mode_lib, &(rq_param->sizing.rq_c), &(rq_param->dlg.rq_c), &(rq_param->misc.rq_c), pipe_param, 1, rq_param->rgbe_alpha);
+ }
+
+ // calculate how to split the det buffer space between luma and chroma
+ handle_det_buf_split(mode_lib, rq_param, &pipe_param->src);
+ print__rq_params_st(mode_lib, rq_param);
+}
+
+void dml314_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, display_rq_regs_st *rq_regs, const display_pipe_params_st *pipe_param)
+{
+ display_rq_params_st rq_param = {0};
+
+ memset(rq_regs, 0, sizeof(*rq_regs));
+ dml_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param);
+ extract_rq_regs(mode_lib, rq_regs, &rq_param);
+
+ print__rq_regs_st(mode_lib, rq_regs);
+}
+
+static void calculate_ttu_cursor(
+ struct display_mode_lib *mode_lib,
+ double *refcyc_per_req_delivery_pre_cur,
+ double *refcyc_per_req_delivery_cur,
+ double refclk_freq_in_mhz,
+ double ref_freq_to_pix_freq,
+ double hscale_pixel_rate_l,
+ double hscl_ratio,
+ double vratio_pre_l,
+ double vratio_l,
+ unsigned int cur_width,
+ enum cursor_bpp cur_bpp)
+{
+ unsigned int cur_src_width = cur_width;
+ unsigned int cur_req_size = 0;
+ unsigned int cur_req_width = 0;
+ double cur_width_ub = 0.0;
+ double cur_req_per_width = 0.0;
+ double hactive_cur = 0.0;
+
+ ASSERT(cur_src_width <= 256);
+
+ *refcyc_per_req_delivery_pre_cur = 0.0;
+ *refcyc_per_req_delivery_cur = 0.0;
+ if (cur_src_width > 0) {
+ unsigned int cur_bit_per_pixel = 0;
+
+ if (cur_bpp == dm_cur_2bit) {
+ cur_req_size = 64; // byte
+ cur_bit_per_pixel = 2;
+ } else { // 32bit
+ cur_bit_per_pixel = 32;
+ if (cur_src_width >= 1 && cur_src_width <= 16)
+ cur_req_size = 64;
+ else if (cur_src_width >= 17 && cur_src_width <= 31)
+ cur_req_size = 128;
+ else
+ cur_req_size = 256;
+ }
+
+ cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0);
+ cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) * (double) cur_req_width;
+ cur_req_per_width = cur_width_ub / (double) cur_req_width;
+ hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor
+
+ if (vratio_pre_l <= 1.0)
+ *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq / (double) cur_req_per_width;
+ else
+ *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz * (double) cur_src_width / hscale_pixel_rate_l / (double) cur_req_per_width;
+
+ ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13));
+
+ if (vratio_l <= 1.0)
+ *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq / (double) cur_req_per_width;
+ else
+ *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz * (double) cur_src_width / hscale_pixel_rate_l / (double) cur_req_per_width;
+
+ dml_print("DML_DLG: %s: cur_req_width = %d\n", __func__, cur_req_width);
+ dml_print("DML_DLG: %s: cur_width_ub = %3.2f\n", __func__, cur_width_ub);
+ dml_print("DML_DLG: %s: cur_req_per_width = %3.2f\n", __func__, cur_req_per_width);
+ dml_print("DML_DLG: %s: hactive_cur = %3.2f\n", __func__, hactive_cur);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", __func__, *refcyc_per_req_delivery_pre_cur);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", __func__, *refcyc_per_req_delivery_cur);
+
+ ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13));
+ }
+}
+
+// Note: currently taken in as is.
+// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma.
+static void dml_rq_dlg_get_dlg_params(
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ display_dlg_regs_st *disp_dlg_regs,
+ display_ttu_regs_st *disp_ttu_regs,
+ const display_rq_dlg_params_st *rq_dlg_param,
+ const display_dlg_sys_params_st *dlg_sys_param,
+ const bool cstate_en,
+ const bool pstate_en,
+ const bool vm_en,
+ const bool ignore_viewport_pos,
+ const bool immediate_flip_support)
+{
+ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
+ const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
+ const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout;
+ const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
+ const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth;
+ const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps;
+ unsigned int pipe_index_in_combine[DC__NUM_PIPES__MAX];
+
+ // -------------------------
+ // Section 1.15.2.1: OTG dependent Params
+ // -------------------------
+ // Timing
+ unsigned int htotal = dst->htotal;
+ unsigned int hblank_end = dst->hblank_end;
+ unsigned int vblank_start = dst->vblank_start;
+ unsigned int vblank_end = dst->vblank_end;
+
+ double dppclk_freq_in_mhz = clks->dppclk_mhz;
+ double refclk_freq_in_mhz = clks->refclk_mhz;
+ double pclk_freq_in_mhz = dst->pixel_rate_mhz;
+ bool interlaced = dst->interlaced;
+ double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
+ double min_ttu_vblank;
+ unsigned int dlg_vblank_start;
+ bool dual_plane;
+ bool mode_422;
+ unsigned int access_dir;
+ unsigned int vp_height_l;
+ unsigned int vp_width_l;
+ unsigned int vp_height_c;
+ unsigned int vp_width_c;
+
+ // Scaling
+ unsigned int htaps_l;
+ unsigned int htaps_c;
+ double hratio_l;
+ double hratio_c;
+ double vratio_l;
+ double vratio_c;
+ bool scl_enable;
+
+ unsigned int swath_width_ub_l;
+ unsigned int dpte_groups_per_row_ub_l;
+ unsigned int swath_width_ub_c;
+ unsigned int dpte_groups_per_row_ub_c;
+
+ unsigned int meta_chunks_per_row_ub_l;
+ unsigned int meta_chunks_per_row_ub_c;
+ unsigned int vupdate_offset;
+ unsigned int vupdate_width;
+ unsigned int vready_offset;
+
+ unsigned int dppclk_delay_subtotal;
+ unsigned int dispclk_delay_subtotal;
+
+ unsigned int vstartup_start;
+ unsigned int dst_x_after_scaler;
+ unsigned int dst_y_after_scaler;
+ double dst_y_prefetch;
+ double dst_y_per_vm_vblank;
+ double dst_y_per_row_vblank;
+ double dst_y_per_vm_flip;
+ double dst_y_per_row_flip;
+ double max_dst_y_per_vm_vblank;
+ double max_dst_y_per_row_vblank;
+ double vratio_pre_l;
+ double vratio_pre_c;
+ unsigned int req_per_swath_ub_l;
+ unsigned int req_per_swath_ub_c;
+ unsigned int meta_row_height_l;
+ unsigned int meta_row_height_c;
+ unsigned int swath_width_pixels_ub_l;
+ unsigned int swath_width_pixels_ub_c;
+ unsigned int scaler_rec_in_width_l;
+ unsigned int scaler_rec_in_width_c;
+ unsigned int dpte_row_height_l;
+ unsigned int dpte_row_height_c;
+ double hscale_pixel_rate_l;
+ double hscale_pixel_rate_c;
+ double min_hratio_fact_l;
+ double min_hratio_fact_c;
+ double refcyc_per_line_delivery_pre_l;
+ double refcyc_per_line_delivery_pre_c;
+ double refcyc_per_line_delivery_l;
+ double refcyc_per_line_delivery_c;
+
+ double refcyc_per_req_delivery_pre_l;
+ double refcyc_per_req_delivery_pre_c;
+ double refcyc_per_req_delivery_l;
+ double refcyc_per_req_delivery_c;
+
+ unsigned int full_recout_width;
+ double refcyc_per_req_delivery_pre_cur0;
+ double refcyc_per_req_delivery_cur0;
+ double refcyc_per_req_delivery_pre_cur1;
+ double refcyc_per_req_delivery_cur1;
+ unsigned int vba__min_dst_y_next_start = get_min_dst_y_next_start(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA
+ unsigned int vba__vready_after_vcount0 = get_vready_at_or_after_vsync(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ float vba__refcyc_per_line_delivery_pre_l = get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ float vba__refcyc_per_line_delivery_l = get_refcyc_per_line_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ float vba__refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ float vba__refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ int blank_lines = 0;
+
+ memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs));
+ memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs));
+
+ dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en);
+ dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en);
+ dml_print("DML_DLG: %s: vm_en = %d\n", __func__, vm_en);
+ dml_print("DML_DLG: %s: ignore_viewport_pos = %d\n", __func__, ignore_viewport_pos);
+ dml_print("DML_DLG: %s: immediate_flip_support = %d\n", __func__, immediate_flip_support);
+
+ dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); ASSERT(ref_freq_to_pix_freq < 4.0);
+
+ disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19));
+ disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal * dml_pow(2, 8));
+ disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits
+
+ //set_prefetch_mode(mode_lib, cstate_en, pstate_en, ignore_viewport_pos, immediate_flip_support);
+ min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
+ disp_dlg_regs->optimized_min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start;
+ disp_dlg_regs->optimized_min_dst_y_next_start_us = 0;
+ disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2));
+ blank_lines = (dst->vblank_end + dst->vtotal_min - dst->vblank_start - dst->vstartup_start - 1);
+ if (blank_lines < 0)
+ blank_lines = 0;
+ if (blank_lines != 0) {
+ disp_dlg_regs->optimized_min_dst_y_next_start = vba__min_dst_y_next_start;
+ disp_dlg_regs->optimized_min_dst_y_next_start_us = (disp_dlg_regs->optimized_min_dst_y_next_start * dst->hactive) / (unsigned int) dst->pixel_rate_mhz;
+ disp_dlg_regs->min_dst_y_next_start = disp_dlg_regs->optimized_min_dst_y_next_start;
+ }
+ ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
+
+ dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank);
+ dml_print("DML_DLG: %s: min_dst_y_next_start = 0x%0x\n", __func__, disp_dlg_regs->min_dst_y_next_start);
+ dml_print("DML_DLG: %s: dlg_vblank_start = 0x%0x\n", __func__, dlg_vblank_start);
+ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq);
+ dml_print("DML_DLG: %s: vba__min_dst_y_next_start = 0x%0x\n", __func__, vba__min_dst_y_next_start);
+
+ //old_impl_vs_vba_impl("min_dst_y_next_start", dlg_vblank_start, vba__min_dst_y_next_start);
+
+ // -------------------------
+ // Section 1.15.2.2: Prefetch, Active and TTU
+ // -------------------------
+ // Prefetch Calc
+ // Source
+ dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
+ mode_422 = 0;
+ access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
+ vp_height_l = src->viewport_height;
+ vp_width_l = src->viewport_width;
+ vp_height_c = src->viewport_height_c;
+ vp_width_c = src->viewport_width_c;
+
+ // Scaling
+ htaps_l = taps->htaps;
+ htaps_c = taps->htaps_c;
+ hratio_l = scl->hscl_ratio;
+ hratio_c = scl->hscl_ratio_c;
+ vratio_l = scl->vscl_ratio;
+ vratio_c = scl->vscl_ratio_c;
+ scl_enable = scl->scl_enable;
+
+ swath_width_ub_l = rq_dlg_param->rq_l.swath_width_ub;
+ dpte_groups_per_row_ub_l = rq_dlg_param->rq_l.dpte_groups_per_row_ub;
+ swath_width_ub_c = rq_dlg_param->rq_c.swath_width_ub;
+ dpte_groups_per_row_ub_c = rq_dlg_param->rq_c.dpte_groups_per_row_ub;
+
+ meta_chunks_per_row_ub_l = rq_dlg_param->rq_l.meta_chunks_per_row_ub;
+ meta_chunks_per_row_ub_c = rq_dlg_param->rq_c.meta_chunks_per_row_ub;
+ vupdate_offset = dst->vupdate_offset;
+ vupdate_width = dst->vupdate_width;
+ vready_offset = dst->vready_offset;
+
+ dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal;
+ dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal;
+
+ if (scl_enable)
+ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl;
+ else
+ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only;
+
+ dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor;
+
+ if (dout->dsc_enable) {
+ double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA
+
+ dispclk_delay_subtotal += dsc_delay;
+ }
+
+ vstartup_start = dst->vstartup_start;
+ if (interlaced) {
+ if (vstartup_start / 2.0 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end / 2.0)
+ disp_dlg_regs->vready_after_vcount0 = 1;
+ else
+ disp_dlg_regs->vready_after_vcount0 = 0;
+ } else {
+ if (vstartup_start - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end)
+ disp_dlg_regs->vready_after_vcount0 = 1;
+ else
+ disp_dlg_regs->vready_after_vcount0 = 0;
+ }
+
+ dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0);
+ dml_print("DML_DLG: %s: vba__vready_after_vcount0 = %d\n", __func__, vba__vready_after_vcount0);
+ //old_impl_vs_vba_impl("vready_after_vcount0", disp_dlg_regs->vready_after_vcount0, vba__vready_after_vcount0);
+
+ if (interlaced)
+ vstartup_start = vstartup_start / 2;
+
+ dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ // do some adjustment on the dst_after scaler to account for odm combine mode
+ dml_print("DML_DLG: %s: input dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: input dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler);
+
+ // need to figure out which side of odm combine we're in
+ if (dst->odm_combine) {
+ // figure out which pipes go together
+ bool visited[DC__NUM_PIPES__MAX];
+ unsigned int i, j, k;
+
+ for (k = 0; k < num_pipes; ++k) {
+ visited[k] = false;
+ pipe_index_in_combine[k] = 0;
+ }
+
+ for (i = 0; i < num_pipes; i++) {
+ if (e2e_pipe_param[i].pipe.src.is_hsplit && !visited[i]) {
+
+ unsigned int grp = e2e_pipe_param[i].pipe.src.hsplit_grp;
+ unsigned int grp_idx = 0;
+
+ for (j = i; j < num_pipes; j++) {
+ if (e2e_pipe_param[j].pipe.src.hsplit_grp == grp && e2e_pipe_param[j].pipe.src.is_hsplit && !visited[j]) {
+ pipe_index_in_combine[j] = grp_idx;
+ dml_print("DML_DLG: %s: pipe[%d] is in grp %d idx %d\n", __func__, j, grp, grp_idx);
+ grp_idx++;
+ visited[j] = true;
+ }
+ }
+ }
+ }
+
+ }
+
+ if (dst->odm_combine == dm_odm_combine_mode_disabled) {
+ disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end * ref_freq_to_pix_freq);
+ } else {
+ unsigned int odm_combine_factor = (dst->odm_combine == dm_odm_combine_mode_2to1 ? 2 : 4); // TODO: We should really check that 4to1 is supported before setting it to 4
+ unsigned int odm_pipe_index = pipe_index_in_combine[pipe_idx];
+
+ disp_dlg_regs->refcyc_h_blank_end = (unsigned int) (((double) hblank_end + odm_pipe_index * (double) dst->hactive / odm_combine_factor) * ref_freq_to_pix_freq);
+ } ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13));
+
+ dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal);
+ dml_print("DML_DLG: %s: dst_x_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: dst_y_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_y_after_scaler);
+
+ dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ max_dst_y_per_vm_vblank = 32.0; //U5.2
+ max_dst_y_per_row_vblank = 16.0; //U4.2
+
+ // magic!
+ if (htotal <= 75) {
+ max_dst_y_per_vm_vblank = 100.0;
+ max_dst_y_per_row_vblank = 100.0;
+ }
+
+ dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch);
+ dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip);
+ dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank);
+
+ ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank); ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank);
+
+ ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank));
+
+ vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l);
+ dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c);
+
+ // Active
+ req_per_swath_ub_l = rq_dlg_param->rq_l.req_per_swath_ub;
+ req_per_swath_ub_c = rq_dlg_param->rq_c.req_per_swath_ub;
+ meta_row_height_l = rq_dlg_param->rq_l.meta_row_height;
+ meta_row_height_c = rq_dlg_param->rq_c.meta_row_height;
+ swath_width_pixels_ub_l = 0;
+ swath_width_pixels_ub_c = 0;
+ scaler_rec_in_width_l = 0;
+ scaler_rec_in_width_c = 0;
+ dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
+ dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
+
+ if (mode_422) {
+ swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
+ swath_width_pixels_ub_c = swath_width_ub_c * 2;
+ } else {
+ swath_width_pixels_ub_l = swath_width_ub_l * 1;
+ swath_width_pixels_ub_c = swath_width_ub_c * 1;
+ }
+
+ hscale_pixel_rate_l = 0.;
+ hscale_pixel_rate_c = 0.;
+ min_hratio_fact_l = 1.0;
+ min_hratio_fact_c = 1.0;
+
+ if (hratio_l <= 1)
+ min_hratio_fact_l = 2.0;
+ else if (htaps_l <= 6) {
+ if ((hratio_l * 2.0) > 4.0)
+ min_hratio_fact_l = 4.0;
+ else
+ min_hratio_fact_l = hratio_l * 2.0;
+ } else {
+ if (hratio_l > 4.0)
+ min_hratio_fact_l = 4.0;
+ else
+ min_hratio_fact_l = hratio_l;
+ }
+
+ hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz;
+
+ dml_print("DML_DLG: %s: hratio_l = %3.2f\n", __func__, hratio_l);
+ dml_print("DML_DLG: %s: min_hratio_fact_l = %3.2f\n", __func__, min_hratio_fact_l);
+ dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", __func__, hscale_pixel_rate_l);
+
+ if (hratio_c <= 1)
+ min_hratio_fact_c = 2.0;
+ else if (htaps_c <= 6) {
+ if ((hratio_c * 2.0) > 4.0)
+ min_hratio_fact_c = 4.0;
+ else
+ min_hratio_fact_c = hratio_c * 2.0;
+ } else {
+ if (hratio_c > 4.0)
+ min_hratio_fact_c = 4.0;
+ else
+ min_hratio_fact_c = hratio_c;
+ }
+
+ hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz;
+
+ refcyc_per_line_delivery_pre_l = 0.;
+ refcyc_per_line_delivery_pre_c = 0.;
+ refcyc_per_line_delivery_l = 0.;
+ refcyc_per_line_delivery_c = 0.;
+
+ refcyc_per_req_delivery_pre_l = 0.;
+ refcyc_per_req_delivery_pre_c = 0.;
+ refcyc_per_req_delivery_l = 0.;
+ refcyc_per_req_delivery_c = 0.;
+
+ full_recout_width = 0;
+ // In ODM
+ if (src->is_hsplit) {
+ // This "hack" is only allowed (and valid) for MPC combine. In ODM
+ // combine, you MUST specify the full_recout_width...according to Oswin
+ if (dst->full_recout_width == 0 && !dst->odm_combine) {
+ dml_print("DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", __func__);
+ full_recout_width = dst->recout_width * 2; // assume half split for dcn1
+ } else
+ full_recout_width = dst->full_recout_width;
+ } else
+ full_recout_width = dst->recout_width;
+
+ // As of DCN2, mpc_combine and odm_combine are mutually exclusive
+ refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_pre_l,
+ hscale_pixel_rate_l,
+ swath_width_pixels_ub_l,
+ 1); // per line
+
+ refcyc_per_line_delivery_l = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_l,
+ hscale_pixel_rate_l,
+ swath_width_pixels_ub_l,
+ 1); // per line
+
+ dml_print("DML_DLG: %s: full_recout_width = %d\n", __func__, full_recout_width);
+ dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", __func__, hscale_pixel_rate_l);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l);
+ dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, vba__refcyc_per_line_delivery_pre_l);
+ dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_l = %3.2f\n", __func__, vba__refcyc_per_line_delivery_l);
+
+ //old_impl_vs_vba_impl("refcyc_per_line_delivery_pre_l", refcyc_per_line_delivery_pre_l, vba__refcyc_per_line_delivery_pre_l);
+ //old_impl_vs_vba_impl("refcyc_per_line_delivery_l", refcyc_per_line_delivery_l, vba__refcyc_per_line_delivery_l);
+
+ if (dual_plane) {
+ float vba__refcyc_per_line_delivery_pre_c = get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ float vba__refcyc_per_line_delivery_c = get_refcyc_per_line_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_pre_c,
+ hscale_pixel_rate_c,
+ swath_width_pixels_ub_c,
+ 1); // per line
+
+ refcyc_per_line_delivery_c = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_c,
+ hscale_pixel_rate_c,
+ swath_width_pixels_ub_c,
+ 1); // per line
+
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, refcyc_per_line_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, refcyc_per_line_delivery_c);
+ dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, vba__refcyc_per_line_delivery_pre_c);
+ dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_c = %3.2f\n", __func__, vba__refcyc_per_line_delivery_c);
+
+ //old_impl_vs_vba_impl("refcyc_per_line_delivery_pre_c", refcyc_per_line_delivery_pre_c, vba__refcyc_per_line_delivery_pre_c);
+ //old_impl_vs_vba_impl("refcyc_per_line_delivery_c", refcyc_per_line_delivery_c, vba__refcyc_per_line_delivery_c);
+ }
+
+ if (src->dynamic_metadata_enable && src->gpuvm)
+ disp_dlg_regs->refcyc_per_vm_dmdata = get_refcyc_per_vm_dmdata_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ disp_dlg_regs->dmdata_dl_delta = get_dmdata_dl_delta_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ // TTU - Luma / Chroma
+ if (access_dir) { // vertical access
+ scaler_rec_in_width_l = vp_height_l;
+ scaler_rec_in_width_c = vp_height_c;
+ } else {
+ scaler_rec_in_width_l = vp_width_l;
+ scaler_rec_in_width_c = vp_width_c;
+ }
+
+ refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_pre_l,
+ hscale_pixel_rate_l,
+ scaler_rec_in_width_l,
+ req_per_swath_ub_l); // per req
+
+ refcyc_per_req_delivery_l = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_l,
+ hscale_pixel_rate_l,
+ scaler_rec_in_width_l,
+ req_per_swath_ub_l); // per req
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_l);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_l = %3.2f\n", __func__, vba__refcyc_per_req_delivery_l);
+
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_l", refcyc_per_req_delivery_pre_l, vba__refcyc_per_req_delivery_pre_l);
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_l", refcyc_per_req_delivery_l, vba__refcyc_per_req_delivery_l);
+
+ ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
+
+ if (dual_plane) {
+ float vba__refcyc_per_req_delivery_pre_c = get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ float vba__refcyc_per_req_delivery_c = get_refcyc_per_req_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_pre_c,
+ hscale_pixel_rate_c,
+ scaler_rec_in_width_c,
+ req_per_swath_ub_c); // per req
+ refcyc_per_req_delivery_c = get_refcyc_per_delivery(
+ mode_lib,
+ refclk_freq_in_mhz,
+ pclk_freq_in_mhz,
+ dst->odm_combine,
+ full_recout_width,
+ dst->hactive,
+ vratio_c,
+ hscale_pixel_rate_c,
+ scaler_rec_in_width_c,
+ req_per_swath_ub_c); // per req
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, refcyc_per_req_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_c);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_c = %3.2f\n", __func__, vba__refcyc_per_req_delivery_c);
+
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_c", refcyc_per_req_delivery_pre_c, vba__refcyc_per_req_delivery_pre_c);
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_c", refcyc_per_req_delivery_c, vba__refcyc_per_req_delivery_c);
+
+ ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
+ }
+
+ // TTU - Cursor
+ refcyc_per_req_delivery_pre_cur0 = 0.0;
+ refcyc_per_req_delivery_cur0 = 0.0;
+
+ ASSERT(src->num_cursors <= 1);
+
+ if (src->num_cursors > 0) {
+ float vba__refcyc_per_req_delivery_pre_cur0;
+ float vba__refcyc_per_req_delivery_cur0;
+
+ calculate_ttu_cursor(
+ mode_lib,
+ &refcyc_per_req_delivery_pre_cur0,
+ &refcyc_per_req_delivery_cur0,
+ refclk_freq_in_mhz,
+ ref_freq_to_pix_freq,
+ hscale_pixel_rate_l,
+ scl->hscl_ratio,
+ vratio_pre_l,
+ vratio_l,
+ src->cur0_src_width,
+ (enum cursor_bpp) (src->cur0_bpp));
+
+ vba__refcyc_per_req_delivery_pre_cur0 = get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ vba__refcyc_per_req_delivery_cur0 = get_refcyc_per_cursor_req_delivery_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_pre_cur0);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_cur0);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_cur0);
+ dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, vba__refcyc_per_req_delivery_cur0);
+
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_cur0", refcyc_per_req_delivery_pre_cur0, vba__refcyc_per_req_delivery_pre_cur0);
+ //old_impl_vs_vba_impl("refcyc_per_req_delivery_cur0", refcyc_per_req_delivery_cur0, vba__refcyc_per_req_delivery_cur0);
+ }
+
+ refcyc_per_req_delivery_pre_cur1 = 0.0;
+ refcyc_per_req_delivery_cur1 = 0.0;
+
+ // TTU - Misc
+ // all hard-coded
+
+ // Assignment to register structures
+ disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line
+ ASSERT(disp_dlg_regs->dst_y_after_scaler < 8);
+ disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk
+ ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13));
+ disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2));
+
+ disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19));
+ disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19));
+
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
+
+ // hack for FPGA
+ if (mode_lib->project == DML_PROJECT_DCN31_FPGA) {
+ if (disp_dlg_regs->vratio_prefetch >= (unsigned int) dml_pow(2, 22)) {
+ disp_dlg_regs->vratio_prefetch = (unsigned int) dml_pow(2, 22) - 1;
+ dml_print("vratio_prefetch exceed the max value, the register field is [21:0]\n");
+ }
+ }
+
+ disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l);
+ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13));
+
+ if (dual_plane) {
+ disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_c);
+ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)dml_pow(2, 13));
+ }
+
+ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l);
+ ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13));
+
+ disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now
+
+ disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l;
+ disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l;
+
+ if (dual_plane) {
+ disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c;
+ disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c;
+ }
+
+ disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA
+ disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA
+
+ // Clamp to max for now
+ if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1;
+
+ if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1;
+
+ if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1;
+
+ if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1;
+
+ disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l / (double) vratio_l * dml_pow(2, 2));
+ ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17));
+ if (dual_plane) {
+ disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c / (double) vratio_c * dml_pow(2, 2));
+ if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) {
+ dml_print(
+ "DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n",
+ __func__,
+ disp_dlg_regs->dst_y_per_pte_row_nom_c,
+ (unsigned int) dml_pow(2, 17) - 1);
+ }
+ }
+
+ disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l / (double) vratio_l * dml_pow(2, 2));
+ ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17));
+
+ disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int) ((double) meta_row_height_c / (double) vratio_c * dml_pow(2, 2));
+ ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_c < (unsigned int)dml_pow(2, 17));
+
+ disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
+ / (double) dpte_groups_per_row_ub_l);
+ if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1;
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
+ / (double) meta_chunks_per_row_ub_l);
+ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1;
+
+ if (dual_plane) {
+ disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int) ((double) dpte_row_height_c / (double) vratio_c * (double) htotal * ref_freq_to_pix_freq
+ / (double) dpte_groups_per_row_ub_c);
+ if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1;
+
+ // TODO: Is this the right calculation? Does htotal need to be halved?
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int) ((double) meta_row_height_c / (double) vratio_c * (double) htotal * ref_freq_to_pix_freq
+ / (double) meta_chunks_per_row_ub_c);
+ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1;
+ }
+
+ disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, 1);
+ disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, 1);
+ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13)); ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13));
+
+ disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, 1);
+ disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, 1);
+ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13)); ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13));
+
+ disp_dlg_regs->chunk_hdl_adjust_cur0 = 3;
+ disp_dlg_regs->dst_y_offset_cur0 = 0;
+ disp_dlg_regs->chunk_hdl_adjust_cur1 = 3;
+ disp_dlg_regs->dst_y_offset_cur1 = 0;
+
+ disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
+
+ disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 * dml_pow(2, 10));
+
+ disp_ttu_regs->qos_level_low_wm = 0;
+ ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14));
+
+ disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal * ref_freq_to_pix_freq);
+ ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));
+
+ disp_ttu_regs->qos_level_flip = 14;
+ disp_ttu_regs->qos_level_fixed_l = 8;
+ disp_ttu_regs->qos_level_fixed_c = 8;
+ disp_ttu_regs->qos_level_fixed_cur0 = 8;
+ disp_ttu_regs->qos_ramp_disable_l = 0;
+ disp_ttu_regs->qos_ramp_disable_c = 0;
+ disp_ttu_regs->qos_ramp_disable_cur0 = 0;
+
+ disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz;
+ ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24));
+
+ print__ttu_regs_st(mode_lib, disp_ttu_regs);
+ print__dlg_regs_st(mode_lib, disp_dlg_regs);
+}
+
+void dml314_rq_dlg_get_dlg_reg(
+ struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+ const bool pstate_en,
+ const bool vm_en,
+ const bool ignore_viewport_pos,
+ const bool immediate_flip_support)
+{
+ display_rq_params_st rq_param = {0};
+ display_dlg_sys_params_st dlg_sys_param = {0};
+
+ // Get watermark and Tex.
+ dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(mode_lib, e2e_pipe_param, num_pipes);
+ dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes);
+
+ print__dlg_sys_params_st(mode_lib, &dlg_sys_param);
+
+ // system parameter calculation done
+
+ dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx);
+ dml_rq_dlg_get_rq_params(mode_lib, &rq_param, &e2e_pipe_param[pipe_idx].pipe);
+ dml_rq_dlg_get_dlg_params(
+ mode_lib,
+ e2e_pipe_param,
+ num_pipes,
+ pipe_idx,
+ dlg_regs,
+ ttu_regs,
+ &rq_param.dlg,
+ &dlg_sys_param,
+ cstate_en,
+ pstate_en,
+ vm_en,
+ ignore_viewport_pos,
+ immediate_flip_support);
+ dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h
new file mode 100644
index 000000000000..49cb85d1056c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML314_DISPLAY_RQ_DLG_CALC_H__
+#define __DML314_DISPLAY_RQ_DLG_CALC_H__
+
+#include "../display_rq_dlg_helpers.h"
+
+struct display_mode_lib;
+
+// Function: dml_rq_dlg_get_rq_reg
+// Main entry point for test to get the register values out of this DML class.
+// This function calls <get_rq_param> and <extract_rq_regs> fucntions to calculate
+// and then populate the rq_regs struct
+// Input:
+// pipe_param - pipe source configuration (e.g. vp, pitch, scaling, dest, etc.)
+// Output:
+// rq_regs - struct that holds all the RQ registers field value.
+// See also: <display_rq_regs_st>
+void dml314_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+ const display_pipe_params_st *pipe_param);
+
+// Function: dml_rq_dlg_get_dlg_reg
+// Calculate and return DLG and TTU register struct given the system setting
+// Output:
+// dlg_regs - output DLG register struct
+// ttu_regs - output DLG TTU register struct
+// Input:
+// e2e_pipe_param - "compacted" array of e2e pipe param struct
+// num_pipes - num of active "pipe" or "route"
+// pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg
+// cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered.
+// Added for legacy or unrealistic timing tests.
+void dml314_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+ const bool pstate_en,
+ const bool vm_en,
+ const bool ignore_viewport_pos,
+ const bool immediate_flip_support);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
new file mode 100644
index 000000000000..66453546e24f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -0,0 +1,2291 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "dcn32_fpu.h"
+#include "dc_link_dp.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn20/dcn20_resource.h"
+#include "display_mode_vba_util_32.h"
+// We need this includes for WATERMARKS_* defines
+#include "clk_mgr/dcn32/dcn32_smu13_driver_if.h"
+#include "dcn30/dcn30_resource.h"
+
+#define DC_LOGGER_INIT(logger)
+
+struct _vcs_dpi_ip_params_st dcn3_2_ip = {
+ .gpuvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_enable = 0,
+ .rob_buffer_size_kbytes = 128,
+ .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1280,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 22,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .alpha_pixel_chunk_size_kbytes = 4,
+ .min_pixel_chunk_size_bytes = 1024,
+ .dcc_meta_buffer_size_bytes = 6272,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,
+ .maximum_pixels_per_line_per_dsc_unit = 6016,
+ .dsc422_native_support = true,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 57,
+ .line_buffer_size_bits = 1171920,
+ .max_line_buffer_lines = 32,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 47,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 125,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+ .max_num_dp2p0_outputs = 2,
+ .max_num_dp2p0_streams = 4,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 1564.0,
+ .fabricclk_mhz = 400.0,
+ .dispclk_mhz = 2150.0,
+ .dppclk_mhz = 2150.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .phyclk_d32_mhz = 625.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 716.667,
+ .dram_speed_mts = 16000.0,
+ .dtbclk_mhz = 1564.0,
+ },
+ },
+ .num_states = 1,
+ .sr_exit_time_us = 20.16,
+ .sr_enter_plus_exit_time_us = 27.13,
+ .sr_exit_z8_time_us = 285.0,
+ .sr_enter_plus_exit_z8_time_us = 320,
+ .writeback_latency_us = 12.0,
+ .round_trip_ping_latency_dcfclk_cycles = 263,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .fclk_change_latency_us = 20,
+ .usr_retraining_latency_us = 2,
+ .smn_latency_us = 2,
+ .mall_allocated_for_dcn_mbytes = 64,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 100.0,
+ .pct_ideal_fabric_bw_after_urgent = 67.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented
+ .pct_ideal_dram_bw_after_urgent_strobe = 67.0,
+ .max_avg_sdp_bw_use_normal_percent = 80.0,
+ .max_avg_fabric_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_strobe_percent = 50.0,
+ .max_avg_dram_bw_use_normal_percent = 15.0,
+ .num_chans = 8,
+ .dram_channel_width_bytes = 2,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .dram_clock_change_latency_us = 400,
+ .dispclk_dppclk_vco_speed_mhz = 4300.0,
+ .do_urgent_latency_adjustment = true,
+ .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+};
+
+void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr)
+{
+ /* defaults */
+ double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us;
+ double fclk_change_latency_us = clk_mgr->base.ctx->dc->dml.soc.fclk_change_latency_us;
+ double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us;
+ double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
+ /* For min clocks use as reported by PM FW and report those as min */
+ uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz;
+ uint16_t min_dcfclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz;
+ uint16_t setb_min_uclk_mhz = min_uclk_mhz;
+ uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz;
+
+ dc_assert_fp_enabled();
+
+ /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */
+ if (dcfclk_mhz_for_the_second_state)
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state;
+ else
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz;
+
+ if (clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz)
+ setb_min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz;
+
+ /* Set A - Normal - default values */
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
+
+ /* Set B - Performance - higher clocks, using DPM[2] DCFCLK and UCLK */
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us = fclk_change_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF;
+
+ /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
+ /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */
+ if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) {
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
+ clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16;
+ clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
+ clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16;
+ clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
+ clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16;
+ clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8;
+ clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[3].memclk_mhz * 16;
+ clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5;
+ }
+ /* Set D - MALL - SR enter and exit time specific to MALL, TBD after bringup or later phase for now use DRAM values / 2 */
+ /* For MALL DRAM clock change latency is N/A, for watermak calculations use lowest value dummy P state latency */
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us = fclk_change_latency_us;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = sr_exit_time_us / 2; // TBD
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us / 2; // TBD
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
+}
+
+/**
+ * dcn32_helper_populate_phantom_dlg_params - Get DLG params for phantom pipes
+ * and populate pipe_ctx with those params.
+ *
+ * This function must be called AFTER the phantom pipes are added to context
+ * and run through DML (so that the DLG params for the phantom pipes can be
+ * populated), and BEFORE we program the timing for the phantom pipes.
+ *
+ * @dc: [in] current dc state
+ * @context: [in] new dc state
+ * @pipes: [in] DML pipe params array
+ * @pipe_cnt: [in] DML pipe count
+ */
+void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+ uint32_t i, pipe_idx;
+
+ dc_assert_fp_enabled();
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ pipes[pipe_idx].pipe.dest.vstartup_start =
+ get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_offset =
+ get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_width =
+ get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipes[pipe_idx].pipe.dest.vready_offset =
+ get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipe->pipe_dlg_param = pipes[pipe_idx].pipe.dest;
+ }
+ pipe_idx++;
+ }
+}
+
+bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index)
+{
+ double pscl_throughput;
+ double pscl_throughput_chroma;
+ double dpp_clk_single_dpp, clock;
+ double clk_frequency = 0.0;
+ double vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz;
+
+ dc_assert_fp_enabled();
+
+ dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe.scale_ratio_depth.hscl_ratio,
+ pipe.scale_ratio_depth.hscl_ratio_c,
+ pipe.scale_ratio_depth.vscl_ratio,
+ pipe.scale_ratio_depth.vscl_ratio_c,
+ context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk,
+ context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk,
+ pipe.dest.pixel_rate_mhz,
+ pipe.src.source_format,
+ pipe.scale_taps.htaps,
+ pipe.scale_taps.htaps_c,
+ pipe.scale_taps.vtaps,
+ pipe.scale_taps.vtaps_c,
+ /* Output */
+ &pscl_throughput, &pscl_throughput_chroma,
+ &dpp_clk_single_dpp);
+
+ clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100);
+
+ if (clock > 0)
+ clk_frequency = vco_speed * 4.0 / ((int)(vco_speed * 4.0));
+
+ if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[index].dppclk_mhz)
+ return true;
+ else
+ return false;
+}
+
+static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+ float memory_bw_kbytes_sec;
+ float fabric_bw_kbytes_sec;
+ float sdp_bw_kbytes_sec;
+ float limiting_bw_kbytes_sec;
+
+ memory_bw_kbytes_sec = entry->dram_speed_mts *
+ dcn3_2_soc.num_chans *
+ dcn3_2_soc.dram_channel_width_bytes *
+ ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+
+ fabric_bw_kbytes_sec = entry->fabricclk_mhz *
+ dcn3_2_soc.return_bus_width_bytes *
+ ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100);
+
+ sdp_bw_kbytes_sec = entry->dcfclk_mhz *
+ dcn3_2_soc.return_bus_width_bytes *
+ ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100);
+
+ limiting_bw_kbytes_sec = memory_bw_kbytes_sec;
+
+ if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec)
+ limiting_bw_kbytes_sec = fabric_bw_kbytes_sec;
+
+ if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec)
+ limiting_bw_kbytes_sec = sdp_bw_kbytes_sec;
+
+ return limiting_bw_kbytes_sec;
+}
+
+static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+ if (entry->dcfclk_mhz > 0) {
+ float bw_on_sdp = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100);
+
+ entry->fabricclk_mhz = bw_on_sdp / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100));
+ entry->dram_speed_mts = bw_on_sdp / (dcn3_2_soc.num_chans *
+ dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
+ } else if (entry->fabricclk_mhz > 0) {
+ float bw_on_fabric = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100);
+
+ entry->dcfclk_mhz = bw_on_fabric / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100));
+ entry->dram_speed_mts = bw_on_fabric / (dcn3_2_soc.num_chans *
+ dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
+ } else if (entry->dram_speed_mts > 0) {
+ float bw_on_dram = entry->dram_speed_mts * dcn3_2_soc.num_chans *
+ dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+
+ entry->fabricclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100));
+ entry->dcfclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100));
+ }
+}
+
+void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+ unsigned int *num_entries,
+ struct _vcs_dpi_voltage_scaling_st *entry)
+{
+ int i = 0;
+ int index = 0;
+ float net_bw_of_new_state = 0;
+
+ dc_assert_fp_enabled();
+
+ get_optimal_ntuple(entry);
+
+ if (*num_entries == 0) {
+ table[0] = *entry;
+ (*num_entries)++;
+ } else {
+ net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry);
+ while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) {
+ index++;
+ if (index >= *num_entries)
+ break;
+ }
+
+ for (i = *num_entries; i > index; i--)
+ table[i] = table[i - 1];
+
+ table[index] = *entry;
+ (*num_entries)++;
+ }
+}
+
+/**
+ * dcn32_set_phantom_stream_timing: Set timing params for the phantom stream
+ *
+ * Set timing params of the phantom stream based on calculated output from DML.
+ * This function first gets the DML pipe index using the DC pipe index, then
+ * calls into DML (get_subviewport_lines_needed_in_mall) to get the number of
+ * lines required for SubVP MCLK switching and assigns to the phantom stream
+ * accordingly.
+ *
+ * - The number of SubVP lines calculated in DML does not take into account
+ * FW processing delays and required pstate allow width, so we must include
+ * that separately.
+ *
+ * - Set phantom backporch = vstartup of main pipe
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ * @ref_pipe: Main pipe for the phantom stream
+ * @pipes: DML pipe params
+ * @pipe_cnt: number of DML pipes
+ * @dc_pipe_idx: DC pipe index for the main pipe (i.e. ref_pipe)
+ */
+void dcn32_set_phantom_stream_timing(struct dc *dc,
+ struct dc_state *context,
+ struct pipe_ctx *ref_pipe,
+ struct dc_stream_state *phantom_stream,
+ display_e2e_pipe_params_st *pipes,
+ unsigned int pipe_cnt,
+ unsigned int dc_pipe_idx)
+{
+ unsigned int i, pipe_idx;
+ struct pipe_ctx *pipe;
+ uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines;
+ unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel;
+ unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel];
+
+ dc_assert_fp_enabled();
+
+ // Find DML pipe index (pipe_idx) using dc_pipe_idx
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (i == dc_pipe_idx)
+ break;
+
+ pipe_idx++;
+ }
+
+ // Calculate lines required for pstate allow width and FW processing delays
+ pstate_width_fw_delay_lines = ((double)(dc->caps.subvp_fw_processing_delay_us +
+ dc->caps.subvp_pstate_allow_width_us) / 1000000) *
+ (ref_pipe->stream->timing.pix_clk_100hz * 100) /
+ (double)ref_pipe->stream->timing.h_total;
+
+ // Update clks_cfg for calling into recalculate
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = socclk;
+
+ // DML calculation for MALL region doesn't take into account FW delay
+ // and required pstate allow width for multi-display cases
+ phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) +
+ pstate_width_fw_delay_lines;
+
+ // For backporch of phantom pipe, use vstartup of the main pipe
+ phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+ phantom_stream->dst.y = 0;
+ phantom_stream->dst.height = phantom_vactive;
+ phantom_stream->src.y = 0;
+ phantom_stream->src.height = phantom_vactive;
+
+ phantom_stream->timing.v_addressable = phantom_vactive;
+ phantom_stream->timing.v_front_porch = 1;
+ phantom_stream->timing.v_total = phantom_stream->timing.v_addressable +
+ phantom_stream->timing.v_front_porch +
+ phantom_stream->timing.v_sync_width +
+ phantom_bp;
+}
+
+/**
+ * dcn32_get_num_free_pipes: Calculate number of free pipes
+ *
+ * This function assumes that a "used" pipe is a pipe that has
+ * both a stream and a plane assigned to it.
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * Return:
+ * Number of free pipes available in the context
+ */
+static unsigned int dcn32_get_num_free_pipes(struct dc *dc, struct dc_state *context)
+{
+ unsigned int i;
+ unsigned int free_pipes = 0;
+ unsigned int num_pipes = 0;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream && !pipe->top_pipe) {
+ while (pipe) {
+ num_pipes++;
+ pipe = pipe->bottom_pipe;
+ }
+ }
+ }
+
+ free_pipes = dc->res_pool->pipe_count - num_pipes;
+ return free_pipes;
+}
+
+/**
+ * dcn32_assign_subvp_pipe: Function to decide which pipe will use Sub-VP.
+ *
+ * We enter this function if we are Sub-VP capable (i.e. enough pipes available)
+ * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if
+ * we are forcing SubVP P-State switching on the current config.
+ *
+ * The number of pipes used for the chosen surface must be less than or equal to the
+ * number of free pipes available.
+ *
+ * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK).
+ * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own
+ * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't
+ * support MCLK switching naturally [i.e. ACTIVE or VBLANK]).
+ *
+ * @param dc: current dc state
+ * @param context: new dc state
+ * @param index: [out] dc pipe index for the pipe chosen to have phantom pipes assigned
+ *
+ * Return:
+ * True if a valid pipe assignment was found for Sub-VP. Otherwise false.
+ */
+static bool dcn32_assign_subvp_pipe(struct dc *dc,
+ struct dc_state *context,
+ unsigned int *index)
+{
+ unsigned int i, pipe_idx;
+ unsigned int max_frame_time = 0;
+ bool valid_assignment_found = false;
+ unsigned int free_pipes = dcn32_get_num_free_pipes(dc, context);
+ bool current_assignment_freesync = false;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ unsigned int num_pipes = 0;
+ unsigned int refresh_rate = 0;
+
+ if (!pipe->stream)
+ continue;
+
+ // Round up
+ refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 +
+ pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1)
+ / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total);
+ if (pipe->plane_state && !pipe->top_pipe &&
+ pipe->stream->mall_stream_config.type == SUBVP_NONE && refresh_rate < 120) {
+ while (pipe) {
+ num_pipes++;
+ pipe = pipe->bottom_pipe;
+ }
+
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (num_pipes <= free_pipes) {
+ struct dc_stream_state *stream = pipe->stream;
+ unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total /
+ (double)(stream->timing.pix_clk_100hz * 100)) * 1000000;
+ if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) {
+ *index = i;
+ max_frame_time = frame_us;
+ valid_assignment_found = true;
+ current_assignment_freesync = false;
+ /* For the 2-Freesync display case, still choose the one with the
+ * longest frame time
+ */
+ } else if (stream->ignore_msa_timing_param && (!valid_assignment_found ||
+ (current_assignment_freesync && frame_us > max_frame_time))) {
+ *index = i;
+ valid_assignment_found = true;
+ current_assignment_freesync = true;
+ }
+ }
+ }
+ pipe_idx++;
+ }
+ return valid_assignment_found;
+}
+
+/**
+ * dcn32_enough_pipes_for_subvp: Function to check if there are "enough" pipes for SubVP.
+ *
+ * This function returns true if there are enough free pipes
+ * to create the required phantom pipes for any given stream
+ * (that does not already have phantom pipe assigned).
+ *
+ * e.g. For a 2 stream config where the first stream uses one
+ * pipe and the second stream uses 2 pipes (i.e. pipe split),
+ * this function will return true because there is 1 remaining
+ * pipe which can be used as the phantom pipe for the non pipe
+ * split pipe.
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * Return:
+ * True if there are enough free pipes to assign phantom pipes to at least one
+ * stream that does not already have phantom pipes assigned. Otherwise false.
+ */
+static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context)
+{
+ unsigned int i, split_cnt, free_pipes;
+ unsigned int min_pipe_split = dc->res_pool->pipe_count + 1; // init as max number of pipes + 1
+ bool subvp_possible = false;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ // Find the minimum pipe split count for non SubVP pipes
+ if (pipe->stream && !pipe->top_pipe &&
+ pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ split_cnt = 0;
+ while (pipe) {
+ split_cnt++;
+ pipe = pipe->bottom_pipe;
+ }
+
+ if (split_cnt < min_pipe_split)
+ min_pipe_split = split_cnt;
+ }
+ }
+
+ free_pipes = dcn32_get_num_free_pipes(dc, context);
+
+ // SubVP only possible if at least one pipe is being used (i.e. free_pipes
+ // should not equal to the pipe_count)
+ if (free_pipes >= min_pipe_split && free_pipes < dc->res_pool->pipe_count)
+ subvp_possible = true;
+
+ return subvp_possible;
+}
+
+/**
+ * subvp_subvp_schedulable: Determine if SubVP + SubVP config is schedulable
+ *
+ * High level algorithm:
+ * 1. Find longest microschedule length (in us) between the two SubVP pipes
+ * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both
+ * pipes still allows for the maximum microschedule to fit in the active
+ * region for both pipes.
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * Return:
+ * bool - True if the SubVP + SubVP config is schedulable, false otherwise
+ */
+static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context)
+{
+ struct pipe_ctx *subvp_pipes[2];
+ struct dc_stream_state *phantom = NULL;
+ uint32_t microschedule_lines = 0;
+ uint32_t index = 0;
+ uint32_t i;
+ uint32_t max_microschedule_us = 0;
+ int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ uint32_t time_us = 0;
+
+ /* Loop to calculate the maximum microschedule time between the two SubVP pipes,
+ * and also to store the two main SubVP pipe pointers in subvp_pipes[2].
+ */
+ if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
+ pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+ phantom = pipe->stream->mall_stream_config.paired_stream;
+ microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) +
+ phantom->timing.v_addressable;
+
+ // Round up when calculating microschedule time (+ 1 at the end)
+ time_us = (microschedule_lines * phantom->timing.h_total) /
+ (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 +
+ dc->caps.subvp_prefetch_end_to_mall_start_us +
+ dc->caps.subvp_fw_processing_delay_us + 1;
+ if (time_us > max_microschedule_us)
+ max_microschedule_us = time_us;
+
+ subvp_pipes[index] = pipe;
+ index++;
+
+ // Maximum 2 SubVP pipes
+ if (index == 2)
+ break;
+ }
+ }
+ vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) /
+ (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+ vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) /
+ (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+ vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) *
+ subvp_pipes[0]->stream->timing.h_total) /
+ (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+ vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) *
+ subvp_pipes[1]->stream->timing.h_total) /
+ (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+
+ if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us &&
+ (vactive2_us - vblank1_us) / 2 > max_microschedule_us)
+ return true;
+
+ return false;
+}
+
+/**
+ * subvp_drr_schedulable: Determine if SubVP + DRR config is schedulable
+ *
+ * High level algorithm:
+ * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe
+ * 2. Determine the frame time for the DRR display when adding required margin for MCLK switching
+ * (the margin is equal to the MALL region + DRR margin (500us))
+ * 3.If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR frame))
+ * then report the configuration as supported
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ * @drr_pipe: DRR pipe_ctx for the SubVP + DRR config
+ *
+ * Return:
+ * bool - True if the SubVP + DRR config is schedulable, false otherwise
+ */
+static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context, struct pipe_ctx *drr_pipe)
+{
+ bool schedulable = false;
+ uint32_t i;
+ struct pipe_ctx *pipe = NULL;
+ struct dc_crtc_timing *main_timing = NULL;
+ struct dc_crtc_timing *phantom_timing = NULL;
+ struct dc_crtc_timing *drr_timing = NULL;
+ int16_t prefetch_us = 0;
+ int16_t mall_region_us = 0;
+ int16_t drr_frame_us = 0; // nominal frame time
+ int16_t subvp_active_us = 0;
+ int16_t stretched_drr_us = 0;
+ int16_t drr_stretched_vblank_us = 0;
+ int16_t max_vblank_mallregion = 0;
+
+ // Find SubVP pipe
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ // We check for master pipe, but it shouldn't matter since we only need
+ // the pipe for timing info (stream should be same for any pipe splits)
+ if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
+ continue;
+
+ // Find the SubVP pipe
+ if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+ break;
+ }
+
+ main_timing = &pipe->stream->timing;
+ phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing;
+ drr_timing = &drr_pipe->stream->timing;
+ prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+ (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+ dc->caps.subvp_prefetch_end_to_mall_start_us;
+ subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+ (double)(main_timing->pix_clk_100hz * 100) * 1000000;
+ drr_frame_us = drr_timing->v_total * drr_timing->h_total /
+ (double)(drr_timing->pix_clk_100hz * 100) * 1000000;
+ // P-State allow width and FW delays already included phantom_timing->v_addressable
+ mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+ (double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+ stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
+ drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
+ (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
+ max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+
+ /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the
+ * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis
+ * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time,
+ * and the max of (VBLANK blanking time, MALL region)).
+ */
+ if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 &&
+ subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0)
+ schedulable = true;
+
+ return schedulable;
+}
+
+
+/**
+ * subvp_vblank_schedulable: Determine if SubVP + VBLANK config is schedulable
+ *
+ * High level algorithm:
+ * 1. Get timing for SubVP pipe, phantom pipe, and VBLANK pipe
+ * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time))
+ * then report the configuration as supported
+ * 3. If the VBLANK display is DRR, then take the DRR static schedulability path
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * Return:
+ * bool - True if the SubVP + VBLANK/DRR config is schedulable, false otherwise
+ */
+static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
+{
+ struct pipe_ctx *pipe = NULL;
+ struct pipe_ctx *subvp_pipe = NULL;
+ bool found = false;
+ bool schedulable = false;
+ uint32_t i = 0;
+ uint8_t vblank_index = 0;
+ uint16_t prefetch_us = 0;
+ uint16_t mall_region_us = 0;
+ uint16_t vblank_frame_us = 0;
+ uint16_t subvp_active_us = 0;
+ uint16_t vblank_blank_us = 0;
+ uint16_t max_vblank_mallregion = 0;
+ struct dc_crtc_timing *main_timing = NULL;
+ struct dc_crtc_timing *phantom_timing = NULL;
+ struct dc_crtc_timing *vblank_timing = NULL;
+
+ /* For SubVP + VBLANK/DRR cases, we assume there can only be
+ * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK
+ * is supported, it is either a single VBLANK case or two VBLANK
+ * displays which are synchronized (in which case they have identical
+ * timings).
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ // We check for master pipe, but it shouldn't matter since we only need
+ // the pipe for timing info (stream should be same for any pipe splits)
+ if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
+ continue;
+
+ if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
+ vblank_index = i;
+ found = true;
+ }
+
+ if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+ subvp_pipe = pipe;
+ }
+ // Use ignore_msa_timing_param flag to identify as DRR
+ if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) {
+ // SUBVP + DRR case
+ schedulable = subvp_drr_schedulable(dc, context, &context->res_ctx.pipe_ctx[vblank_index]);
+ } else if (found) {
+ main_timing = &subvp_pipe->stream->timing;
+ phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+ vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
+ // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
+ // Also include the prefetch end to mallstart delay time
+ prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+ (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+ dc->caps.subvp_prefetch_end_to_mall_start_us;
+ // P-State allow width and FW delays already included phantom_timing->v_addressable
+ mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+ (double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+ vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total /
+ (double)(vblank_timing->pix_clk_100hz * 100) * 1000000;
+ vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total /
+ (double)(vblank_timing->pix_clk_100hz * 100) * 1000000;
+ subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+ (double)(main_timing->pix_clk_100hz * 100) * 1000000;
+ max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us;
+
+ // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time,
+ // and the max of (VBLANK blanking time, MALL region)
+ // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0)
+ if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0)
+ schedulable = true;
+ }
+ return schedulable;
+}
+
+/**
+ * subvp_validate_static_schedulability: Check which SubVP case is calculated and handle
+ * static analysis based on the case.
+ *
+ * Three cases:
+ * 1. SubVP + SubVP
+ * 2. SubVP + VBLANK (DRR checked internally)
+ * 3. SubVP + VACTIVE (currently unsupported)
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ * @vlevel: Voltage level calculated by DML
+ *
+ * Return:
+ * bool - True if statically schedulable, false otherwise
+ */
+static bool subvp_validate_static_schedulability(struct dc *dc,
+ struct dc_state *context,
+ int vlevel)
+{
+ bool schedulable = true; // true by default for single display case
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+ uint32_t i, pipe_idx;
+ uint8_t subvp_count = 0;
+ uint8_t vactive_count = 0;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (pipe->plane_state && !pipe->top_pipe &&
+ pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+ subvp_count++;
+
+ // Count how many planes that aren't SubVP/phantom are capable of VACTIVE
+ // switching (SubVP + VACTIVE unsupported). In situations where we force
+ // SubVP for a VACTIVE plane, we don't want to increment the vactive_count.
+ if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 &&
+ pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ vactive_count++;
+ }
+ pipe_idx++;
+ }
+
+ if (subvp_count == 2) {
+ // Static schedulability check for SubVP + SubVP case
+ schedulable = subvp_subvp_schedulable(dc, context);
+ } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp) {
+ // Static schedulability check for SubVP + VBLANK case. Also handle the case where
+ // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK)
+ if (vactive_count > 0)
+ schedulable = false;
+ else
+ schedulable = subvp_vblank_schedulable(dc, context);
+ } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vactive_w_mall_sub_vp &&
+ vactive_count > 0) {
+ // For single display SubVP cases, DML will output dm_dram_clock_change_vactive_w_mall_sub_vp by default.
+ // We tell the difference between SubVP vs. SubVP + VACTIVE by checking the vactive_count.
+ // SubVP + VACTIVE currently unsupported
+ schedulable = false;
+ }
+ return schedulable;
+}
+
+static void dcn32_full_validate_bw_helper(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *vlevel,
+ int *split,
+ bool *merge,
+ int *pipe_cnt)
+{
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+ unsigned int dc_pipe_idx = 0;
+ bool found_supported_config = false;
+ struct pipe_ctx *pipe = NULL;
+ uint32_t non_subvp_pipes = 0;
+ bool drr_pipe_found = false;
+ uint32_t drr_pipe_index = 0;
+ uint32_t i = 0;
+
+ dc_assert_fp_enabled();
+
+ /*
+ * DML favors voltage over p-state, but we're more interested in
+ * supporting p-state over voltage. We can't support p-state in
+ * prefetch mode > 0 so try capping the prefetch mode to start.
+ */
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_uclk_fclk_and_stutter;
+ *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+ /* This may adjust vlevel and maxMpcComb */
+ if (*vlevel < context->bw_ctx.dml.soc.num_states)
+ *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
+
+ /* Conditions for setting up phantom pipes for SubVP:
+ * 1. Not force disable SubVP
+ * 2. Full update (i.e. !fast_validate)
+ * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?)
+ * 4. Display configuration passes validation
+ * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch)
+ */
+ if (!dc->debug.force_disable_subvp && dcn32_all_pipes_have_stream_and_plane(dc, context) &&
+ !dcn32_mpo_in_use(context) && (*vlevel == context->bw_ctx.dml.soc.num_states ||
+ vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported ||
+ dc->debug.force_subvp_mclk_switch)) {
+
+ dcn32_merge_pipes_for_subvp(dc, context);
+
+ while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) &&
+ dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) {
+ /* For the case where *vlevel = num_states, bandwidth validation has failed for this config.
+ * Adding phantom pipes won't change the validation result, so change the DML input param
+ * for P-State support before adding phantom pipes and recalculating the DML result.
+ * However, this case is only applicable for SubVP + DRR cases because the prefetch mode
+ * will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched
+ * enough to support MCLK switching.
+ */
+ if (*vlevel == context->bw_ctx.dml.soc.num_states) {
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_stutter;
+ /* There are params (such as FabricClock) that need to be recalculated
+ * after validation fails (otherwise it will be 0). Calculation for
+ * phantom vactive requires call into DML, so we must ensure all the
+ * vba params are valid otherwise we'll get incorrect phantom vactive.
+ */
+ *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+ }
+
+ dc->res_pool->funcs->add_phantom_pipes(dc, context, pipes, *pipe_cnt, dc_pipe_idx);
+
+ *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
+ // Populate dppclk to trigger a recalculate in dml_get_voltage_level
+ // so the phantom pipe DLG params can be assigned correctly.
+ pipes[0].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, *pipe_cnt, 0);
+ *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+
+ if (*vlevel < context->bw_ctx.dml.soc.num_states &&
+ vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported
+ && subvp_validate_static_schedulability(dc, context, *vlevel)) {
+ found_supported_config = true;
+ } else if (*vlevel < context->bw_ctx.dml.soc.num_states &&
+ vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) {
+ /* Case where 1 SubVP is added, and DML reports MCLK unsupported. This handles
+ * the case for SubVP + DRR, where the DRR display does not support MCLK switch
+ * at it's native refresh rate / timing.
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
+ pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ non_subvp_pipes++;
+ // Use ignore_msa_timing_param flag to identify as DRR
+ if (pipe->stream->ignore_msa_timing_param) {
+ drr_pipe_found = true;
+ drr_pipe_index = i;
+ }
+ }
+ }
+ // If there is only 1 remaining non SubVP pipe that is DRR, check static
+ // schedulability for SubVP + DRR.
+ if (non_subvp_pipes == 1 && drr_pipe_found) {
+ found_supported_config = subvp_drr_schedulable(dc, context,
+ &context->res_ctx.pipe_ctx[drr_pipe_index]);
+ }
+ }
+ }
+
+ // If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
+ // remove phantom pipes and repopulate dml pipes
+ if (!found_supported_config) {
+ dc->res_pool->funcs->remove_phantom_pipes(dc, context);
+ vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
+ *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
+ } else {
+ // only call dcn20_validate_apply_pipe_split_flags if we found a supported config
+ memset(split, 0, MAX_PIPES * sizeof(int));
+ memset(merge, 0, MAX_PIPES * sizeof(bool));
+ *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
+
+ // Most populate phantom DLG params before programming hardware / timing for phantom pipe
+ DC_FP_START();
+ dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt);
+ DC_FP_END();
+
+ // Note: We can't apply the phantom pipes to hardware at this time. We have to wait
+ // until driver has acquired the DMCUB lock to do it safely.
+ }
+ }
+}
+
+static bool is_dtbclk_required(struct dc *dc, struct dc_state *context)
+{
+ int i;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+ if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i]))
+ return true;
+ }
+ return false;
+}
+
+static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt, int vlevel)
+{
+ int i, pipe_idx;
+ bool usr_retraining_support = false;
+ bool unbounded_req_enabled = false;
+
+ dc_assert_fp_enabled();
+
+ /* Writeback MCIF_WB arbitration parameters */
+ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
+
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000;
+ context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16;
+ context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000;
+ context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000;
+ context->bw_ctx.bw.dcn.clk.p_state_change_support =
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
+ != dm_dram_clock_change_unsupported;
+ context->bw_ctx.bw.dcn.clk.num_ways = dcn32_helper_calculate_num_ways_for_subvp(dc, context);
+
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+ context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
+ context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000;
+ if (context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_fclock_change_unsupported)
+ context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false;
+ else
+ context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true;
+
+ usr_retraining_support = context->bw_ctx.dml.vba.USRRetrainingSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ ASSERT(usr_retraining_support);
+
+ if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz;
+
+ unbounded_req_enabled = get_unbounded_request_enabled(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+ if (unbounded_req_enabled && pipe_cnt > 1) {
+ // Unbounded requesting should not ever be used when more than 1 pipe is enabled.
+ ASSERT(false);
+ unbounded_req_enabled = false;
+ }
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+ pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+ pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+ pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+
+ if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
+ context->res_ctx.pipe_ctx[i].unbounded_req = false;
+ } else {
+ context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt,
+ pipe_idx);
+ context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled;
+ }
+
+ if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+ context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
+ pipe_idx++;
+ }
+ /*save a original dppclock copy*/
+ context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
+ context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
+ context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz
+ * 1000;
+ context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz
+ * 1000;
+
+ context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (context->res_ctx.pipe_ctx[i].stream)
+ context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[i].det_buffer_size_kb;
+ }
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg_v2(&context->bw_ctx.dml,
+ &context->res_ctx.pipe_ctx[i].dlg_regs, &context->res_ctx.pipe_ctx[i].ttu_regs, pipes,
+ pipe_cnt, pipe_idx);
+
+ context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg_v2(&context->res_ctx.pipe_ctx[i].rq_regs,
+ &context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+ pipe_idx++;
+ }
+}
+
+static struct pipe_ctx *dcn32_find_split_pipe(
+ struct dc *dc,
+ struct dc_state *context,
+ int old_index)
+{
+ struct pipe_ctx *pipe = NULL;
+ int i;
+
+ if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) {
+ pipe = &context->res_ctx.pipe_ctx[old_index];
+ pipe->pipe_idx = old_index;
+ }
+
+ if (!pipe)
+ for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
+ if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL
+ && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) {
+ if (context->res_ctx.pipe_ctx[i].stream == NULL) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ pipe->pipe_idx = i;
+ break;
+ }
+ }
+ }
+
+ /*
+ * May need to fix pipes getting tossed from 1 opp to another on flip
+ * Add for debugging transient underflow during topology updates:
+ * ASSERT(pipe);
+ */
+ if (!pipe)
+ for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
+ if (context->res_ctx.pipe_ctx[i].stream == NULL) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ pipe->pipe_idx = i;
+ break;
+ }
+ }
+
+ return pipe;
+}
+
+static bool dcn32_split_stream_for_mpc_or_odm(
+ const struct dc *dc,
+ struct resource_context *res_ctx,
+ struct pipe_ctx *pri_pipe,
+ struct pipe_ctx *sec_pipe,
+ bool odm)
+{
+ int pipe_idx = sec_pipe->pipe_idx;
+ const struct resource_pool *pool = dc->res_pool;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (odm && pri_pipe->plane_state) {
+ /* ODM + window MPO, where MPO window is on left half only */
+ if (pri_pipe->plane_state->clip_rect.x + pri_pipe->plane_state->clip_rect.width <=
+ pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) {
+
+ DC_LOG_SCALER("%s - ODM + window MPO(left). pri_pipe:%d\n",
+ __func__,
+ pri_pipe->pipe_idx);
+ return true;
+ }
+
+ /* ODM + window MPO, where MPO window is on right half only */
+ if (pri_pipe->plane_state->clip_rect.x >= pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) {
+
+ DC_LOG_SCALER("%s - ODM + window MPO(right). pri_pipe:%d\n",
+ __func__,
+ pri_pipe->pipe_idx);
+ return true;
+ }
+ }
+
+ *sec_pipe = *pri_pipe;
+
+ sec_pipe->pipe_idx = pipe_idx;
+ sec_pipe->plane_res.mi = pool->mis[pipe_idx];
+ sec_pipe->plane_res.hubp = pool->hubps[pipe_idx];
+ sec_pipe->plane_res.ipp = pool->ipps[pipe_idx];
+ sec_pipe->plane_res.xfm = pool->transforms[pipe_idx];
+ sec_pipe->plane_res.dpp = pool->dpps[pipe_idx];
+ sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst;
+ sec_pipe->stream_res.dsc = NULL;
+ if (odm) {
+ if (pri_pipe->next_odm_pipe) {
+ ASSERT(pri_pipe->next_odm_pipe != sec_pipe);
+ sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe;
+ sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe;
+ }
+ if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) {
+ pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe;
+ sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe;
+ }
+ if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) {
+ pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe;
+ sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe;
+ }
+ pri_pipe->next_odm_pipe = sec_pipe;
+ sec_pipe->prev_odm_pipe = pri_pipe;
+ ASSERT(sec_pipe->top_pipe == NULL);
+
+ if (!sec_pipe->top_pipe)
+ sec_pipe->stream_res.opp = pool->opps[pipe_idx];
+ else
+ sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp;
+ if (sec_pipe->stream->timing.flags.DSC == 1) {
+ dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx);
+ ASSERT(sec_pipe->stream_res.dsc);
+ if (sec_pipe->stream_res.dsc == NULL)
+ return false;
+ }
+ } else {
+ if (pri_pipe->bottom_pipe) {
+ ASSERT(pri_pipe->bottom_pipe != sec_pipe);
+ sec_pipe->bottom_pipe = pri_pipe->bottom_pipe;
+ sec_pipe->bottom_pipe->top_pipe = sec_pipe;
+ }
+ pri_pipe->bottom_pipe = sec_pipe;
+ sec_pipe->top_pipe = pri_pipe;
+
+ ASSERT(pri_pipe->plane_state);
+ }
+
+ return true;
+}
+
+bool dcn32_internal_validate_bw(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *pipe_cnt_out,
+ int *vlevel_out,
+ bool fast_validate)
+{
+ bool out = false;
+ bool repopulate_pipes = false;
+ int split[MAX_PIPES] = { 0 };
+ bool merge[MAX_PIPES] = { false };
+ bool newly_split[MAX_PIPES] = { false };
+ int pipe_cnt, i, pipe_idx, vlevel;
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+
+ dc_assert_fp_enabled();
+
+ ASSERT(pipes);
+ if (!pipes)
+ return false;
+
+ // For each full update, remove all existing phantom pipes first
+ dc->res_pool->funcs->remove_phantom_pipes(dc, context);
+
+ dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
+
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+
+ if (!pipe_cnt) {
+ out = true;
+ goto validate_out;
+ }
+
+ dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+ if (!fast_validate) {
+ DC_FP_START();
+ dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt);
+ DC_FP_END();
+ }
+
+ if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states ||
+ vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) {
+ /*
+ * If mode is unsupported or there's still no p-state support then
+ * fall back to favoring voltage.
+ *
+ * If Prefetch mode 0 failed for this config, or passed with Max UCLK, try if
+ * supported with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2)
+ */
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_fclk_and_stutter;
+
+ vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+ /* Last attempt with Prefetch mode 2 (dm_prefetch_support_stutter == 3) */
+ if (vlevel == context->bw_ctx.dml.soc.num_states) {
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_stutter;
+ vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+ }
+
+ if (vlevel < context->bw_ctx.dml.soc.num_states) {
+ memset(split, 0, sizeof(split));
+ memset(merge, 0, sizeof(merge));
+ vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
+ }
+ }
+
+ dml_log_mode_support_params(&context->bw_ctx.dml);
+
+ if (vlevel == context->bw_ctx.dml.soc.num_states)
+ goto validate_fail;
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *mpo_pipe = pipe->bottom_pipe;
+
+ if (!pipe->stream)
+ continue;
+
+ if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled
+ && !dc->config.enable_windowed_mpo_odm
+ && pipe->plane_state && mpo_pipe
+ && memcmp(&mpo_pipe->plane_res.scl_data.recout,
+ &pipe->plane_res.scl_data.recout,
+ sizeof(struct rect)) != 0) {
+ ASSERT(mpo_pipe->plane_state != pipe->plane_state);
+ goto validate_fail;
+ }
+ pipe_idx++;
+ }
+
+ /* merge pipes if necessary */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ /*skip pipes that don't need merging*/
+ if (!merge[i])
+ continue;
+
+ /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */
+ if (pipe->prev_odm_pipe) {
+ /*split off odm pipe*/
+ pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe;
+ if (pipe->next_odm_pipe)
+ pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe;
+
+ pipe->bottom_pipe = NULL;
+ pipe->next_odm_pipe = NULL;
+ pipe->plane_state = NULL;
+ pipe->stream = NULL;
+ pipe->top_pipe = NULL;
+ pipe->prev_odm_pipe = NULL;
+ if (pipe->stream_res.dsc)
+ dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc);
+ memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
+ memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
+ repopulate_pipes = true;
+ } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
+ struct pipe_ctx *top_pipe = pipe->top_pipe;
+ struct pipe_ctx *bottom_pipe = pipe->bottom_pipe;
+
+ top_pipe->bottom_pipe = bottom_pipe;
+ if (bottom_pipe)
+ bottom_pipe->top_pipe = top_pipe;
+
+ pipe->top_pipe = NULL;
+ pipe->bottom_pipe = NULL;
+ pipe->plane_state = NULL;
+ pipe->stream = NULL;
+ memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
+ memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
+ repopulate_pipes = true;
+ } else
+ ASSERT(0); /* Should never try to merge master pipe */
+
+ }
+
+ for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *hsplit_pipe = NULL;
+ bool odm;
+ int old_index = -1;
+
+ if (!pipe->stream || newly_split[i])
+ continue;
+
+ pipe_idx++;
+ odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled;
+
+ if (!pipe->plane_state && !odm)
+ continue;
+
+ if (split[i]) {
+ if (odm) {
+ if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
+ else if (old_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->pipe_idx;
+ } else {
+ if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx;
+ else if (old_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->pipe_idx;
+ }
+ hsplit_pipe = dcn32_find_split_pipe(dc, context, old_index);
+ ASSERT(hsplit_pipe);
+ if (!hsplit_pipe)
+ goto validate_fail;
+
+ if (!dcn32_split_stream_for_mpc_or_odm(
+ dc, &context->res_ctx,
+ pipe, hsplit_pipe, odm))
+ goto validate_fail;
+
+ newly_split[hsplit_pipe->pipe_idx] = true;
+ repopulate_pipes = true;
+ }
+ if (split[i] == 4) {
+ struct pipe_ctx *pipe_4to1;
+
+ if (odm && old_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->pipe_idx;
+ else if (!odm && old_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->pipe_idx;
+ else
+ old_index = -1;
+ pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index);
+ ASSERT(pipe_4to1);
+ if (!pipe_4to1)
+ goto validate_fail;
+ if (!dcn32_split_stream_for_mpc_or_odm(
+ dc, &context->res_ctx,
+ pipe, pipe_4to1, odm))
+ goto validate_fail;
+ newly_split[pipe_4to1->pipe_idx] = true;
+
+ if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe
+ && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
+ else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->bottom_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx;
+ else
+ old_index = -1;
+ pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index);
+ ASSERT(pipe_4to1);
+ if (!pipe_4to1)
+ goto validate_fail;
+ if (!dcn32_split_stream_for_mpc_or_odm(
+ dc, &context->res_ctx,
+ hsplit_pipe, pipe_4to1, odm))
+ goto validate_fail;
+ newly_split[pipe_4to1->pipe_idx] = true;
+ }
+ if (odm)
+ dcn20_build_mapped_resource(dc, context, pipe->stream);
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state) {
+ if (!resource_build_scaling_params(pipe))
+ goto validate_fail;
+ }
+ }
+
+ /* Actual dsc count per stream dsc validation*/
+ if (!dcn20_validate_dsc(dc, context)) {
+ vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE;
+ goto validate_fail;
+ }
+
+ if (repopulate_pipes)
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+ *vlevel_out = vlevel;
+ *pipe_cnt_out = pipe_cnt;
+
+ out = true;
+ goto validate_out;
+
+validate_fail:
+ out = false;
+
+validate_out:
+ return out;
+}
+
+
+void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ int i, pipe_idx, vlevel_temp = 0;
+ double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
+ double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] !=
+ dm_dram_clock_change_unsupported;
+ unsigned int dummy_latency_index = 0;
+ int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+ unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+ unsigned int min_dram_speed_mts_margin;
+
+ dc_assert_fp_enabled();
+
+ // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK
+ if (!pstate_en && dcn32_subvp_in_use(dc, context)) {
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
+ pstate_en = true;
+ }
+
+ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;
+
+ if (!pstate_en) {
+ /* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */
+ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching =
+ dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context);
+
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+ dummy_latency_index = dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(dc,
+ context, pipes, pipe_cnt, vlevel);
+
+ /* After calling dcn30_find_dummy_latency_index_for_fw_based_mclk_switch
+ * we reinstate the original dram_clock_change_latency_us on the context
+ * and all variables that may have changed up to this point, except the
+ * newly found dummy_latency_index
+ */
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+ dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+ maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+ dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] !=
+ dm_dram_clock_change_unsupported;
+ }
+ }
+
+ /* Set B:
+ * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present,
+ * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark
+ * calculations to cover bootup clocks.
+ * DCFCLK: soc.clock_limits[2] when available
+ * UCLK: soc.clock_limits[2] when available
+ */
+ if (dcn3_2_soc.num_states > 2) {
+ vlevel_temp = 2;
+ dcfclk = dcn3_2_soc.clock_limits[2].dcfclk_mhz;
+ } else
+ dcfclk = 615; //DCFCLK Vmin_lv
+
+ pipes[0].clks_cfg.voltage = vlevel_temp;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz;
+
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
+ }
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ /* Set D:
+ * All clocks min.
+ * DCFCLK: Min, as reported by PM FW when available
+ * UCLK : Min, as reported by PM FW when available
+ * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr)
+ */
+
+ if (dcn3_2_soc.num_states > 2) {
+ vlevel_temp = 0;
+ dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz;
+ } else
+ dcfclk = 615; //DCFCLK Vmin_lv
+
+ pipes[0].clks_cfg.voltage = vlevel_temp;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz;
+
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us;
+ context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us;
+ }
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ /* Set C, for Dummy P-State:
+ * All clocks min.
+ * DCFCLK: Min, as reported by PM FW, when available
+ * UCLK : Min, as reported by PM FW, when available
+ * pstate latency as per UCLK state dummy pstate latency
+ */
+
+ // For Set A and Set C use values from validation
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+
+ if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
+ min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+ min_dram_speed_mts_margin = 160;
+
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us;
+
+ if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] ==
+ dm_dram_clock_change_unsupported) {
+ int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1;
+
+ min_dram_speed_mts =
+ dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16;
+ }
+
+ if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+ /* find largest table entry that is lower than dram speed,
+ * but lower than DPM0 still uses DPM0
+ */
+ for (dummy_latency_index = 3; dummy_latency_index > 0; dummy_latency_index--)
+ if (min_dram_speed_mts + min_dram_speed_mts_margin >
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dram_speed_mts)
+ break;
+ }
+
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+
+ context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us;
+ context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
+ context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
+ }
+
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) {
+ /* The only difference between A and C is p-state latency, if p-state is not supported
+ * with full p-state latency we want to calculate DLG based on dummy p-state latency,
+ * Set A p-state watermark set to 0 on DCN30, when p-state unsupported, for now keep as DCN30.
+ */
+ context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0;
+ } else {
+ /* Set A:
+ * All clocks min.
+ * DCFCLK: Min, as reported by PM FW, when available
+ * UCLK: Min, as reported by PM FW, when available
+ */
+ dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ }
+
+ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+ if (!context->res_ctx.pipe_ctx[i].stream)
+ continue;
+
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+ if (dc->config.forced_clocks) {
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
+ }
+ if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
+ if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+ pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
+
+ pipe_idx++;
+ }
+
+ context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
+
+ dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+
+ if (!pstate_en)
+ /* Restore full p-state latency */
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching)
+ dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context);
+}
+
+static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+ unsigned int *optimal_dcfclk,
+ unsigned int *optimal_fclk)
+{
+ double bw_from_dram, bw_from_dram1, bw_from_dram2;
+
+ bw_from_dram1 = uclk_mts * dcn3_2_soc.num_chans *
+ dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_dram_bw_use_normal_percent / 100);
+ bw_from_dram2 = uclk_mts * dcn3_2_soc.num_chans *
+ dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100);
+
+ bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
+
+ if (optimal_fclk)
+ *optimal_fclk = bw_from_dram /
+ (dcn3_2_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100));
+
+ if (optimal_dcfclk)
+ *optimal_dcfclk = bw_from_dram /
+ (dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
+ unsigned int index)
+{
+ int i;
+
+ if (*num_entries == 0)
+ return;
+
+ for (i = index; i < *num_entries - 1; i++) {
+ table[i] = table[i + 1];
+ }
+ memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st));
+}
+
+static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
+ struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+ int i, j;
+ struct _vcs_dpi_voltage_scaling_st entry = {0};
+
+ unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0,
+ max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0;
+
+ unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299;
+
+ static const unsigned int num_dcfclk_stas = 5;
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564};
+
+ unsigned int num_uclk_dpms = 0;
+ unsigned int num_fclk_dpms = 0;
+ unsigned int num_dcfclk_dpms = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz)
+ max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+ if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz)
+ max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz)
+ max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+
+ if (bw_params->clk_table.entries[i].memclk_mhz > 0)
+ num_uclk_dpms++;
+ if (bw_params->clk_table.entries[i].fclk_mhz > 0)
+ num_fclk_dpms++;
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > 0)
+ num_dcfclk_dpms++;
+ }
+
+ if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz)
+ return -1;
+
+ if (max_dppclk_mhz == 0)
+ max_dppclk_mhz = max_dispclk_mhz;
+
+ if (max_fclk_mhz == 0)
+ max_fclk_mhz = max_dcfclk_mhz * dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / dcn3_2_soc.pct_ideal_fabric_bw_after_urgent;
+
+ if (max_phyclk_mhz == 0)
+ max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz;
+
+ *num_entries = 0;
+ entry.dispclk_mhz = max_dispclk_mhz;
+ entry.dscclk_mhz = max_dispclk_mhz / 3;
+ entry.dppclk_mhz = max_dppclk_mhz;
+ entry.dtbclk_mhz = max_dtbclk_mhz;
+ entry.phyclk_mhz = max_phyclk_mhz;
+ entry.phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz;
+ entry.phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz;
+
+ // Insert all the DCFCLK STAs
+ for (i = 0; i < num_dcfclk_stas; i++) {
+ entry.dcfclk_mhz = dcfclk_sta_targets[i];
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = 0;
+
+ DC_FP_START();
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ DC_FP_END();
+ }
+
+ // Insert the max DCFCLK
+ entry.dcfclk_mhz = max_dcfclk_mhz;
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = 0;
+
+ DC_FP_START();
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ DC_FP_END();
+
+ // Insert the UCLK DPMS
+ for (i = 0; i < num_uclk_dpms; i++) {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16;
+
+ DC_FP_START();
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ DC_FP_END();
+ }
+
+ // If FCLK is coarse grained, insert individual DPMs.
+ if (num_fclk_dpms > 2) {
+ for (i = 0; i < num_fclk_dpms; i++) {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+ entry.dram_speed_mts = 0;
+
+ DC_FP_START();
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ DC_FP_END();
+ }
+ }
+ // If FCLK fine grained, only insert max
+ else {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = max_fclk_mhz;
+ entry.dram_speed_mts = 0;
+
+ DC_FP_START();
+ insert_entry_into_table_sorted(table, num_entries, &entry);
+ DC_FP_END();
+ }
+
+ // At this point, the table contains all "points of interest" based on
+ // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock
+ // ratios (by derate, are exact).
+
+ // Remove states that require higher clocks than are supported
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].dcfclk_mhz > max_dcfclk_mhz ||
+ table[i].fabricclk_mhz > max_fclk_mhz ||
+ table[i].dram_speed_mts > max_uclk_mhz * 16)
+ remove_entry_from_table_at_index(table, num_entries, i);
+ }
+
+ // At this point, the table only contains supported points of interest
+ // it could be used as is, but some states may be redundant due to
+ // coarse grained nature of some clocks, so we want to round up to
+ // coarse grained DPMs and remove duplicates.
+
+ // Round up UCLKs
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ for (j = 0; j < num_uclk_dpms; j++) {
+ if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) {
+ table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ // If FCLK is coarse grained, round up to next DPMs
+ if (num_fclk_dpms > 2) {
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ for (j = 0; j < num_fclk_dpms; j++) {
+ if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) {
+ table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz;
+ break;
+ }
+ }
+ }
+ }
+ // Otherwise, round up to minimum.
+ else {
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].fabricclk_mhz < min_fclk_mhz) {
+ table[i].fabricclk_mhz = min_fclk_mhz;
+ break;
+ }
+ }
+ }
+
+ // Round DCFCLKs up to minimum
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].dcfclk_mhz < min_dcfclk_mhz) {
+ table[i].dcfclk_mhz = min_dcfclk_mhz;
+ break;
+ }
+ }
+
+ // Remove duplicate states, note duplicate states are always neighbouring since table is sorted.
+ i = 0;
+ while (i < *num_entries - 1) {
+ if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz &&
+ table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz &&
+ table[i].dram_speed_mts == table[i + 1].dram_speed_mts)
+ remove_entry_from_table_at_index(table, num_entries, i + 1);
+ else
+ i++;
+ }
+
+ // Fix up the state indicies
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ table[i].state = i;
+ }
+
+ return 0;
+}
+
+/**
+ * dcn32_update_bw_bounding_box
+ *
+ * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from
+ * spreadsheet with actual values as per dGPU SKU:
+ * - with passed few options from dc->config
+ * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might
+ * need to get it from PM FW)
+ * - with passed latency values (passed in ns units) in dc-> bb override for
+ * debugging purposes
+ * - with passed latencies from VBIOS (in 100_ns units) if available for
+ * certain dGPU SKU
+ * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU
+ * of the same ASIC)
+ * - clocks levels with passed clk_table entries from Clk Mgr as reported by PM
+ * FW for different clocks (which might differ for certain dGPU SKU of the
+ * same ASIC)
+ */
+void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ dc_assert_fp_enabled();
+
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
+ /* Overrides from dc->config options */
+ dcn3_2_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk;
+
+ /* Override from passed dc->bb_overrides if available*/
+ if ((int)(dcn3_2_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
+ && dc->bb_overrides.sr_exit_time_ns) {
+ dcn3_2_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_2_soc.sr_enter_plus_exit_time_us * 1000)
+ != dc->bb_overrides.sr_enter_plus_exit_time_ns
+ && dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+ dcn3_2_soc.sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_2_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns
+ && dc->bb_overrides.urgent_latency_ns) {
+ dcn3_2_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_2_soc.dram_clock_change_latency_us * 1000)
+ != dc->bb_overrides.dram_clock_change_latency_ns
+ && dc->bb_overrides.dram_clock_change_latency_ns) {
+ dcn3_2_soc.dram_clock_change_latency_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000)
+ != dc->bb_overrides.dummy_clock_change_latency_ns
+ && dc->bb_overrides.dummy_clock_change_latency_ns) {
+ dcn3_2_soc.dummy_pstate_latency_us =
+ dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0;
+ }
+
+ /* Override from VBIOS if VBIOS bb_info available */
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info) {
+ struct bp_soc_bb_info bb_info = {0};
+
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
+ if (bb_info.dram_clock_change_latency_100ns > 0)
+ dcn3_2_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10;
+
+ if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dcn3_2_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+ if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dcn3_2_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
+ }
+ }
+
+ /* Override from VBIOS for num_chan */
+ if (dc->ctx->dc_bios->vram_info.num_chans)
+ dcn3_2_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
+
+ if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+ dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+
+ }
+
+ /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */
+ dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */
+ if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) {
+ if (dc->debug.use_legacy_soc_bb_mechanism) {
+ unsigned int i = 0, j = 0, num_states = 0;
+
+ unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
+ unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
+ unsigned int min_dcfclk = UINT_MAX;
+ /* Set 199 as first value in STA target array to have a minimum DCFCLK value.
+ * For DCN32 we set min to 199 so minimum FCLK DPM0 (300Mhz can be achieved) */
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564};
+ unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0;
+ unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].dcfclk_mhz != 0 &&
+ bw_params->clk_table.entries[i].dcfclk_mhz < min_dcfclk)
+ min_dcfclk = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ }
+ if (min_dcfclk > dcfclk_sta_targets[0])
+ dcfclk_sta_targets[0] = min_dcfclk;
+ if (!max_dcfclk_mhz)
+ max_dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
+ if (!max_dispclk_mhz)
+ max_dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz;
+ if (!max_dppclk_mhz)
+ max_dppclk_mhz = dcn3_2_soc.clock_limits[0].dppclk_mhz;
+ if (!max_phyclk_mhz)
+ max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz;
+
+ if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
+ dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+ num_dcfclk_sta_targets++;
+ } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
+ dcfclk_sta_targets[i] = max_dcfclk_mhz;
+ break;
+ }
+ }
+ // Update size of array since we "removed" duplicates
+ num_dcfclk_sta_targets = i + 1;
+ }
+
+ num_uclk_states = bw_params->clk_table.num_entries;
+
+ // Calculate optimal dcfclk for each uclk
+ for (i = 0; i < num_uclk_states; i++) {
+ dcn32_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
+ &optimal_dcfclk_for_uclk[i], NULL);
+ if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
+ optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
+ }
+ }
+
+ // Calculate optimal uclk for each dcfclk sta target
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ for (j = 0; j < num_uclk_states; j++) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ i = 0;
+ j = 0;
+ // create the final dcfclk and uclk table
+ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ } else {
+ if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ } else {
+ j = num_uclk_states;
+ }
+ }
+ }
+
+ while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ }
+
+ while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
+ optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ }
+
+ dcn3_2_soc.num_states = num_states;
+ for (i = 0; i < dcn3_2_soc.num_states; i++) {
+ dcn3_2_soc.clock_limits[i].state = i;
+ dcn3_2_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+ dcn3_2_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+
+ /* Fill all states with max values of all these clocks */
+ dcn3_2_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+ dcn3_2_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+ dcn3_2_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
+ dcn3_2_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3;
+
+ /* Populate from bw_params for DTBCLK, SOCCLK */
+ if (i > 0) {
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz) {
+ dcn3_2_soc.clock_limits[i].dtbclk_mhz = dcn3_2_soc.clock_limits[i-1].dtbclk_mhz;
+ } else {
+ dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+ } else if (bw_params->clk_table.entries[i].dtbclk_mhz) {
+ dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+
+ if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+ dcn3_2_soc.clock_limits[i].socclk_mhz = dcn3_2_soc.clock_limits[i-1].socclk_mhz;
+ else
+ dcn3_2_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
+
+ if (!dram_speed_mts[i] && i > 0)
+ dcn3_2_soc.clock_limits[i].dram_speed_mts = dcn3_2_soc.clock_limits[i-1].dram_speed_mts;
+ else
+ dcn3_2_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+ /* These clocks cannot come from bw_params, always fill from dcn3_2_soc[0] */
+ /* PHYCLK_D18, PHYCLK_D32 */
+ dcn3_2_soc.clock_limits[i].phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz;
+ dcn3_2_soc.clock_limits[i].phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz;
+ }
+ } else {
+ build_synthetic_soc_states(bw_params, dcn3_2_soc.clock_limits, &dcn3_2_soc.num_states);
+ }
+
+ /* Re-init DML with updated bb */
+ dml_init_instance(&dc->dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32);
+ if (dc->current_state)
+ dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32);
+ }
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
new file mode 100644
index 000000000000..3ed06ab855be
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN32_FPU_H__
+#define __DCN32_FPU_H__
+
+#include "clk_mgr_internal.h"
+
+#define DCN3_2_DEFAULT_DET_SIZE 256
+#define DCN3_2_MAX_DET_SIZE 1152
+#define DCN3_2_MIN_DET_SIZE 128
+#define DCN3_2_MIN_COMPBUF_SIZE_KB 128
+
+void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr);
+
+void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
+bool dcn32_predict_pipe_split(struct dc_state *context,
+ display_pipe_params_st pipe,
+ int index);
+
+void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+ unsigned int *num_entries,
+ struct _vcs_dpi_voltage_scaling_st *entry);
+
+void dcn32_set_phantom_stream_timing(struct dc *dc,
+ struct dc_state *context,
+ struct pipe_ctx *ref_pipe,
+ struct dc_stream_state *phantom_stream,
+ display_e2e_pipe_params_st *pipes,
+ unsigned int pipe_cnt,
+ unsigned int dc_pipe_idx);
+
+bool dcn32_internal_validate_bw(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *pipe_cnt_out,
+ int *vlevel_out,
+ bool fast_validate);
+
+void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
+void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
new file mode 100644
index 000000000000..890612db08dc
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -0,0 +1,3778 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dc.h"
+#include "dc_link.h"
+#include "../display_mode_lib.h"
+#include "display_mode_vba_32.h"
+#include "../dml_inline_defs.h"
+#include "display_mode_vba_util_32.h"
+
+void dml32_recalculate(struct display_mode_lib *mode_lib);
+static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
+ struct display_mode_lib *mode_lib);
+void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib);
+
+void dml32_recalculate(struct display_mode_lib *mode_lib)
+{
+ ModeSupportAndSystemConfiguration(mode_lib);
+
+ dml32_CalculateMaxDETAndMinCompressedBufferSize(mode_lib->vba.ConfigReturnBufferSizeInKByte,
+ mode_lib->vba.ROBBufferSizeInKByte,
+ DC__NUM_DPP,
+ false, //mode_lib->vba.override_setting.nomDETInKByteOverrideEnable,
+ 0, //mode_lib->vba.override_setting.nomDETInKByteOverrideValue,
+
+ /* Output */
+ &mode_lib->vba.MaxTotalDETInKByte, &mode_lib->vba.nomDETInKByte,
+ &mode_lib->vba.MinCompressedBufferSizeInKByte);
+
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
+#endif
+ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
+}
+
+static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
+ struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ unsigned int j, k;
+ bool ImmediateFlipRequirementFinal;
+ int iteration;
+ double MaxTotalRDBandwidth;
+ unsigned int NextPrefetchMode;
+ double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
+ bool DestinationLineTimesForPrefetchLessThan2 = false;
+ bool VRatioPrefetchMoreThanMax = false;
+ double TWait;
+ double TotalWRBandwidth = 0;
+ double WRBandwidth = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- START ---\n", __func__);
+ dml_print("DML::%s: mode_lib->vba.PrefetchMode = %d\n", __func__, mode_lib->vba.PrefetchMode);
+ dml_print("DML::%s: mode_lib->vba.ImmediateFlipSupport = %d\n", __func__, mode_lib->vba.ImmediateFlipSupport);
+ dml_print("DML::%s: mode_lib->vba.VoltageLevel = %d\n", __func__, mode_lib->vba.VoltageLevel);
+#endif
+
+ v->WritebackDISPCLK = 0.0;
+ v->GlobalDPPCLK = 0.0;
+
+ // DISPCLK and DPPCLK Calculation
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.WritebackEnable[k]) {
+ v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
+ dml32_CalculateWriteBackDISPCLK(
+ mode_lib->vba.WritebackPixelFormat[k],
+ mode_lib->vba.PixelClock[k], mode_lib->vba.WritebackHRatio[k],
+ mode_lib->vba.WritebackVRatio[k],
+ mode_lib->vba.WritebackHTaps[k],
+ mode_lib->vba.WritebackVTaps[k],
+ mode_lib->vba.WritebackSourceWidth[k],
+ mode_lib->vba.WritebackDestinationWidth[k],
+ mode_lib->vba.HTotal[k], mode_lib->vba.WritebackLineBufferSize,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed));
+ }
+ }
+
+ v->DISPCLK_calculated = v->WritebackDISPCLK;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ v->DISPCLK_calculated = dml_max(v->DISPCLK_calculated,
+ dml32_CalculateRequiredDispclk(
+ mode_lib->vba.ODMCombineEnabled[k],
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKRampingMargin,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed,
+ mode_lib->vba.MaxDppclk[v->soc.num_states - 1]));
+ }
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(mode_lib->vba.HRatio[k],
+ mode_lib->vba.HRatioChroma[k],
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ mode_lib->vba.MaxDCHUBToPSCLThroughput,
+ mode_lib->vba.MaxPSCLToLBThroughput,
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.htaps[k],
+ mode_lib->vba.HTAPsChroma[k],
+ mode_lib->vba.vtaps[k],
+ mode_lib->vba.VTAPsChroma[k],
+
+ /* Output */
+ &v->PSCL_THROUGHPUT_LUMA[k], &v->PSCL_THROUGHPUT_CHROMA[k],
+ &v->DPPCLKUsingSingleDPP[k]);
+ }
+
+ dml32_CalculateDPPCLK(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed, v->DPPCLKUsingSingleDPP, mode_lib->vba.DPPPerPlane,
+ /* Output */
+ &v->GlobalDPPCLK, v->DPPCLK);
+
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ v->DPPCLK_calculated[k] = v->DPPCLK[k];
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateBytePerPixelAndBlockSizes(
+ mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.SurfaceTiling[k],
+
+ /* Output */
+ &v->BytePerPixelY[k],
+ &v->BytePerPixelC[k],
+ &v->BytePerPixelDETY[k],
+ &v->BytePerPixelDETC[k],
+ &v->BlockHeight256BytesY[k],
+ &v->BlockHeight256BytesC[k],
+ &v->BlockWidth256BytesY[k],
+ &v->BlockWidth256BytesC[k],
+ &v->BlockHeightY[k],
+ &v->BlockHeightC[k],
+ &v->BlockWidthY[k],
+ &v->BlockWidthC[k]);
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: %d\n", __func__, __LINE__);
+#endif
+ dml32_CalculateSwathWidth(
+ false, // ForceSingleDPP
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.SourcePixelFormat,
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ mode_lib->vba.ODMCombineEnabled,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BlockHeight256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesY,
+ v->BlockWidth256BytesC,
+ mode_lib->vba.BlendingAndTiming,
+ mode_lib->vba.HActive,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.DPPPerPlane,
+
+ /* Output */
+ v->SwathWidthSingleDPPY, v->SwathWidthSingleDPPC, v->SwathWidthY, v->SwathWidthC,
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_integer_array[0], // Integer MaximumSwathHeightY[]
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_integer_array[1], // Integer MaximumSwathHeightC[]
+ v->swath_width_luma_ub, v->swath_width_chroma_ub);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->ReadBandwidthSurfaceLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k]
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
+ v->ReadBandwidthSurfaceChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k]
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
+ * mode_lib->vba.VRatioChroma[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n",
+ __func__, k, v->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n",
+ __func__, k, v->ReadBandwidthSurfaceChroma[k]);
+#endif
+ }
+
+ {
+ // VBA_DELTA
+ // Calculate DET size, swath height
+ dml32_CalculateSwathAndDETConfiguration(
+ &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
+ mode_lib->vba.DETSizeOverride,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.ConfigReturnBufferSizeInKByte,
+ mode_lib->vba.MaxTotalDETInKByte,
+ mode_lib->vba.MinCompressedBufferSizeInKByte,
+ false, /* ForceSingleDPP */
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.nomDETInKByte,
+ mode_lib->vba.UseUnboundedRequesting,
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ mode_lib->vba.ip.pixel_chunk_size_kbytes,
+ mode_lib->vba.ip.rob_buffer_size_kbytes,
+ mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_output_encoder_array, /* output_encoder_class Output[] */
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_single_array[0], /* Single MaximumSwathWidthLuma[] */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_single_array[1], /* Single MaximumSwathWidthChroma[] */
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.SourcePixelFormat,
+ mode_lib->vba.SurfaceTiling,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ v->BlockHeight256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesY,
+ v->BlockWidth256BytesC,
+ mode_lib->vba.ODMCombineEnabled,
+ mode_lib->vba.BlendingAndTiming,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ v->BytePerPixelDETY,
+ v->BytePerPixelDETC,
+ mode_lib->vba.HActive,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.DPPPerPlane,
+
+ /* Output */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_long_array[0], /* Long swath_width_luma_ub[] */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_long_array[1], /* Long swath_width_chroma_ub[] */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_double_array[0], /* Long SwathWidth[] */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_double_array[1], /* Long SwathWidthChroma[] */
+ mode_lib->vba.SwathHeightY,
+ mode_lib->vba.SwathHeightC,
+ mode_lib->vba.DETBufferSizeInKByte,
+ mode_lib->vba.DETBufferSizeY,
+ mode_lib->vba.DETBufferSizeC,
+ &v->UnboundedRequestEnabled,
+ &v->CompressedBufferSizeInkByte,
+ &v->CompBufReservedSpaceKBytes,
+ &v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_boolean, /* bool *CompBufReservedSpaceNeedAjustment */
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_boolean_array, /* bool ViewportSizeSupportPerSurface[] */
+ &v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_boolean); /* bool *ViewportSizeSupport */
+ }
+
+ v->CompBufReservedSpaceZs = v->CompBufReservedSpaceKBytes * 1024.0 / 256.0;
+ v->CompBufReservedSpace64B = v->CompBufReservedSpaceKBytes * 1024.0 / 64.0;
+
+ // DCFCLK Deep Sleep
+ dml32_CalculateDCFCLKDeepSleep(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.PixelClock,
+ v->PSCL_THROUGHPUT_LUMA,
+ v->PSCL_THROUGHPUT_CHROMA,
+ mode_lib->vba.DPPCLK,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ mode_lib->vba.ReturnBusWidth,
+
+ /* Output */
+ &v->DCFCLKDeepSleep);
+
+ // DSCCLK
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) {
+ v->DSCCLK_calculated[k] = 0.0;
+ } else {
+ if (mode_lib->vba.OutputFormat[k] == dm_420)
+ mode_lib->vba.DSCFormatFactor = 2;
+ else if (mode_lib->vba.OutputFormat[k] == dm_444)
+ mode_lib->vba.DSCFormatFactor = 1;
+ else if (mode_lib->vba.OutputFormat[k] == dm_n422)
+ mode_lib->vba.DSCFormatFactor = 2;
+ else
+ mode_lib->vba.DSCFormatFactor = 1;
+ if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
+ v->DSCCLK_calculated[k] = mode_lib->vba.PixelClockBackEnd[k] / 12
+ / mode_lib->vba.DSCFormatFactor
+ / (1 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ else if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
+ v->DSCCLK_calculated[k] = mode_lib->vba.PixelClockBackEnd[k] / 6
+ / mode_lib->vba.DSCFormatFactor
+ / (1 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ else
+ v->DSCCLK_calculated[k] = mode_lib->vba.PixelClockBackEnd[k] / 3
+ / mode_lib->vba.DSCFormatFactor
+ / (1 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ }
+ }
+
+ // DSC Delay
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->DSCDelay[k] = dml32_DSCDelayRequirement(mode_lib->vba.DSCEnabled[k],
+ mode_lib->vba.ODMCombineEnabled[k], mode_lib->vba.DSCInputBitPerComponent[k],
+ mode_lib->vba.OutputBpp[k], mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k],
+ mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.Output[k], mode_lib->vba.PixelClock[k],
+ mode_lib->vba.PixelClockBackEnd[k]);
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) // NumberOfSurfaces
+ if (j != k && mode_lib->vba.BlendingAndTiming[k] == j && mode_lib->vba.DSCEnabled[j])
+ v->DSCDelay[k] = v->DSCDelay[j];
+
+ //Immediate Flip
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->ImmediateFlipSupportedSurface[k] = mode_lib->vba.ImmediateFlipSupport
+ && (mode_lib->vba.ImmediateFlipRequirement[k] != dm_immediate_flip_not_required);
+ }
+
+ // Prefetch
+ dml32_CalculateSurfaceSizeInMall(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.MALLAllocatedForDCNFinal,
+ mode_lib->vba.UseMALLForStaticScreen,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ v->BytePerPixelY,
+ mode_lib->vba.ViewportWidthChroma,
+ mode_lib->vba.ViewportHeightChroma,
+ v->BytePerPixelC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ v->BlockWidth256BytesY,
+ v->BlockWidth256BytesC,
+ v->BlockHeight256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidthY,
+ v->BlockWidthC,
+ v->BlockHeightY,
+ v->BlockHeightC,
+
+ /* Output */
+ v->SurfaceSizeInMALL,
+ &v->dummy_vars.
+ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .dummy_boolean2); /* Boolean *ExceededMALLSize */
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].PixelClock = mode_lib->vba.PixelClock[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DPPPerSurface = mode_lib->vba.DPPPerPlane[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SourceRotation = mode_lib->vba.SourceRotation[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportHeight = mode_lib->vba.ViewportHeight[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportHeightChroma = mode_lib->vba.ViewportHeightChroma[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidth256BytesY = v->BlockWidth256BytesY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeight256BytesY = v->BlockHeight256BytesY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidth256BytesC = v->BlockWidth256BytesC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeight256BytesC = v->BlockHeight256BytesC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidthY = v->BlockWidthY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeightY = v->BlockHeightY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockWidthC = v->BlockWidthC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BlockHeightC = v->BlockHeightC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].InterlaceEnable = mode_lib->vba.Interlace[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].HTotal = mode_lib->vba.HTotal[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DCCEnable = mode_lib->vba.DCCEnable[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SurfaceTiling = mode_lib->vba.SurfaceTiling[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BytePerPixelY = v->BytePerPixelY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].BytePerPixelC = v->BytePerPixelC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VRatio = mode_lib->vba.VRatio[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VRatioChroma = mode_lib->vba.VRatioChroma[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VTaps = mode_lib->vba.vtaps[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].VTapsChroma = mode_lib->vba.VTAPsChroma[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].PitchY = mode_lib->vba.PitchY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DCCMetaPitchY = mode_lib->vba.DCCMetaPitchY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].PitchC = mode_lib->vba.PitchC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].DCCMetaPitchC = mode_lib->vba.DCCMetaPitchC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportStationary = mode_lib->vba.ViewportStationary[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportXStart = mode_lib->vba.ViewportXStartY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportYStart = mode_lib->vba.ViewportYStartY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportXStartC = mode_lib->vba.ViewportXStartC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].ViewportYStartC = mode_lib->vba.ViewportYStartC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->vba.ForceOneRowForFrame[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SwathHeightY = mode_lib->vba.SwathHeightY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters[k].SwathHeightC = mode_lib->vba.SwathHeightC[k];
+ }
+
+ {
+
+ dml32_CalculateVMRowAndSwath(
+ &v->dummy_vars.dml32_CalculateVMRowAndSwath,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters,
+ v->SurfaceSizeInMALL,
+ mode_lib->vba.PTEBufferSizeInRequestsLuma,
+ mode_lib->vba.PTEBufferSizeInRequestsChroma,
+ mode_lib->vba.DCCMetaBufferSizeBytes,
+ mode_lib->vba.UseMALLForStaticScreen,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.MALLAllocatedForDCNFinal,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.GPUVMMinPageSizeKBytes,
+ mode_lib->vba.HostVMMinPageSize,
+
+ /* Output */
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean_array2[0], // Boolean PTEBufferSizeNotExceeded[]
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean_array2[1], // Boolean DCCMetaBufferSizeNotExceeded[]
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+ v->dpte_row_height,
+ v->dpte_row_height_chroma,
+ v->dpte_row_height_linear,
+ v->dpte_row_height_linear_chroma,
+ v->meta_req_width,
+ v->meta_req_width_chroma,
+ v->meta_req_height,
+ v->meta_req_height_chroma,
+ v->meta_row_width,
+ v->meta_row_width_chroma,
+ v->meta_row_height,
+ v->meta_row_height_chroma,
+ v->vm_group_bytes,
+ v->dpte_group_bytes,
+ v->PixelPTEReqWidthY,
+ v->PixelPTEReqHeightY,
+ v->PTERequestSizeY,
+ v->PixelPTEReqWidthC,
+ v->PixelPTEReqHeightC,
+ v->PTERequestSizeC,
+ v->dpde0_bytes_per_frame_ub_l,
+ v->meta_pte_bytes_per_frame_ub_l,
+ v->dpde0_bytes_per_frame_ub_c,
+ v->meta_pte_bytes_per_frame_ub_c,
+ v->PrefetchSourceLinesY,
+ v->PrefetchSourceLinesC,
+ v->VInitPreFillY, v->VInitPreFillC,
+ v->MaxNumSwathY,
+ v->MaxNumSwathC,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->PixelPTEBytesPerRow,
+ v->PDEAndMetaPTEBytesFrame,
+ v->MetaRowByte,
+ v->Use_One_Row_For_Frame,
+ v->Use_One_Row_For_Frame_Flip,
+ v->UsesMALLForStaticScreen,
+ v->PTE_BUFFER_MODE,
+ v->BIGK_FRAGMENT_SIZE);
+ }
+
+
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.ReorderBytes = mode_lib->vba.NumberOfChannels
+ * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
+
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.VMDataOnlyReturnBW = dml32_get_return_bw_mbps_vm_only(
+ &mode_lib->vba.soc,
+ mode_lib->vba.VoltageLevel,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.FabricClock,
+ mode_lib->vba.DRAMSpeed);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: mode_lib->vba.ReturnBusWidth = %f\n", __func__, mode_lib->vba.ReturnBusWidth);
+ dml_print("DML::%s: mode_lib->vba.DCFCLK = %f\n", __func__, mode_lib->vba.DCFCLK);
+ dml_print("DML::%s: mode_lib->vba.FabricClock = %f\n", __func__, mode_lib->vba.FabricClock);
+ dml_print("DML::%s: mode_lib->vba.FabricDatapathToDCNDataReturn = %f\n", __func__,
+ mode_lib->vba.FabricDatapathToDCNDataReturn);
+ dml_print("DML::%s: mode_lib->vba.PercentOfIdealSDPPortBWReceivedAfterUrgLatency = %f\n",
+ __func__, mode_lib->vba.PercentOfIdealSDPPortBWReceivedAfterUrgLatency);
+ dml_print("DML::%s: mode_lib->vba.DRAMSpeed = %f\n", __func__, mode_lib->vba.DRAMSpeed);
+ dml_print("DML::%s: mode_lib->vba.NumberOfChannels = %f\n", __func__, mode_lib->vba.NumberOfChannels);
+ dml_print("DML::%s: mode_lib->vba.DRAMChannelWidth = %f\n", __func__, mode_lib->vba.DRAMChannelWidth);
+ dml_print("DML::%s: mode_lib->vba.PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n",
+ __func__, mode_lib->vba.PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
+ dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->vba.ReturnBW);
+#endif
+
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor = 1.0;
+
+ if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable)
+ v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .HostVMInefficiencyFactor =
+ mode_lib->vba.ReturnBW / v->dummy_vars
+ .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ .VMDataOnlyReturnBW;
+
+ mode_lib->vba.TotalDCCActiveDPP = 0;
+ mode_lib->vba.TotalActiveDPP = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + mode_lib->vba.DPPPerPlane[k];
+ if (mode_lib->vba.DCCEnable[k])
+ mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP
+ + mode_lib->vba.DPPPerPlane[k];
+ }
+
+ v->UrgentExtraLatency = dml32_CalculateExtraLatency(
+ mode_lib->vba.RoundTripPingLatencyCycles,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.ReorderBytes,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.TotalActiveDPP,
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.TotalDCCActiveDPP,
+ mode_lib->vba.MetaChunkSize,
+ mode_lib->vba.ReturnBW,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.DPPPerPlane,
+ v->dpte_group_bytes,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+ mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels);
+
+ mode_lib->vba.TCalc = 24.0 / v->DCFCLKDeepSleep;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ if (mode_lib->vba.WritebackEnable[k] == true) {
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][k] = mode_lib->vba.WritebackLatency
+ + dml32_CalculateWriteBackDelay(
+ mode_lib->vba.WritebackPixelFormat[k],
+ mode_lib->vba.WritebackHRatio[k],
+ mode_lib->vba.WritebackVRatio[k],
+ mode_lib->vba.WritebackVTaps[k],
+ mode_lib->vba.WritebackDestinationWidth[k],
+ mode_lib->vba.WritebackDestinationHeight[k],
+ mode_lib->vba.WritebackSourceHeight[k],
+ mode_lib->vba.HTotal[k]) / mode_lib->vba.DISPCLK;
+ } else
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0;
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) {
+ if (mode_lib->vba.BlendingAndTiming[j] == k &&
+ mode_lib->vba.WritebackEnable[j] == true) {
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
+ dml_max(v->WritebackDelay[mode_lib->vba.VoltageLevel][k],
+ mode_lib->vba.WritebackLatency +
+ dml32_CalculateWriteBackDelay(
+ mode_lib->vba.WritebackPixelFormat[j],
+ mode_lib->vba.WritebackHRatio[j],
+ mode_lib->vba.WritebackVRatio[j],
+ mode_lib->vba.WritebackVTaps[j],
+ mode_lib->vba.WritebackDestinationWidth[j],
+ mode_lib->vba.WritebackDestinationHeight[j],
+ mode_lib->vba.WritebackSourceHeight[j],
+ mode_lib->vba.HTotal[k]) / mode_lib->vba.DISPCLK);
+ }
+ }
+ }
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j)
+ if (mode_lib->vba.BlendingAndTiming[k] == j)
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
+ v->WritebackDelay[mode_lib->vba.VoltageLevel][j];
+
+ v->UrgentLatency = dml32_CalculateUrgentLatency(mode_lib->vba.UrgentLatencyPixelDataOnly,
+ mode_lib->vba.UrgentLatencyPixelMixedWithVMData,
+ mode_lib->vba.UrgentLatencyVMDataOnly,
+ mode_lib->vba.DoUrgentLatencyAdjustment,
+ mode_lib->vba.UrgentLatencyAdjustmentFabricClockComponent,
+ mode_lib->vba.UrgentLatencyAdjustmentFabricClockReference,
+ mode_lib->vba.FabricClock);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateUrgentBurstFactor(mode_lib->vba.UsesMALLForPStateChange[k],
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ mode_lib->vba.SwathHeightY[k],
+ mode_lib->vba.SwathHeightC[k],
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ v->UrgentLatency,
+ mode_lib->vba.CursorBufferSize,
+ mode_lib->vba.CursorWidth[k][0],
+ mode_lib->vba.CursorBPP[k][0],
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ mode_lib->vba.DETBufferSizeY[k],
+ mode_lib->vba.DETBufferSizeC[k],
+
+ /* output */
+ &v->UrgBurstFactorCursor[k],
+ &v->UrgBurstFactorLuma[k],
+ &v->UrgBurstFactorChroma[k],
+ &v->NoUrgentLatencyHiding[k]);
+
+ v->cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / 8 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->MaxVStartupLines[k] = ((mode_lib->vba.Interlace[k] &&
+ !mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ?
+ dml_floor((mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]) / 2.0, 1.0) :
+ mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]) - dml_max(1.0,
+ dml_ceil((double) v->WritebackDelay[mode_lib->vba.VoltageLevel][k]
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1));
+
+ // Clamp to max OTG vstartup register limit
+ if (v->MaxVStartupLines[k] > 1023)
+ v->MaxVStartupLines[k] = 1023;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, mode_lib->vba.VoltageLevel);
+ dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__,
+ k, v->WritebackDelay[mode_lib->vba.VoltageLevel][k]);
+#endif
+ }
+
+ v->MaximumMaxVStartupLines = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
+
+ ImmediateFlipRequirementFinal = false;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ ImmediateFlipRequirementFinal = ImmediateFlipRequirementFinal
+ || (mode_lib->vba.ImmediateFlipRequirement[k] == dm_immediate_flip_required);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ImmediateFlipRequirementFinal = %d\n", __func__, ImmediateFlipRequirementFinal);
+#endif
+ // ModeProgramming will not repeat the schedule calculation using different prefetch mode,
+ //it is just calcualated once with given prefetch mode
+ dml32_CalculateMinAndMaxPrefetchMode(
+ mode_lib->vba.AllowForPStateChangeOrStutterInVBlankFinal,
+ &mode_lib->vba.MinPrefetchMode,
+ &mode_lib->vba.MaxPrefetchMode);
+
+ v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
+
+ iteration = 0;
+ MaxTotalRDBandwidth = 0;
+ NextPrefetchMode = mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb];
+
+ do {
+ MaxTotalRDBandwidth = 0;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, mode_lib->vba.VStartupLines);
+#endif
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ /* NOTE PerfetchMode variable is invalid in DAL as per the input received.
+ * Hence the direction is to use PrefetchModePerState.
+ */
+ TWait = dml32_CalculateTWait(
+ mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
+ mode_lib->vba.UsesMALLForPStateChange[k],
+ mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->vba.DRRDisplay[k],
+ mode_lib->vba.DRAMClockChangeLatency,
+ mode_lib->vba.FCLKChangeLatency, v->UrgentLatency,
+ mode_lib->vba.SREnterPlusExitTime);
+
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dppclk = mode_lib->vba.DPPCLK[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dispclk = mode_lib->vba.DISPCLK;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.PixelClock = mode_lib->vba.PixelClock[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HTotal = mode_lib->vba.HTotal[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HActive = mode_lib->vba.HActive[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k];
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
+ v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(
+ &v->dummy_vars.dml32_CalculatePrefetchSchedule,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k],
+ mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater,
+ mode_lib->vba.DPPCLKDelaySCL,
+ mode_lib->vba.DPPCLKDelaySCLLBOnly,
+ mode_lib->vba.DPPCLKDelayCNVCCursor,
+ mode_lib->vba.DISPCLKDelaySubtotal,
+ (unsigned int) (v->SwathWidthY[k] / mode_lib->vba.HRatio[k]),
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.MaxInterDCNTileRepeaters,
+ dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
+ v->MaxVStartupLines[k],
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.DynamicMetadataEnable[k],
+ mode_lib->vba.DynamicMetadataVMEnabled,
+ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
+ mode_lib->vba.DynamicMetadataTransmittedBytes[k],
+ v->UrgentLatency,
+ v->UrgentExtraLatency,
+ mode_lib->vba.TCalc,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+ v->PixelPTEBytesPerRow[k],
+ v->PrefetchSourceLinesY[k],
+ v->SwathWidthY[k],
+ v->VInitPreFillY[k],
+ v->MaxNumSwathY[k],
+ v->PrefetchSourceLinesC[k],
+ v->SwathWidthC[k],
+ v->VInitPreFillC[k],
+ v->MaxNumSwathC[k],
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ mode_lib->vba.SwathHeightY[k],
+ mode_lib->vba.SwathHeightC[k],
+ TWait,
+ /* Output */
+ &v->DSTXAfterScaler[k],
+ &v->DSTYAfterScaler[k],
+ &v->DestinationLinesForPrefetch[k],
+ &v->PrefetchBandwidth[k],
+ &v->DestinationLinesToRequestVMInVBlank[k],
+ &v->DestinationLinesToRequestRowInVBlank[k],
+ &v->VRatioPrefetchY[k],
+ &v->VRatioPrefetchC[k],
+ &v->RequiredPrefetchPixDataBWLuma[k],
+ &v->RequiredPrefetchPixDataBWChroma[k],
+ &v->NotEnoughTimeForDynamicMetadata[k],
+ &v->Tno_bw[k], &v->prefetch_vmrow_bw[k],
+ &v->Tdmdl_vm[k],
+ &v->Tdmdl[k],
+ &v->TSetup[k],
+ &v->VUpdateOffsetPix[k],
+ &v->VUpdateWidthPix[k],
+ &v->VReadyOffsetPix[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d Prefetch calculation errResult=%0d\n",
+ __func__, k, mode_lib->vba.ErrorResult[k]);
+#endif
+ v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateUrgentBurstFactor(mode_lib->vba.UsesMALLForPStateChange[k],
+ v->swath_width_luma_ub[k],
+ v->swath_width_chroma_ub[k],
+ mode_lib->vba.SwathHeightY[k],
+ mode_lib->vba.SwathHeightC[k],
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ v->UrgentLatency,
+ mode_lib->vba.CursorBufferSize,
+ mode_lib->vba.CursorWidth[k][0],
+ mode_lib->vba.CursorBPP[k][0],
+ v->VRatioPrefetchY[k],
+ v->VRatioPrefetchC[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ mode_lib->vba.DETBufferSizeY[k],
+ mode_lib->vba.DETBufferSizeC[k],
+ /* Output */
+ &v->UrgBurstFactorCursorPre[k],
+ &v->UrgBurstFactorLumaPre[k],
+ &v->UrgBurstFactorChromaPre[k],
+ &v->NoUrgentLatencyHidingPre[k]);
+
+ v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] /
+ 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPrefetchY[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DPPPerSurface=%d\n", __func__, k, mode_lib->vba.DPPPerPlane[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k,
+ v->UrgBurstFactorLumaPre[k]);
+ dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k,
+ v->UrgBurstFactorChromaPre[k]);
+
+ dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
+ dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, mode_lib->vba.VRatio[k]);
+
+ dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthSurfaceLuma=%f\n", __func__, k,
+ v->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0d ReadBandwidthSurfaceChroma=%f\n", __func__, k,
+ v->ReadBandwidthSurfaceChroma[k]);
+ dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
+ dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
+ dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
+ dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k,
+ v->RequiredPrefetchPixDataBWLuma[k]);
+ dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k,
+ v->RequiredPrefetchPixDataBWChroma[k]);
+ dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
+ dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k,
+ MaxTotalRDBandwidthNoUrgentBurst);
+#endif
+ if (v->DestinationLinesForPrefetch[k] < 2)
+ DestinationLineTimesForPrefetchLessThan2 = true;
+
+ if (v->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE__
+ || v->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE__)
+ VRatioPrefetchMoreThanMax = true;
+
+ //bool DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = false;
+ //bool DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = false;
+ //if (v->DestinationLinesToRequestVMInVBlank[k] >= 32) {
+ // DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = true;
+ //}
+
+ //if (v->DestinationLinesToRequestRowInVBlank[k] >= 16) {
+ // DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = true;
+ //}
+ }
+
+ v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n",
+ __func__, MaxTotalRDBandwidthNoUrgentBurst);
+ dml_print("DML::%s: ReturnBW=%f\n", __func__, mode_lib->vba.ReturnBW);
+ dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n",
+ __func__, mode_lib->vba.FractionOfUrgentBandwidth);
+#endif
+
+ {
+ dml32_CalculatePrefetchBandwithSupport(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ v->NoUrgentLatencyHidingPre,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->cursor_bw_pre,
+ v->prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ v->UrgBurstFactorLuma,
+ v->UrgBurstFactorChroma,
+ v->UrgBurstFactorCursor,
+ v->UrgBurstFactorLumaPre,
+ v->UrgBurstFactorChromaPre,
+ v->UrgBurstFactorCursorPre,
+
+ /* output */
+ &MaxTotalRDBandwidth,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0],
+ &v->PrefetchModeSupported);
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector[k] = 1.0;
+
+ {
+ dml32_CalculatePrefetchBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ v->NoUrgentLatencyHidingPre,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->cursor_bw_pre,
+ v->prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+
+ /* output */
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0],
+ &v->FractionOfUrgentBandwidth,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean);
+ }
+
+ if (VRatioPrefetchMoreThanMax != false || DestinationLineTimesForPrefetchLessThan2 != false) {
+ v->PrefetchModeSupported = false;
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k]) {
+ v->PrefetchModeSupported = false;
+ }
+ }
+
+ if (v->PrefetchModeSupported == true && mode_lib->vba.ImmediateFlipSupport == true) {
+ mode_lib->vba.BandwidthAvailableForImmediateFlip = dml32_CalculateBandwidthAvailableForImmediateFlip(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->cursor_bw_pre,
+ mode_lib->vba.DPPPerPlane,
+ v->UrgBurstFactorLuma,
+ v->UrgBurstFactorChroma,
+ v->UrgBurstFactorCursor,
+ v->UrgBurstFactorLumaPre,
+ v->UrgBurstFactorChromaPre,
+ v->UrgBurstFactorCursorPre);
+
+ mode_lib->vba.TotImmediateFlipBytes = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
+ mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes
+ + mode_lib->vba.DPPPerPlane[k]
+ * (v->PDEAndMetaPTEBytesFrame[k]
+ + v->MetaRowByte[k]);
+ if (v->use_one_row_for_frame_flip[k][0][0]) {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes
+ + 2 * v->PixelPTEBytesPerRow[k];
+ } else {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes
+ + v->PixelPTEBytesPerRow[k];
+ }
+ }
+ }
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateFlipSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+ v->UrgentExtraLatency,
+ v->UrgentLatency,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMMinPageSize,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+ v->PixelPTEBytesPerRow[k],
+ mode_lib->vba.BandwidthAvailableForImmediateFlip,
+ mode_lib->vba.TotImmediateFlipBytes,
+ mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ v->Tno_bw[k],
+ mode_lib->vba.DCCEnable[k],
+ v->dpte_row_height[k],
+ v->meta_row_height[k],
+ v->dpte_row_height_chroma[k],
+ v->meta_row_height_chroma[k],
+ v->Use_One_Row_For_Frame_Flip[k],
+
+ /* Output */
+ &v->DestinationLinesToRequestVMInImmediateFlip[k],
+ &v->DestinationLinesToRequestRowInImmediateFlip[k],
+ &v->final_flip_bw[k],
+ &v->ImmediateFlipSupportedForPipe[k]);
+ }
+
+ {
+ dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ mode_lib->vba.ImmediateFlipRequirement,
+ v->final_flip_bw,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->cursor_bw_pre,
+ v->prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ v->UrgBurstFactorLuma,
+ v->UrgBurstFactorChroma,
+ v->UrgBurstFactorCursor,
+ v->UrgBurstFactorLumaPre,
+ v->UrgBurstFactorChromaPre,
+ v->UrgBurstFactorCursorPre,
+
+ /* output */
+ &v->total_dcn_read_bw_with_flip, // Single *TotalBandwidth
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], // Single *FractionOfUrgentBandwidth
+ &v->ImmediateFlipSupported); // Boolean *ImmediateFlipBandwidthSupport
+
+ dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ mode_lib->vba.ImmediateFlipRequirement,
+ v->final_flip_bw,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->RequiredPrefetchPixDataBWLuma,
+ v->RequiredPrefetchPixDataBWChroma,
+ v->cursor_bw,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ v->cursor_bw_pre,
+ v->prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector,
+
+ /* output */
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], // Single *TotalBandwidth
+ &v->FractionOfUrgentBandwidthImmediateFlip, // Single *FractionOfUrgentBandwidth
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); // Boolean *ImmediateFlipBandwidthSupport
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.ImmediateFlipRequirement[k] != dm_immediate_flip_not_required && v->ImmediateFlipSupportedForPipe[k] == false) {
+ v->ImmediateFlipSupported = false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
+#endif
+ }
+ }
+ } else {
+ v->ImmediateFlipSupported = false;
+ }
+
+ /* consider flip support is okay if the flip bw is ok or (when user does't require a iflip and there is no host vm) */
+ v->PrefetchAndImmediateFlipSupported = (v->PrefetchModeSupported == true &&
+ ((!mode_lib->vba.ImmediateFlipSupport && !mode_lib->vba.HostVMEnable && !ImmediateFlipRequirementFinal) ||
+ v->ImmediateFlipSupported)) ? true : false;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PrefetchModeSupported = %d\n", __func__, locals->PrefetchModeSupported);
+ for (uint k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ dml_print("DML::%s: ImmediateFlipRequirement[%d] = %d\n", __func__, k, mode_lib->vba.ImmediateFlipRequirement[k] == dm_immediate_flip_required);
+ dml_print("DML::%s: ImmediateFlipSupported = %d\n", __func__, locals->ImmediateFlipSupported);
+ dml_print("DML::%s: ImmediateFlipSupport = %d\n", __func__, mode_lib->vba.ImmediateFlipSupport);
+ dml_print("DML::%s: HostVMEnable = %d\n", __func__, mode_lib->vba.HostVMEnable);
+ dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %d\n", __func__, locals->PrefetchAndImmediateFlipSupported);
+ dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup=%d\n", __func__, locals->VStartupLines, locals->MaximumMaxVStartupLines);
+#endif
+
+ v->VStartupLines = v->VStartupLines + 1;
+
+ if (v->VStartupLines > v->MaximumMaxVStartupLines) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Vstartup exceeds max vstartup, exiting loop\n", __func__);
+#endif
+ break; // VBA_DELTA: Implementation divergence! Gabe is *still* iterating across prefetch modes which we don't care to do
+ }
+ iteration++;
+ if (iteration > 2500) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: too many errors, exit now\n", __func__);
+ assert(0);
+#endif
+ }
+ } while (!(v->PrefetchAndImmediateFlipSupported || NextPrefetchMode > mode_lib->vba.MaxPrefetchMode));
+
+
+ if (v->VStartupLines <= v->MaximumMaxVStartupLines) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Good, Prefetch and flip scheduling found solution at VStartupLines=%d\n", __func__, locals->VStartupLines-1);
+#endif
+ }
+
+
+ //Watermarks and NB P-State/DRAM Clock Change Support
+ {
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.UrgentLatency = v->UrgentLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.ExtraLatency = v->UrgentExtraLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency;
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency;
+
+ dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport,
+ mode_lib->vba.USRRetrainingRequiredFinal,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.MaxLineBufferLines,
+ mode_lib->vba.LineBufferSizeFinal,
+ mode_lib->vba.WritebackInterfaceBufferSize,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.ReturnBW,
+ mode_lib->vba.SynchronizeTimingsFinal,
+ mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->vba.DRRDisplay,
+ v->dpte_group_bytes,
+ v->meta_row_height,
+ v->meta_row_height_chroma,
+ v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters,
+ mode_lib->vba.WritebackChunkSize,
+ mode_lib->vba.SOCCLK,
+ v->DCFCLKDeepSleep,
+ mode_lib->vba.DETBufferSizeY,
+ mode_lib->vba.DETBufferSizeC,
+ mode_lib->vba.SwathHeightY,
+ mode_lib->vba.SwathHeightC,
+ mode_lib->vba.LBBitPerPixel,
+ v->SwathWidthY,
+ v->SwathWidthC,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.vtaps,
+ mode_lib->vba.VTAPsChroma,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.VTotal,
+ mode_lib->vba.VActive,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.BlendingAndTiming,
+ mode_lib->vba.DPPPerPlane,
+ v->BytePerPixelDETY,
+ v->BytePerPixelDETC,
+ v->DSTXAfterScaler,
+ v->DSTYAfterScaler,
+ mode_lib->vba.WritebackEnable,
+ mode_lib->vba.WritebackPixelFormat,
+ mode_lib->vba.WritebackDestinationWidth,
+ mode_lib->vba.WritebackDestinationHeight,
+ mode_lib->vba.WritebackSourceHeight,
+ v->UnboundedRequestEnabled,
+ v->CompressedBufferSizeInkByte,
+
+ /* Output */
+ &v->Watermark,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_dramchange_support,
+ v->MaxActiveDRAMClockChangeLatencySupported,
+ v->SubViewportLinesNeededInMALL,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_fclkchange_support,
+ &v->MinActiveFCLKChangeLatencySupported,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_USRRetrainingSupport,
+ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin);
+
+ /* DCN32 has a new struct Watermarks (typedef) which is used to store
+ * calculated WM values. Copy over values from struct to vba varaibles
+ * to ensure that the DCN32 getters return the correct value.
+ */
+ v->UrgentWatermark = v->Watermark.UrgentWatermark;
+ v->WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark;
+ v->DRAMClockChangeWatermark = v->Watermark.DRAMClockChangeWatermark;
+ v->WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark;
+ v->StutterExitWatermark = v->Watermark.StutterExitWatermark;
+ v->StutterEnterPlusExitWatermark = v->Watermark.StutterEnterPlusExitWatermark;
+ v->Z8StutterExitWatermark = v->Watermark.Z8StutterExitWatermark;
+ v->Z8StutterEnterPlusExitWatermark = v->Watermark.Z8StutterEnterPlusExitWatermark;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.WritebackEnable[k] == true) {
+ v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
+ v->VStartup[k] * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]
+ - v->Watermark.WritebackDRAMClockChangeWatermark);
+ v->WritebackAllowFCLKChangeEndPosition[k] = dml_max(0,
+ v->VStartup[k] * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]
+ - v->Watermark.WritebackFCLKChangeWatermark);
+ } else {
+ v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
+ v->WritebackAllowFCLKChangeEndPosition[k] = 0;
+ }
+ }
+ }
+
+ //Display Pipeline Delivery Time in Prefetch, Groups
+ dml32_CalculatePixelDeliveryTimes(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ v->VRatioPrefetchY,
+ v->VRatioPrefetchC,
+ v->swath_width_luma_ub,
+ v->swath_width_chroma_ub,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.PixelClock,
+ v->PSCL_THROUGHPUT_LUMA,
+ v->PSCL_THROUGHPUT_CHROMA,
+ mode_lib->vba.DPPCLK,
+ v->BytePerPixelC,
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.NumberOfCursors,
+ mode_lib->vba.CursorWidth,
+ mode_lib->vba.CursorBPP,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesC,
+ v->BlockHeight256BytesC,
+
+ /* Output */
+ v->DisplayPipeLineDeliveryTimeLuma,
+ v->DisplayPipeLineDeliveryTimeChroma,
+ v->DisplayPipeLineDeliveryTimeLumaPrefetch,
+ v->DisplayPipeLineDeliveryTimeChromaPrefetch,
+ v->DisplayPipeRequestDeliveryTimeLuma,
+ v->DisplayPipeRequestDeliveryTimeChroma,
+ v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
+ v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
+ v->CursorRequestDeliveryTime,
+ v->CursorRequestDeliveryTimePrefetch);
+
+ dml32_CalculateMetaAndPTETimes(v->Use_One_Row_For_Frame,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.MetaChunkSize,
+ mode_lib->vba.MinMetaChunkSizeBytes,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ v->DestinationLinesToRequestRowInVBlank,
+ v->DestinationLinesToRequestRowInImmediateFlip,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.PixelClock,
+ v->BytePerPixelY,
+ v->BytePerPixelC,
+ mode_lib->vba.SourceRotation,
+ v->dpte_row_height,
+ v->dpte_row_height_chroma,
+ v->meta_row_width,
+ v->meta_row_width_chroma,
+ v->meta_row_height,
+ v->meta_row_height_chroma,
+ v->meta_req_width,
+ v->meta_req_width_chroma,
+ v->meta_req_height,
+ v->meta_req_height_chroma,
+ v->dpte_group_bytes,
+ v->PTERequestSizeY,
+ v->PTERequestSizeC,
+ v->PixelPTEReqWidthY,
+ v->PixelPTEReqHeightY,
+ v->PixelPTEReqWidthC,
+ v->PixelPTEReqHeightC,
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+
+ /* Output */
+ v->DST_Y_PER_PTE_ROW_NOM_L,
+ v->DST_Y_PER_PTE_ROW_NOM_C,
+ v->DST_Y_PER_META_ROW_NOM_L,
+ v->DST_Y_PER_META_ROW_NOM_C,
+ v->TimePerMetaChunkNominal,
+ v->TimePerChromaMetaChunkNominal,
+ v->TimePerMetaChunkVBlank,
+ v->TimePerChromaMetaChunkVBlank,
+ v->TimePerMetaChunkFlip,
+ v->TimePerChromaMetaChunkFlip,
+ v->time_per_pte_group_nom_luma,
+ v->time_per_pte_group_vblank_luma,
+ v->time_per_pte_group_flip_luma,
+ v->time_per_pte_group_nom_chroma,
+ v->time_per_pte_group_vblank_chroma,
+ v->time_per_pte_group_flip_chroma);
+
+ dml32_CalculateVMGroupAndRequestTimes(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.HTotal,
+ v->BytePerPixelC,
+ v->DestinationLinesToRequestVMInVBlank,
+ v->DestinationLinesToRequestVMInImmediateFlip,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.PixelClock,
+ v->dpte_row_width_luma_ub,
+ v->dpte_row_width_chroma_ub,
+ v->vm_group_bytes,
+ v->dpde0_bytes_per_frame_ub_l,
+ v->dpde0_bytes_per_frame_ub_c,
+ v->meta_pte_bytes_per_frame_ub_l,
+ v->meta_pte_bytes_per_frame_ub_c,
+
+ /* Output */
+ v->TimePerVMGroupVBlank,
+ v->TimePerVMGroupFlip,
+ v->TimePerVMRequestVBlank,
+ v->TimePerVMRequestFlip);
+
+ // Min TTUVBlank
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
+ v->MinTTUVBlank[k] = dml_max4(v->Watermark.DRAMClockChangeWatermark,
+ v->Watermark.FCLKChangeWatermark, v->Watermark.StutterEnterPlusExitWatermark,
+ v->Watermark.UrgentWatermark);
+ } else if (mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]
+ == 1) {
+ v->MinTTUVBlank[k] = dml_max3(v->Watermark.FCLKChangeWatermark,
+ v->Watermark.StutterEnterPlusExitWatermark, v->Watermark.UrgentWatermark);
+ } else if (mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]
+ == 2) {
+ v->MinTTUVBlank[k] = dml_max(v->Watermark.StutterEnterPlusExitWatermark,
+ v->Watermark.UrgentWatermark);
+ } else {
+ v->MinTTUVBlank[k] = v->Watermark.UrgentWatermark;
+ }
+ if (!mode_lib->vba.DynamicMetadataEnable[k])
+ v->MinTTUVBlank[k] = mode_lib->vba.TCalc + v->MinTTUVBlank[k];
+ }
+
+ // DCC Configuration
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calculate DCC configuration for surface k=%d\n", __func__, k);
+#endif
+ dml32_CalculateDCCConfiguration(
+ mode_lib->vba.DCCEnable[k],
+ mode_lib->vba.DCCProgrammingAssumesScanDirectionUnknownFinal,
+ mode_lib->vba.SourcePixelFormat[k], mode_lib->vba.SurfaceWidthY[k],
+ mode_lib->vba.SurfaceWidthC[k],
+ mode_lib->vba.SurfaceHeightY[k],
+ mode_lib->vba.SurfaceHeightC[k],
+ mode_lib->vba.nomDETInKByte,
+ v->BlockHeight256BytesY[k],
+ v->BlockHeight256BytesC[k],
+ mode_lib->vba.SurfaceTiling[k],
+ v->BytePerPixelY[k],
+ v->BytePerPixelC[k],
+ v->BytePerPixelDETY[k],
+ v->BytePerPixelDETC[k],
+ (enum dm_rotation_angle) mode_lib->vba.SourceScan[k],
+ /* Output */
+ &v->DCCYMaxUncompressedBlock[k],
+ &v->DCCCMaxUncompressedBlock[k],
+ &v->DCCYMaxCompressedBlock[k],
+ &v->DCCCMaxCompressedBlock[k],
+ &v->DCCYIndependentBlock[k],
+ &v->DCCCIndependentBlock[k]);
+ }
+
+ // VStartup Adjustment
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ bool isInterlaceTiming;
+ double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * mode_lib->vba.HTotal[k]
+ / mode_lib->vba.PixelClock[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k,
+ v->MinTTUVBlank[k]);
+#endif
+
+ v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
+ dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
+#endif
+
+ v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
+ if (mode_lib->vba.DynamicMetadataEnable[k] && mode_lib->vba.DynamicMetadataVMEnabled)
+ v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
+
+ isInterlaceTiming = (mode_lib->vba.Interlace[k] &&
+ !mode_lib->vba.ProgressiveToInterlaceUnitInOPP);
+
+ v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((mode_lib->vba.VTotal[k] -
+ mode_lib->vba.VFrontPorch[k]) / 2.0, 1.0) :
+ mode_lib->vba.VTotal[k]) - mode_lib->vba.VFrontPorch[k])
+ + dml_max(1.0,
+ dml_ceil(v->WritebackDelay[mode_lib->vba.VoltageLevel][k]
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0))
+ + dml_floor(4.0 * v->TSetup[k] / (mode_lib->vba.HTotal[k]
+ / mode_lib->vba.PixelClock[k]), 1.0) / 4.0;
+
+ v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
+
+ if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k])
+ / mode_lib->vba.HTotal[k]) <= (isInterlaceTiming ? dml_floor((mode_lib->vba.VTotal[k]
+ - mode_lib->vba.VActive[k] - mode_lib->vba.VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
+ (int) (mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
+ - mode_lib->vba.VFrontPorch[k] - v->VStartup[k]))) {
+ v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
+ } else {
+ v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
+ dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
+ dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
+ dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, mode_lib->vba.HTotal[k]);
+ dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, mode_lib->vba.VTotal[k]);
+ dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, mode_lib->vba.VActive[k]);
+ dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, mode_lib->vba.VFrontPorch[k]);
+ dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
+ dml_print("DML::%s: k=%d, TSetup = %f\n", __func__, k, v->TSetup[k]);
+ dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
+ dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k,
+ v->VREADY_AT_OR_AFTER_VSYNC[k]);
+#endif
+ }
+
+ {
+ //Maximum Bandwidth Used
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.WritebackEnable[k] == true
+ && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
+ WRBandwidth = mode_lib->vba.WritebackDestinationWidth[k]
+ * mode_lib->vba.WritebackDestinationHeight[k]
+ / (mode_lib->vba.HTotal[k] * mode_lib->vba.WritebackSourceHeight[k]
+ / mode_lib->vba.PixelClock[k]) * 4;
+ } else if (mode_lib->vba.WritebackEnable[k] == true) {
+ WRBandwidth = mode_lib->vba.WritebackDestinationWidth[k]
+ * mode_lib->vba.WritebackDestinationHeight[k]
+ / (mode_lib->vba.HTotal[k] * mode_lib->vba.WritebackSourceHeight[k]
+ / mode_lib->vba.PixelClock[k]) * 8;
+ }
+ TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
+ }
+
+ v->TotalDataReadBandwidth = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthSurfaceLuma[k]
+ + v->ReadBandwidthSurfaceChroma[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, TotalDataReadBandwidth = %f\n",
+ __func__, k, v->TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%d, ReadBandwidthSurfaceLuma = %f\n",
+ __func__, k, v->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%d, ReadBandwidthSurfaceChroma = %f\n",
+ __func__, k, v->ReadBandwidthSurfaceChroma[k]);
+#endif
+ }
+ }
+
+ // Stutter Efficiency
+ dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte,
+ mode_lib->vba.UsesMALLForPStateChange,
+ v->UnboundedRequestEnabled,
+ mode_lib->vba.MetaFIFOSizeInKEntries,
+ mode_lib->vba.ZeroSizeBufferEntries,
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ROBBufferSizeInKByte,
+ v->TotalDataReadBandwidth,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.ReturnBW,
+ v->CompbufReservedSpace64B,
+ v->CompbufReservedSpaceZs,
+ mode_lib->vba.SRExitTime,
+ mode_lib->vba.SRExitZ8Time,
+ mode_lib->vba.SynchronizeTimingsFinal,
+ mode_lib->vba.BlendingAndTiming,
+ v->Watermark.StutterEnterPlusExitWatermark,
+ v->Watermark.Z8StutterEnterPlusExitWatermark,
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
+ mode_lib->vba.Interlace,
+ v->MinTTUVBlank, mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.DETBufferSizeY,
+ v->BytePerPixelY,
+ v->BytePerPixelDETY,
+ v->SwathWidthY,
+ mode_lib->vba.SwathHeightY,
+ mode_lib->vba.SwathHeightC,
+ mode_lib->vba.DCCRateLuma,
+ mode_lib->vba.DCCRateChroma,
+ mode_lib->vba.DCCFractionOfZeroSizeRequestsLuma,
+ mode_lib->vba.DCCFractionOfZeroSizeRequestsChroma,
+ mode_lib->vba.HTotal, mode_lib->vba.VTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.SourceRotation,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesC,
+ v->DCCYMaxUncompressedBlock,
+ v->DCCCMaxUncompressedBlock,
+ mode_lib->vba.VActive,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.WritebackEnable,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->meta_row_bw,
+ v->dpte_row_bw,
+ /* Output */
+ &v->StutterEfficiencyNotIncludingVBlank,
+ &v->StutterEfficiency,
+ &v->NumberOfStutterBurstsPerFrame,
+ &v->Z8StutterEfficiencyNotIncludingVBlank,
+ &v->Z8StutterEfficiency,
+ &v->Z8NumberOfStutterBurstsPerFrame,
+ &v->StutterPeriod,
+ &v->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ {
+ unsigned int dummy_integer[1];
+
+ // Calculate z8 stutter eff assuming 0 reserved space
+ dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte,
+ mode_lib->vba.UsesMALLForPStateChange,
+ v->UnboundedRequestEnabled,
+ mode_lib->vba.MetaFIFOSizeInKEntries,
+ mode_lib->vba.ZeroSizeBufferEntries,
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ROBBufferSizeInKByte,
+ v->TotalDataReadBandwidth,
+ mode_lib->vba.DCFCLK,
+ mode_lib->vba.ReturnBW,
+ 0, //CompbufReservedSpace64B,
+ 0, //CompbufReservedSpaceZs,
+ mode_lib->vba.SRExitTime,
+ mode_lib->vba.SRExitZ8Time,
+ mode_lib->vba.SynchronizeTimingsFinal,
+ mode_lib->vba.BlendingAndTiming,
+ v->Watermark.StutterEnterPlusExitWatermark,
+ v->Watermark.Z8StutterEnterPlusExitWatermark,
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
+ mode_lib->vba.Interlace,
+ v->MinTTUVBlank,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.DETBufferSizeY,
+ v->BytePerPixelY, v->BytePerPixelDETY,
+ v->SwathWidthY, mode_lib->vba.SwathHeightY,
+ mode_lib->vba.SwathHeightC,
+ mode_lib->vba.DCCRateLuma,
+ mode_lib->vba.DCCRateChroma,
+ mode_lib->vba.DCCFractionOfZeroSizeRequestsLuma,
+ mode_lib->vba.DCCFractionOfZeroSizeRequestsChroma,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.VTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.SourceRotation,
+ v->BlockHeight256BytesY,
+ v->BlockWidth256BytesY,
+ v->BlockHeight256BytesC,
+ v->BlockWidth256BytesC,
+ v->DCCYMaxUncompressedBlock,
+ v->DCCCMaxUncompressedBlock,
+ mode_lib->vba.VActive,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.WritebackEnable,
+ v->ReadBandwidthSurfaceLuma,
+ v->ReadBandwidthSurfaceChroma,
+ v->meta_row_bw, v->dpte_row_bw,
+
+ /* Output */
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0],
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1],
+ &dummy_integer[0],
+ &v->Z8StutterEfficiencyNotIncludingVBlankBestCase,
+ &v->Z8StutterEfficiencyBestCase,
+ &v->Z8NumberOfStutterBurstsPerFrameBestCase,
+ &v->StutterPeriodBestCase,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean);
+ }
+#else
+ v->Z8StutterEfficiencyNotIncludingVBlankBestCase = v->Z8StutterEfficiencyNotIncludingVBlank;
+ v->Z8StutterEfficiencyBestCase = v->Z8StutterEfficiency;
+ v->Z8NumberOfStutterBurstsPerFrameBestCase = v->Z8NumberOfStutterBurstsPerFrame;
+ v->StutterPeriodBestCase = v->StutterPeriod;
+#endif
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- END ---\n", __func__);
+#endif
+}
+
+static void mode_support_configuration(struct vba_vars_st *v,
+ struct display_mode_lib *mode_lib)
+{
+ int i, j;
+
+ for (i = v->soc.num_states - 1; i >= 0; i--) {
+ for (j = 0; j < 2; j++) {
+ if (mode_lib->vba.ScaleRatioAndTapsSupport == true
+ && mode_lib->vba.SourceFormatPixelAndScanSupport == true
+ && mode_lib->vba.ViewportSizeSupport[i][j] == true
+ && !mode_lib->vba.LinkRateDoesNotMatchDPVersion
+ && !mode_lib->vba.LinkRateForMultistreamNotIndicated
+ && !mode_lib->vba.BPPForMultistreamNotIndicated
+ && !mode_lib->vba.MultistreamWithHDMIOreDP
+ && !mode_lib->vba.ExceededMultistreamSlots[i]
+ && !mode_lib->vba.MSOOrODMSplitWithNonDPLink
+ && !mode_lib->vba.NotEnoughLanesForMSO
+ && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420
+ && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP
+ && !mode_lib->vba.DSC422NativeNotSupported
+ && !mode_lib->vba.MPCCombineMethodIncompatible
+ && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true
+ && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true
+ && mode_lib->vba.NotEnoughDSCUnits[i] == false
+ && !mode_lib->vba.NotEnoughDSCSlices[i]
+ && !mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
+ && !mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen
+ && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false
+ && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i]
+ && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false
+ && !mode_lib->vba.InvalidCombinationOfMALLUseForPState
+ && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
+ && mode_lib->vba.ROBSupport[i][j] == true
+ && mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true
+ && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true
+ && mode_lib->vba.NumberOfOTGSupport == true
+ && mode_lib->vba.NumberOfHDMIFRLSupport == true
+ && mode_lib->vba.EnoughWritebackUnits == true
+ && mode_lib->vba.WritebackLatencySupport == true
+ && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true
+ && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true
+ && mode_lib->vba.ViewportExceedsSurface == false
+ && mode_lib->vba.PrefetchSupported[i][j] == true
+ && mode_lib->vba.VActiveBandwithSupport[i][j] == true
+ && mode_lib->vba.DynamicMetadataSupported[i][j] == true
+ && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true
+ && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true
+ && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true
+ && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true
+ && mode_lib->vba.NonsupportedDSCInputBPC == false
+ && !mode_lib->vba.ExceededMALLSize
+ && ((mode_lib->vba.HostVMEnable == false
+ && !mode_lib->vba.ImmediateFlipRequiredFinal)
+ || mode_lib->vba.ImmediateFlipSupportedForState[i][j])
+ && (!mode_lib->vba.DRAMClockChangeRequirementFinal
+ || i == v->soc.num_states - 1
+ || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported)
+ && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1
+ || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported)
+ && (!mode_lib->vba.USRRetrainingRequiredFinal
+ || mode_lib->vba.USRRetrainingSupport[i][j])) {
+ mode_lib->vba.ModeSupport[i][j] = true;
+ } else {
+ mode_lib->vba.ModeSupport[i][j] = false;
+ }
+ }
+ }
+}
+
+void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
+{
+ struct vba_vars_st *v = &mode_lib->vba;
+ int i, j;
+ unsigned int k, m;
+ unsigned int MaximumMPCCombine;
+ unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth;
+ unsigned int TotalSlots;
+ bool CompBufReservedSpaceNeedAdjustment;
+ bool CompBufReservedSpaceNeedAdjustmentSingleDPP;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: called\n", __func__);
+#endif
+
+ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
+
+ /*Scale Ratio, taps Support Check*/
+
+ mode_lib->vba.ScaleRatioAndTapsSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.ScalerEnabled[k] == false
+ && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe_alpha)
+ || mode_lib->vba.HRatio[k] != 1.0 || mode_lib->vba.htaps[k] != 1.0
+ || mode_lib->vba.VRatio[k] != 1.0 || mode_lib->vba.vtaps[k] != 1.0)) {
+ mode_lib->vba.ScaleRatioAndTapsSupport = false;
+ } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 || mode_lib->vba.htaps[k] < 1.0
+ || mode_lib->vba.htaps[k] > 8.0
+ || (mode_lib->vba.htaps[k] > 1.0 && (mode_lib->vba.htaps[k] % 2) == 1)
+ || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio
+ || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio
+ || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k]
+ || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k]
+ || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe
+ && (mode_lib->vba.VTAPsChroma[k] < 1
+ || mode_lib->vba.VTAPsChroma[k] > 8
+ || mode_lib->vba.HTAPsChroma[k] < 1
+ || mode_lib->vba.HTAPsChroma[k] > 8
+ || (mode_lib->vba.HTAPsChroma[k] > 1
+ && mode_lib->vba.HTAPsChroma[k] % 2
+ == 1)
+ || mode_lib->vba.HRatioChroma[k]
+ > mode_lib->vba.MaxHSCLRatio
+ || mode_lib->vba.VRatioChroma[k]
+ > mode_lib->vba.MaxVSCLRatio
+ || mode_lib->vba.HRatioChroma[k]
+ > mode_lib->vba.HTAPsChroma[k]
+ || mode_lib->vba.VRatioChroma[k]
+ > mode_lib->vba.VTAPsChroma[k]))) {
+ mode_lib->vba.ScaleRatioAndTapsSupport = false;
+ }
+ }
+
+ /*Source Format, Pixel Format and Scan Support Check*/
+ mode_lib->vba.SourceFormatPixelAndScanSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
+ && (!(!IsVertical((enum dm_rotation_angle) mode_lib->vba.SourceScan[k]))
+ || mode_lib->vba.DCCEnable[k] == true)) {
+ mode_lib->vba.SourceFormatPixelAndScanSupport = false;
+ }
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ dml32_CalculateBytePerPixelAndBlockSizes(
+ mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.SurfaceTiling[k],
+
+ /* Output */
+ &mode_lib->vba.BytePerPixelY[k],
+ &mode_lib->vba.BytePerPixelC[k],
+ &mode_lib->vba.BytePerPixelInDETY[k],
+ &mode_lib->vba.BytePerPixelInDETC[k],
+ &mode_lib->vba.Read256BlockHeightY[k],
+ &mode_lib->vba.Read256BlockHeightC[k],
+ &mode_lib->vba.Read256BlockWidthY[k],
+ &mode_lib->vba.Read256BlockWidthC[k],
+ &mode_lib->vba.MicroTileHeightY[k],
+ &mode_lib->vba.MicroTileHeightC[k],
+ &mode_lib->vba.MicroTileWidthY[k],
+ &mode_lib->vba.MicroTileWidthC[k]);
+ }
+
+ /*Bandwidth Support Check*/
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (!IsVertical(mode_lib->vba.SourceRotation[k])) {
+ v->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k];
+ v->SwathWidthCSingleDPP[k] = mode_lib->vba.ViewportWidthChroma[k];
+ } else {
+ v->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k];
+ v->SwathWidthCSingleDPP[k] = mode_lib->vba.ViewportHeightChroma[k];
+ }
+ }
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
+ v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]
+ / 2.0;
+ }
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.WritebackEnable[k] == true && mode_lib->vba.WritebackPixelFormat[k] == dm_444_64) {
+ v->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
+ * mode_lib->vba.WritebackDestinationHeight[k]
+ / (mode_lib->vba.WritebackSourceHeight[k] * mode_lib->vba.HTotal[k]
+ / mode_lib->vba.PixelClock[k]) * 8.0;
+ } else if (mode_lib->vba.WritebackEnable[k] == true) {
+ v->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
+ * mode_lib->vba.WritebackDestinationHeight[k]
+ / (mode_lib->vba.WritebackSourceHeight[k] * mode_lib->vba.HTotal[k]
+ / mode_lib->vba.PixelClock[k]) * 4.0;
+ } else {
+ v->WriteBandwidth[k] = 0.0;
+ }
+ }
+
+ /*Writeback Latency support check*/
+
+ mode_lib->vba.WritebackLatencySupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.WritebackEnable[k] == true
+ && (v->WriteBandwidth[k]
+ > mode_lib->vba.WritebackInterfaceBufferSize * 1024
+ / mode_lib->vba.WritebackLatency)) {
+ mode_lib->vba.WritebackLatencySupport = false;
+ }
+ }
+
+ /*Writeback Mode Support Check*/
+ mode_lib->vba.EnoughWritebackUnits = true;
+ mode_lib->vba.TotalNumberOfActiveWriteback = 0;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.WritebackEnable[k] == true)
+ mode_lib->vba.TotalNumberOfActiveWriteback = mode_lib->vba.TotalNumberOfActiveWriteback + 1;
+ }
+
+ if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback)
+ mode_lib->vba.EnoughWritebackUnits = false;
+
+ /*Writeback Scale Ratio and Taps Support Check*/
+ mode_lib->vba.WritebackScaleRatioAndTapsSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.WritebackEnable[k] == true) {
+ if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio
+ || mode_lib->vba.WritebackVRatio[k] > mode_lib->vba.WritebackMaxVSCLRatio
+ || mode_lib->vba.WritebackHRatio[k] < mode_lib->vba.WritebackMinHSCLRatio
+ || mode_lib->vba.WritebackVRatio[k] < mode_lib->vba.WritebackMinVSCLRatio
+ || mode_lib->vba.WritebackHTaps[k] > mode_lib->vba.WritebackMaxHSCLTaps
+ || mode_lib->vba.WritebackVTaps[k] > mode_lib->vba.WritebackMaxVSCLTaps
+ || mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackHTaps[k]
+ || mode_lib->vba.WritebackVRatio[k] > mode_lib->vba.WritebackVTaps[k]
+ || (mode_lib->vba.WritebackHTaps[k] > 2.0
+ && ((mode_lib->vba.WritebackHTaps[k] % 2) == 1))) {
+ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
+ }
+ if (2.0 * mode_lib->vba.WritebackDestinationWidth[k] * (mode_lib->vba.WritebackVTaps[k] - 1)
+ * 57 > mode_lib->vba.WritebackLineBufferSize) {
+ mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
+ }
+ }
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(mode_lib->vba.HRatio[k], mode_lib->vba.HRatioChroma[k],
+ mode_lib->vba.VRatio[k], mode_lib->vba.VRatioChroma[k],
+ mode_lib->vba.MaxDCHUBToPSCLThroughput, mode_lib->vba.MaxPSCLToLBThroughput,
+ mode_lib->vba.PixelClock[k], mode_lib->vba.SourcePixelFormat[k],
+ mode_lib->vba.htaps[k], mode_lib->vba.HTAPsChroma[k], mode_lib->vba.vtaps[k],
+ mode_lib->vba.VTAPsChroma[k],
+ /* Output */
+ &mode_lib->vba.PSCL_FACTOR[k], &mode_lib->vba.PSCL_FACTOR_CHROMA[k],
+ &mode_lib->vba.MinDPPCLKUsingSingleDPP[k]);
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+
+ if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 8192;
+ } else if (!IsVertical(mode_lib->vba.SourceRotation[k]) && v->BytePerPixelC[k] > 0
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe_alpha) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 7680;
+ } else if (IsVertical(mode_lib->vba.SourceRotation[k]) && v->BytePerPixelC[k] > 0
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe_alpha) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 4320;
+ } else if (mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 3840;
+ } else if (IsVertical(mode_lib->vba.SourceRotation[k]) && v->BytePerPixelY[k] == 8 &&
+ mode_lib->vba.DCCEnable[k] == true) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 3072;
+ } else {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma = 6144;
+ }
+
+ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 || mode_lib->vba.SourcePixelFormat[k] == dm_420_10
+ || mode_lib->vba.SourcePixelFormat[k] == dm_420_12) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportChroma = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma / 2.0;
+ } else {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportChroma = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma;
+ }
+ v->MaximumSwathWidthInLineBufferLuma = mode_lib->vba.LineBufferSizeFinal
+ * dml_max(mode_lib->vba.HRatio[k], 1.0) / mode_lib->vba.LBBitPerPixel[k]
+ / (mode_lib->vba.vtaps[k] + dml_max(dml_ceil(mode_lib->vba.VRatio[k], 1.0) - 2, 0.0));
+ if (v->BytePerPixelC[k] == 0.0) {
+ v->MaximumSwathWidthInLineBufferChroma = 0;
+ } else {
+ v->MaximumSwathWidthInLineBufferChroma = mode_lib->vba.LineBufferSizeFinal
+ * dml_max(mode_lib->vba.HRatioChroma[k], 1.0) / mode_lib->vba.LBBitPerPixel[k]
+ / (mode_lib->vba.VTAPsChroma[k]
+ + dml_max(dml_ceil(mode_lib->vba.VRatioChroma[k], 1.0) - 2,
+ 0.0));
+ }
+ v->MaximumSwathWidthLuma[k] = dml_min(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportLuma,
+ v->MaximumSwathWidthInLineBufferLuma);
+ v->MaximumSwathWidthChroma[k] = dml_min(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaximumSwathWidthSupportChroma,
+ v->MaximumSwathWidthInLineBufferChroma);
+ }
+
+ dml32_CalculateSwathAndDETConfiguration(
+ &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
+ mode_lib->vba.DETSizeOverride,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.ConfigReturnBufferSizeInKByte,
+ mode_lib->vba.MaxTotalDETInKByte,
+ mode_lib->vba.MinCompressedBufferSizeInKByte,
+ 1, /* ForceSingleDPP */
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.nomDETInKByte,
+ mode_lib->vba.UseUnboundedRequesting,
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ mode_lib->vba.ip.pixel_chunk_size_kbytes,
+ mode_lib->vba.ip.rob_buffer_size_kbytes,
+ mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
+ mode_lib->vba.Output,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.MaximumSwathWidthLuma,
+ mode_lib->vba.MaximumSwathWidthChroma,
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.SourcePixelFormat,
+ mode_lib->vba.SurfaceTiling,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ mode_lib->vba.Read256BlockHeightY,
+ mode_lib->vba.Read256BlockHeightC,
+ mode_lib->vba.Read256BlockWidthY,
+ mode_lib->vba.Read256BlockWidthC,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_odm_mode,
+ mode_lib->vba.BlendingAndTiming,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.BytePerPixelInDETY,
+ mode_lib->vba.BytePerPixelInDETC,
+ mode_lib->vba.HActive,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0], /* Integer DPPPerSurface[] */
+
+ /* Output */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1], /* Long swath_width_luma_ub[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[2], /* Long swath_width_chroma_ub[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_double_array[0], /* Long SwathWidth[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_double_array[1], /* Long SwathWidthChroma[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[3], /* Integer SwathHeightY[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[4], /* Integer SwathHeightC[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[5], /* Long DETBufferSizeInKByte[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[6], /* Long DETBufferSizeY[] */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[7], /* Long DETBufferSizeC[] */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0][0], /* bool *UnboundedRequestEnabled */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0][0], /* Long *CompressedBufferSizeInkByte */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1][0], /* Long *CompBufReservedSpaceKBytes */
+ &CompBufReservedSpaceNeedAdjustmentSingleDPP,
+ mode_lib->vba.SingleDPPViewportSizeSupportPerSurface,/* bool ViewportSizeSupportPerSurface[] */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1][0]); /* bool *ViewportSizeSupport */
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = false;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_reduce_voltage_and_clocks)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = true;
+ if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_always_when_possible)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = true;
+ }
+ mode_lib->vba.MPCCombineMethodIncompatible = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage
+ && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible;
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] = 0;
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = true;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC = dm_odm_combine_mode_disabled;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC = dm_odm_combine_mode_disabled;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateODMMode(
+ mode_lib->vba.MaximumPixelsPerLinePerDSCUnit,
+ mode_lib->vba.HActive[k],
+ mode_lib->vba.Output[k],
+ mode_lib->vba.ODMUse[k],
+ mode_lib->vba.MaxDispclk[i],
+ mode_lib->vba.MaxDispclk[v->soc.num_states - 1],
+ false,
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j],
+ mode_lib->vba.MaxNumDPP,
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKRampingMargin,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed,
+
+ /* Output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC);
+
+ dml32_CalculateODMMode(
+ mode_lib->vba.MaximumPixelsPerLinePerDSCUnit,
+ mode_lib->vba.HActive[k],
+ mode_lib->vba.Output[k],
+ mode_lib->vba.ODMUse[k],
+ mode_lib->vba.MaxDispclk[i],
+ mode_lib->vba.MaxDispclk[v->soc.num_states - 1],
+ true,
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j],
+ mode_lib->vba.MaxNumDPP,
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKRampingMargin,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed,
+
+ /* Output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC);
+
+ dml32_CalculateOutputLink(
+ mode_lib->vba.PHYCLKPerState[i],
+ mode_lib->vba.PHYCLKD18PerState[i],
+ mode_lib->vba.PHYCLKD32PerState[i],
+ mode_lib->vba.Downspreading,
+ (mode_lib->vba.BlendingAndTiming[k] == k),
+ mode_lib->vba.Output[k],
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.HTotal[k],
+ mode_lib->vba.HActive[k],
+ mode_lib->vba.PixelClockBackEnd[k],
+ mode_lib->vba.ForcedOutputLinkBPP[k],
+ mode_lib->vba.DSCInputBitPerComponent[k],
+ mode_lib->vba.NumberOfDSCSlices[k],
+ mode_lib->vba.AudioSampleRate[k],
+ mode_lib->vba.AudioSampleLayout[k],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC,
+ mode_lib->vba.DSCEnable[k],
+ mode_lib->vba.OutputLinkDPLanes[k],
+ mode_lib->vba.OutputLinkDPRate[k],
+
+ /* Output */
+ &mode_lib->vba.RequiresDSC[i][k],
+ &mode_lib->vba.RequiresFEC[i][k],
+ &mode_lib->vba.OutputBppPerState[i][k],
+ &mode_lib->vba.OutputTypePerState[i][k],
+ &mode_lib->vba.OutputRatePerState[i][k],
+ &mode_lib->vba.RequiredSlots[i][k]);
+
+ if (mode_lib->vba.RequiresDSC[i][k] == false) {
+ mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC;
+ mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] =
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC;
+ if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC)
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = false;
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC;
+ } else {
+ mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC;
+ mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] =
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC;
+ if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC)
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = false;
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC;
+ }
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 4;
+ } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 2;
+ } else if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_never) {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 1;
+ } else if (dml32_RoundToDFSGranularity(
+ mode_lib->vba.MinDPPCLKUsingSingleDPP[k]
+ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
+ / 100), 1,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed) <= mode_lib->vba.MaxDppclk[i] &&
+ mode_lib->vba.SingleDPPViewportSizeSupportPerSurface[k] == true) {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 1;
+ } else if (mode_lib->vba.TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP) {
+ mode_lib->vba.MPCCombine[i][j][k] = true;
+ mode_lib->vba.NoOfDPP[i][j][k] = 2;
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] + 1;
+ } else {
+ mode_lib->vba.MPCCombine[i][j][k] = false;
+ mode_lib->vba.NoOfDPP[i][j][k] = 1;
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = false;
+ }
+ }
+
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] = 0;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = true;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.NoOfDPP[i][j][k] == 1)
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] =
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] + 1;
+ if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
+ || mode_lib->vba.SourcePixelFormat[k] == dm_420_10
+ || mode_lib->vba.SourcePixelFormat[k] == dm_420_12
+ || mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = false;
+ }
+ }
+
+ // if TotalNumberOfActiveDPP is > 1, then there should be no unbounded req mode (hw limitation), the comp buf reserved adjustment is not needed regardless
+ // if TotalNumberOfActiveDPP is == 1, then will use the SingleDPP version of unbounded_req for the decision
+ CompBufReservedSpaceNeedAdjustment = (mode_lib->vba.TotalNumberOfActiveDPP[i][j] > 1) ? 0 : CompBufReservedSpaceNeedAdjustmentSingleDPP;
+
+
+
+ if (j == 1 && !dml32_UnboundedRequest(mode_lib->vba.UseUnboundedRequesting,
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j], v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma,
+ mode_lib->vba.Output[0],
+ mode_lib->vba.SurfaceTiling[0],
+ CompBufReservedSpaceNeedAdjustment,
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)) {
+ while (!(mode_lib->vba.TotalNumberOfActiveDPP[i][j] >= mode_lib->vba.MaxNumDPP
+ || mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] == 0)) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = 0;
+ NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.MPCCombineUse[k]
+ != dm_mpc_never &&
+ mode_lib->vba.MPCCombineUse[k] != dm_mpc_reduce_voltage &&
+ mode_lib->vba.ReadBandwidthLuma[k] +
+ mode_lib->vba.ReadBandwidthChroma[k] >
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth &&
+ (mode_lib->vba.ODMCombineEnablePerState[i][k] !=
+ dm_odm_combine_mode_2to1 &&
+ mode_lib->vba.ODMCombineEnablePerState[i][k] !=
+ dm_odm_combine_mode_4to1) &&
+ mode_lib->vba.MPCCombine[i][j][k] == false) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth =
+ mode_lib->vba.ReadBandwidthLuma[k]
+ + mode_lib->vba.ReadBandwidthChroma[k];
+ NumberOfNonCombinedSurfaceOfMaximumBandwidth = k;
+ }
+ }
+ mode_lib->vba.MPCCombine[i][j][NumberOfNonCombinedSurfaceOfMaximumBandwidth] =
+ true;
+ mode_lib->vba.NoOfDPP[i][j][NumberOfNonCombinedSurfaceOfMaximumBandwidth] = 2;
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfActiveDPP[i][j] + 1;
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] =
+ mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] - 1;
+ }
+ }
+
+ //DISPCLK/DPPCLK
+ mode_lib->vba.WritebackRequiredDISPCLK = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.WritebackEnable[k]) {
+ mode_lib->vba.WritebackRequiredDISPCLK = dml_max(
+ mode_lib->vba.WritebackRequiredDISPCLK,
+ dml32_CalculateWriteBackDISPCLK(
+ mode_lib->vba.WritebackPixelFormat[k],
+ mode_lib->vba.PixelClock[k],
+ mode_lib->vba.WritebackHRatio[k],
+ mode_lib->vba.WritebackVRatio[k],
+ mode_lib->vba.WritebackHTaps[k],
+ mode_lib->vba.WritebackVTaps[k],
+ mode_lib->vba.WritebackSourceWidth[k],
+ mode_lib->vba.WritebackDestinationWidth[k],
+ mode_lib->vba.HTotal[k],
+ mode_lib->vba.WritebackLineBufferSize,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed));
+ }
+ }
+
+ mode_lib->vba.RequiredDISPCLK[i][j] = mode_lib->vba.WritebackRequiredDISPCLK;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.RequiredDISPCLK[i][j] = dml_max(mode_lib->vba.RequiredDISPCLK[i][j],
+ mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k]);
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k];
+
+ dml32_CalculateDPPCLK(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading,
+ mode_lib->vba.DISPCLKDPPCLKVCOSpeed, mode_lib->vba.MinDPPCLKUsingSingleDPP,
+ mode_lib->vba.NoOfDPPThisState,
+ /* Output */
+ &mode_lib->vba.GlobalDPPCLK, mode_lib->vba.RequiredDPPCLKThisState);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ mode_lib->vba.RequiredDPPCLK[i][j][k] = mode_lib->vba.RequiredDPPCLKThisState[k];
+
+ mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] = !((mode_lib->vba.RequiredDISPCLK[i][j]
+ > mode_lib->vba.MaxDispclk[i])
+ || (mode_lib->vba.GlobalDPPCLK > mode_lib->vba.MaxDppclk[i]));
+
+ if (mode_lib->vba.TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP)
+ mode_lib->vba.TotalAvailablePipesSupport[i][j] = false;
+ } // j
+ } // i (VOLTAGE_STATE)
+
+ /* Total Available OTG, HDMIFRL, DP Support Check */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = 0;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL = 0;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = 0;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = 0;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG + 1;
+ if (mode_lib->vba.Output[k] == dm_dp2p0) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 + 1;
+ if (mode_lib->vba.OutputMultistreamId[k]
+ == k || mode_lib->vba.OutputMultistreamEn[k] == false) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs + 1;
+ }
+ }
+ }
+ }
+
+ mode_lib->vba.NumberOfOTGSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG);
+ mode_lib->vba.NumberOfHDMIFRLSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs);
+ mode_lib->vba.NumberOfDP2p0Support = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams
+ && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs);
+
+ /* Display IO and DSC Support Check */
+ mode_lib->vba.NonsupportedDSCInputBPC = false;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0
+ || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0
+ || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0
+ || mode_lib->vba.DSCInputBitPerComponent[k] >
+ mode_lib->vba.MaximumDSCBitsPerComponent)) {
+ mode_lib->vba.NonsupportedDSCInputBPC = true;
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ mode_lib->vba.ExceededMultistreamSlots[i] = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == k) {
+ TotalSlots = mode_lib->vba.RequiredSlots[i][k];
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) {
+ if (mode_lib->vba.OutputMultistreamId[j] == k)
+ TotalSlots = TotalSlots + mode_lib->vba.RequiredSlots[i][j];
+ }
+ if (mode_lib->vba.Output[k] == dm_dp && TotalSlots > 63)
+ mode_lib->vba.ExceededMultistreamSlots[i] = true;
+ if (mode_lib->vba.Output[k] == dm_dp2p0 && TotalSlots > 64)
+ mode_lib->vba.ExceededMultistreamSlots[i] = true;
+ }
+ }
+ mode_lib->vba.LinkCapacitySupport[i] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0
+ || mode_lib->vba.Output[k] == dm_edp
+ || mode_lib->vba.Output[k] == dm_hdmi)
+ && mode_lib->vba.OutputBppPerState[i][k] == 0) {
+ mode_lib->vba.LinkCapacitySupport[i] = false;
+ }
+ }
+ }
+
+ mode_lib->vba.P2IWith420 = false;
+ mode_lib->vba.DSCOnlyIfNecessaryWithBPP = false;
+ mode_lib->vba.DSC422NativeNotSupported = false;
+ mode_lib->vba.LinkRateDoesNotMatchDPVersion = false;
+ mode_lib->vba.LinkRateForMultistreamNotIndicated = false;
+ mode_lib->vba.BPPForMultistreamNotIndicated = false;
+ mode_lib->vba.MultistreamWithHDMIOreDP = false;
+ mode_lib->vba.MSOOrODMSplitWithNonDPLink = false;
+ mode_lib->vba.NotEnoughLanesForMSO = false;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0
+ || mode_lib->vba.Output[k] == dm_edp
+ || mode_lib->vba.Output[k] == dm_hdmi)) {
+ if (mode_lib->vba.OutputFormat[k]
+ == dm_420 && mode_lib->vba.Interlace[k] == 1 &&
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true)
+ mode_lib->vba.P2IWith420 = true;
+
+ if (mode_lib->vba.DSCEnable[k] && mode_lib->vba.ForcedOutputLinkBPP[k] != 0)
+ mode_lib->vba.DSCOnlyIfNecessaryWithBPP = true;
+ if ((mode_lib->vba.DSCEnable[k] || mode_lib->vba.DSCEnable[k])
+ && mode_lib->vba.OutputFormat[k] == dm_n422
+ && !mode_lib->vba.DSC422NativeSupport)
+ mode_lib->vba.DSC422NativeNotSupported = true;
+
+ if (((mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_hbr
+ || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_hbr2
+ || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_hbr3)
+ && mode_lib->vba.Output[k] != dm_dp && mode_lib->vba.Output[k] != dm_edp)
+ || ((mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_uhbr10
+ || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5
+ || mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_uhbr20)
+ && mode_lib->vba.Output[k] != dm_dp2p0))
+ mode_lib->vba.LinkRateDoesNotMatchDPVersion = true;
+
+ if (mode_lib->vba.OutputMultistreamEn[k] == true) {
+ if (mode_lib->vba.OutputMultistreamId[k] == k
+ && mode_lib->vba.OutputLinkDPRate[k] == dm_dp_rate_na)
+ mode_lib->vba.LinkRateForMultistreamNotIndicated = true;
+ if (mode_lib->vba.OutputMultistreamId[k] == k && mode_lib->vba.ForcedOutputLinkBPP[k] == 0)
+ mode_lib->vba.BPPForMultistreamNotIndicated = true;
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) {
+ if (mode_lib->vba.OutputMultistreamId[k] == j && mode_lib->vba.OutputMultistreamEn[k]
+ && mode_lib->vba.ForcedOutputLinkBPP[k] == 0)
+ mode_lib->vba.BPPForMultistreamNotIndicated = true;
+ }
+ }
+
+ if ((mode_lib->vba.Output[k] == dm_edp || mode_lib->vba.Output[k] == dm_hdmi)) {
+ if (mode_lib->vba.OutputMultistreamId[k] == k && mode_lib->vba.OutputMultistreamEn[k])
+ mode_lib->vba.MultistreamWithHDMIOreDP = true;
+
+ for (j = 0; j < mode_lib->vba.NumberOfActiveSurfaces; ++j) {
+ if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == j)
+ mode_lib->vba.MultistreamWithHDMIOreDP = true;
+ }
+ }
+
+ if (mode_lib->vba.Output[k] != dm_dp
+ && (mode_lib->vba.ODMUse[k] == dm_odm_split_policy_1to2
+ || mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to2
+ || mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to4))
+ mode_lib->vba.MSOOrODMSplitWithNonDPLink = true;
+
+ if ((mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to2
+ && mode_lib->vba.OutputLinkDPLanes[k] < 2)
+ || (mode_lib->vba.ODMUse[k] == dm_odm_mso_policy_1to4
+ && mode_lib->vba.OutputLinkDPLanes[k] < 4))
+ mode_lib->vba.NotEnoughLanesForMSO = true;
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && dml32_RequiredDTBCLK(mode_lib->vba.RequiresDSC[i][k],
+ mode_lib->vba.PixelClockBackEnd[k],
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.OutputBppPerState[i][k],
+ mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.HTotal[k],
+ mode_lib->vba.HActive[k], mode_lib->vba.AudioSampleRate[k],
+ mode_lib->vba.AudioSampleLayout[k])
+ > mode_lib->vba.DTBCLKPerState[i]) {
+ mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] = true;
+ }
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ mode_lib->vba.ODMCombine2To1SupportCheckOK[i] = true;
+ mode_lib->vba.ODMCombine4To1SupportCheckOK[i] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1
+ && mode_lib->vba.Output[k] == dm_hdmi) {
+ mode_lib->vba.ODMCombine2To1SupportCheckOK[i] = false;
+ }
+ if (mode_lib->vba.BlendingAndTiming[k] == k
+ && mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
+ && (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_edp
+ || mode_lib->vba.Output[k] == dm_hdmi)) {
+ mode_lib->vba.ODMCombine4To1SupportCheckOK[i] = false;
+ }
+ }
+ }
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = false;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ if (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0
+ || mode_lib->vba.Output[k] == dm_edp) {
+ if (mode_lib->vba.OutputFormat[k] == dm_420) {
+ mode_lib->vba.DSCFormatFactor = 2;
+ } else if (mode_lib->vba.OutputFormat[k] == dm_444) {
+ mode_lib->vba.DSCFormatFactor = 1;
+ } else if (mode_lib->vba.OutputFormat[k] == dm_n422) {
+ mode_lib->vba.DSCFormatFactor = 2;
+ } else {
+ mode_lib->vba.DSCFormatFactor = 1;
+ }
+ if (mode_lib->vba.RequiresDSC[i][k] == true) {
+ if (mode_lib->vba.ODMCombineEnablePerState[i][k]
+ == dm_odm_combine_mode_4to1) {
+ if (mode_lib->vba.PixelClockBackEnd[k] / 12.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i])
+ mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = true;
+ } else if (mode_lib->vba.ODMCombineEnablePerState[i][k]
+ == dm_odm_combine_mode_2to1) {
+ if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i])
+ mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = true;
+ } else {
+ if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i])
+ mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] = true;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* Check DSC Unit and Slices Support */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0;
+
+ for (i = 0; i < v->soc.num_states; ++i) {
+ mode_lib->vba.NotEnoughDSCUnits[i] = false;
+ mode_lib->vba.NotEnoughDSCSlices[i] = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0;
+ mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.RequiresDSC[i][k] == true) {
+ if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
+ if (mode_lib->vba.HActive[k]
+ > 4 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit)
+ mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 4;
+ if (mode_lib->vba.NumberOfDSCSlices[k] > 16)
+ mode_lib->vba.NotEnoughDSCSlices[i] = true;
+ } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
+ if (mode_lib->vba.HActive[k]
+ > 2 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit)
+ mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 2;
+ if (mode_lib->vba.NumberOfDSCSlices[k] > 8)
+ mode_lib->vba.NotEnoughDSCSlices[i] = true;
+ } else {
+ if (mode_lib->vba.HActive[k] > mode_lib->vba.MaximumPixelsPerLinePerDSCUnit)
+ mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 1;
+ if (mode_lib->vba.NumberOfDSCSlices[k] > 4)
+ mode_lib->vba.NotEnoughDSCSlices[i] = true;
+ }
+ }
+ }
+ if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC)
+ mode_lib->vba.NotEnoughDSCUnits[i] = true;
+ }
+
+ /*DSC Delay per state*/
+ for (i = 0; i < v->soc.num_states; ++i) {
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.DSCDelayPerState[i][k] = dml32_DSCDelayRequirement(
+ mode_lib->vba.RequiresDSC[i][k], mode_lib->vba.ODMCombineEnablePerState[i][k],
+ mode_lib->vba.DSCInputBitPerComponent[k],
+ mode_lib->vba.OutputBppPerState[i][k], mode_lib->vba.HActive[k],
+ mode_lib->vba.HTotal[k], mode_lib->vba.NumberOfDSCSlices[k],
+ mode_lib->vba.OutputFormat[k], mode_lib->vba.Output[k],
+ mode_lib->vba.PixelClock[k], mode_lib->vba.PixelClockBackEnd[k]);
+ }
+
+ m = 0;
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ for (m = 0; m <= mode_lib->vba.NumberOfActiveSurfaces - 1; m++) {
+ for (j = 0; j <= mode_lib->vba.NumberOfActiveSurfaces - 1; j++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == m &&
+ mode_lib->vba.RequiresDSC[i][m] == true) {
+ mode_lib->vba.DSCDelayPerState[i][k] =
+ mode_lib->vba.DSCDelayPerState[i][m];
+ }
+ }
+ }
+ }
+ }
+
+ //Calculate Swath, DET Configuration, DCFCLKDeepSleep
+ //
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.RequiredDPPCLKThisState[k] = mode_lib->vba.RequiredDPPCLK[i][j][k];
+ mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k];
+ mode_lib->vba.ODMCombineEnableThisState[k] =
+ mode_lib->vba.ODMCombineEnablePerState[i][k];
+ }
+
+ dml32_CalculateSwathAndDETConfiguration(
+ &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
+ mode_lib->vba.DETSizeOverride,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.ConfigReturnBufferSizeInKByte,
+ mode_lib->vba.MaxTotalDETInKByte,
+ mode_lib->vba.MinCompressedBufferSizeInKByte,
+ false, /* ForceSingleDPP */
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.nomDETInKByte,
+ mode_lib->vba.UseUnboundedRequesting,
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ mode_lib->vba.ip.pixel_chunk_size_kbytes,
+ mode_lib->vba.ip.rob_buffer_size_kbytes,
+ mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
+ mode_lib->vba.Output,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.MaximumSwathWidthLuma,
+ mode_lib->vba.MaximumSwathWidthChroma,
+ mode_lib->vba.SourceRotation,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.SourcePixelFormat,
+ mode_lib->vba.SurfaceTiling,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ mode_lib->vba.Read256BlockHeightY,
+ mode_lib->vba.Read256BlockHeightC,
+ mode_lib->vba.Read256BlockWidthY,
+ mode_lib->vba.Read256BlockWidthC,
+ mode_lib->vba.ODMCombineEnableThisState,
+ mode_lib->vba.BlendingAndTiming,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.BytePerPixelInDETY,
+ mode_lib->vba.BytePerPixelInDETC,
+ mode_lib->vba.HActive,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.NoOfDPPThisState,
+ /* Output */
+ mode_lib->vba.swath_width_luma_ub_this_state,
+ mode_lib->vba.swath_width_chroma_ub_this_state,
+ mode_lib->vba.SwathWidthYThisState,
+ mode_lib->vba.SwathWidthCThisState,
+ mode_lib->vba.SwathHeightYThisState,
+ mode_lib->vba.SwathHeightCThisState,
+ mode_lib->vba.DETBufferSizeInKByteThisState,
+ mode_lib->vba.DETBufferSizeYThisState,
+ mode_lib->vba.DETBufferSizeCThisState,
+ &mode_lib->vba.UnboundedRequestEnabledThisState,
+ &mode_lib->vba.CompressedBufferSizeInkByteThisState,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], /* Long CompBufReservedSpaceKBytes */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0],
+ &mode_lib->vba.ViewportSizeSupport[i][j]);
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.swath_width_luma_ub_all_states[i][j][k] =
+ mode_lib->vba.swath_width_luma_ub_this_state[k];
+ mode_lib->vba.swath_width_chroma_ub_all_states[i][j][k] =
+ mode_lib->vba.swath_width_chroma_ub_this_state[k];
+ mode_lib->vba.SwathWidthYAllStates[i][j][k] = mode_lib->vba.SwathWidthYThisState[k];
+ mode_lib->vba.SwathWidthCAllStates[i][j][k] = mode_lib->vba.SwathWidthCThisState[k];
+ mode_lib->vba.SwathHeightYAllStates[i][j][k] = mode_lib->vba.SwathHeightYThisState[k];
+ mode_lib->vba.SwathHeightCAllStates[i][j][k] = mode_lib->vba.SwathHeightCThisState[k];
+ mode_lib->vba.UnboundedRequestEnabledAllStates[i][j] =
+ mode_lib->vba.UnboundedRequestEnabledThisState;
+ mode_lib->vba.CompressedBufferSizeInkByteAllStates[i][j] =
+ mode_lib->vba.CompressedBufferSizeInkByteThisState;
+ mode_lib->vba.DETBufferSizeInKByteAllStates[i][j][k] =
+ mode_lib->vba.DETBufferSizeInKByteThisState[k];
+ mode_lib->vba.DETBufferSizeYAllStates[i][j][k] =
+ mode_lib->vba.DETBufferSizeYThisState[k];
+ mode_lib->vba.DETBufferSizeCAllStates[i][j][k] =
+ mode_lib->vba.DETBufferSizeCThisState[k];
+ }
+ }
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0]
+ * mode_lib->vba.CursorBPP[k][0] / 8.0
+ / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
+ }
+
+ dml32_CalculateSurfaceSizeInMall(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.MALLAllocatedForDCNFinal,
+ mode_lib->vba.UseMALLForStaticScreen,
+ mode_lib->vba.DCCEnable,
+ mode_lib->vba.ViewportStationary,
+ mode_lib->vba.ViewportXStartY,
+ mode_lib->vba.ViewportYStartY,
+ mode_lib->vba.ViewportXStartC,
+ mode_lib->vba.ViewportYStartC,
+ mode_lib->vba.ViewportWidth,
+ mode_lib->vba.ViewportHeight,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.ViewportWidthChroma,
+ mode_lib->vba.ViewportHeightChroma,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.SurfaceWidthY,
+ mode_lib->vba.SurfaceWidthC,
+ mode_lib->vba.SurfaceHeightY,
+ mode_lib->vba.SurfaceHeightC,
+ mode_lib->vba.Read256BlockWidthY,
+ mode_lib->vba.Read256BlockWidthC,
+ mode_lib->vba.Read256BlockHeightY,
+ mode_lib->vba.Read256BlockHeightC,
+ mode_lib->vba.MicroTileWidthY,
+ mode_lib->vba.MicroTileWidthC,
+ mode_lib->vba.MicroTileHeightY,
+ mode_lib->vba.MicroTileHeightC,
+
+ /* Output */
+ mode_lib->vba.SurfaceSizeInMALL,
+ &mode_lib->vba.ExceededMALLSize);
+
+ for (i = 0; i < v->soc.num_states; i++) {
+ for (j = 0; j < 2; j++) {
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.swath_width_luma_ub_this_state[k] =
+ mode_lib->vba.swath_width_luma_ub_all_states[i][j][k];
+ mode_lib->vba.swath_width_chroma_ub_this_state[k] =
+ mode_lib->vba.swath_width_chroma_ub_all_states[i][j][k];
+ mode_lib->vba.SwathWidthYThisState[k] = mode_lib->vba.SwathWidthYAllStates[i][j][k];
+ mode_lib->vba.SwathWidthCThisState[k] = mode_lib->vba.SwathWidthCAllStates[i][j][k];
+ mode_lib->vba.SwathHeightYThisState[k] = mode_lib->vba.SwathHeightYAllStates[i][j][k];
+ mode_lib->vba.SwathHeightCThisState[k] = mode_lib->vba.SwathHeightCAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeInKByteThisState[k] =
+ mode_lib->vba.DETBufferSizeInKByteAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeYThisState[k] =
+ mode_lib->vba.DETBufferSizeYAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeCThisState[k] =
+ mode_lib->vba.DETBufferSizeCAllStates[i][j][k];
+ mode_lib->vba.RequiredDPPCLKThisState[k] = mode_lib->vba.RequiredDPPCLK[i][j][k];
+ mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k];
+ }
+
+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] = 0;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.DCCEnable[k] == true) {
+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] =
+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j]
+ + mode_lib->vba.NoOfDPP[i][j][k];
+ }
+ }
+
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].PixelClock = mode_lib->vba.PixelClock[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SourceRotation = mode_lib->vba.SourceRotation[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportHeight = mode_lib->vba.ViewportHeight[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportHeightChroma = mode_lib->vba.ViewportHeightChroma[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthY = mode_lib->vba.MicroTileWidthY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightY = mode_lib->vba.MicroTileHeightY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthC = mode_lib->vba.MicroTileWidthC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightC = mode_lib->vba.MicroTileHeightC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].InterlaceEnable = mode_lib->vba.Interlace[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].HTotal = mode_lib->vba.HTotal[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCEnable = mode_lib->vba.DCCEnable[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SurfaceTiling = mode_lib->vba.SurfaceTiling[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BytePerPixelY = mode_lib->vba.BytePerPixelY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BytePerPixelC = mode_lib->vba.BytePerPixelC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ProgressiveToInterlaceUnitInOPP =
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VRatio = mode_lib->vba.VRatio[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VRatioChroma = mode_lib->vba.VRatioChroma[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VTaps = mode_lib->vba.vtaps[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].VTapsChroma = mode_lib->vba.VTAPsChroma[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].PitchY = mode_lib->vba.PitchY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCMetaPitchY = mode_lib->vba.DCCMetaPitchY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].PitchC = mode_lib->vba.PitchC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCMetaPitchC = mode_lib->vba.DCCMetaPitchC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportStationary = mode_lib->vba.ViewportStationary[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportXStart = mode_lib->vba.ViewportXStartY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportYStart = mode_lib->vba.ViewportYStartY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportXStartC = mode_lib->vba.ViewportXStartC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].ViewportYStartC = mode_lib->vba.ViewportYStartC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->vba.ForceOneRowForFrame[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SwathHeightY = mode_lib->vba.SwathHeightYThisState[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].SwathHeightC = mode_lib->vba.SwathHeightCThisState[k];
+ }
+
+ {
+ dml32_CalculateVMRowAndSwath(
+ &v->dummy_vars.dml32_CalculateVMRowAndSwath,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters,
+ mode_lib->vba.SurfaceSizeInMALL,
+ mode_lib->vba.PTEBufferSizeInRequestsLuma,
+ mode_lib->vba.PTEBufferSizeInRequestsChroma,
+ mode_lib->vba.DCCMetaBufferSizeBytes,
+ mode_lib->vba.UseMALLForStaticScreen,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.MALLAllocatedForDCNFinal,
+ mode_lib->vba.SwathWidthYThisState,
+ mode_lib->vba.SwathWidthCThisState,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.GPUVMMinPageSizeKBytes,
+ mode_lib->vba.HostVMMinPageSize,
+
+ /* Output */
+ mode_lib->vba.PTEBufferSizeNotExceededPerState,
+ mode_lib->vba.DCCMetaBufferSizeNotExceededPerState,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1],
+ mode_lib->vba.dpte_row_height,
+ mode_lib->vba.dpte_row_height_chroma,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[2],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[3],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[4],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[5],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[6],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[7],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[8],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[9],
+ mode_lib->vba.meta_row_height,
+ mode_lib->vba.meta_row_height_chroma,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[10],
+ mode_lib->vba.dpte_group_bytes,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[11],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[12],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[13],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[14],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[15],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[16],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[17],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[18],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[19],
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[20],
+ mode_lib->vba.PrefetchLinesYThisState,
+ mode_lib->vba.PrefetchLinesCThisState,
+ mode_lib->vba.PrefillY,
+ mode_lib->vba.PrefillC,
+ mode_lib->vba.MaxNumSwY,
+ mode_lib->vba.MaxNumSwC,
+ mode_lib->vba.meta_row_bandwidth_this_state,
+ mode_lib->vba.dpte_row_bandwidth_this_state,
+ mode_lib->vba.DPTEBytesPerRowThisState,
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrameThisState,
+ mode_lib->vba.MetaRowBytesThisState,
+ mode_lib->vba.use_one_row_for_frame_this_state,
+ mode_lib->vba.use_one_row_for_frame_flip_this_state,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0], // Boolean UsesMALLForStaticScreen[]
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1], // Boolean PTE_BUFFER_MODE[]
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[21]); // Long BIGK_FRAGMENT_SIZE[]
+ }
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.PrefetchLinesY[i][j][k] = mode_lib->vba.PrefetchLinesYThisState[k];
+ mode_lib->vba.PrefetchLinesC[i][j][k] = mode_lib->vba.PrefetchLinesCThisState[k];
+ mode_lib->vba.meta_row_bandwidth[i][j][k] =
+ mode_lib->vba.meta_row_bandwidth_this_state[k];
+ mode_lib->vba.dpte_row_bandwidth[i][j][k] =
+ mode_lib->vba.dpte_row_bandwidth_this_state[k];
+ mode_lib->vba.DPTEBytesPerRow[i][j][k] = mode_lib->vba.DPTEBytesPerRowThisState[k];
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k] =
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrameThisState[k];
+ mode_lib->vba.MetaRowBytes[i][j][k] = mode_lib->vba.MetaRowBytesThisState[k];
+ mode_lib->vba.use_one_row_for_frame[i][j][k] =
+ mode_lib->vba.use_one_row_for_frame_this_state[k];
+ mode_lib->vba.use_one_row_for_frame_flip[i][j][k] =
+ mode_lib->vba.use_one_row_for_frame_flip_this_state[k];
+ }
+
+ mode_lib->vba.PTEBufferSizeNotExceeded[i][j] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.PTEBufferSizeNotExceededPerState[k] == false)
+ mode_lib->vba.PTEBufferSizeNotExceeded[i][j] = false;
+ }
+
+ mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.DCCMetaBufferSizeNotExceededPerState[k] == false)
+ mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] = false;
+ }
+
+ mode_lib->vba.UrgLatency[i] = dml32_CalculateUrgentLatency(
+ mode_lib->vba.UrgentLatencyPixelDataOnly,
+ mode_lib->vba.UrgentLatencyPixelMixedWithVMData,
+ mode_lib->vba.UrgentLatencyVMDataOnly, mode_lib->vba.DoUrgentLatencyAdjustment,
+ mode_lib->vba.UrgentLatencyAdjustmentFabricClockComponent,
+ mode_lib->vba.UrgentLatencyAdjustmentFabricClockReference,
+ mode_lib->vba.FabricClockPerState[i]);
+
+ //bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ dml32_CalculateUrgentBurstFactor(
+ mode_lib->vba.UsesMALLForPStateChange[k],
+ mode_lib->vba.swath_width_luma_ub_this_state[k],
+ mode_lib->vba.swath_width_chroma_ub_this_state[k],
+ mode_lib->vba.SwathHeightYThisState[k],
+ mode_lib->vba.SwathHeightCThisState[k],
+ (double) mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ mode_lib->vba.UrgLatency[i],
+ mode_lib->vba.CursorBufferSize,
+ mode_lib->vba.CursorWidth[k][0],
+ mode_lib->vba.CursorBPP[k][0],
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ mode_lib->vba.BytePerPixelInDETY[k],
+ mode_lib->vba.BytePerPixelInDETC[k],
+ mode_lib->vba.DETBufferSizeYThisState[k],
+ mode_lib->vba.DETBufferSizeCThisState[k],
+ /* Output */
+ &mode_lib->vba.UrgentBurstFactorCursor[k],
+ &mode_lib->vba.UrgentBurstFactorLuma[k],
+ &mode_lib->vba.UrgentBurstFactorChroma[k],
+ &mode_lib->vba.NoUrgentLatencyHiding[k]);
+ }
+
+ dml32_CalculateDCFCLKDeepSleep(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ mode_lib->vba.SwathWidthYThisState,
+ mode_lib->vba.SwathWidthCThisState,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.PSCL_FACTOR,
+ mode_lib->vba.PSCL_FACTOR_CHROMA,
+ mode_lib->vba.RequiredDPPCLKThisState,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.ReturnBusWidth,
+
+ /* Output */
+ &mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]);
+ }
+ }
+
+ m = 0;
+
+ //Calculate Return BW
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ if (mode_lib->vba.WritebackEnable[k] == true) {
+ mode_lib->vba.WritebackDelayTime[k] =
+ mode_lib->vba.WritebackLatency
+ + dml32_CalculateWriteBackDelay(
+ mode_lib->vba.WritebackPixelFormat[k],
+ mode_lib->vba.WritebackHRatio[k],
+ mode_lib->vba.WritebackVRatio[k],
+ mode_lib->vba.WritebackVTaps[k],
+ mode_lib->vba.WritebackDestinationWidth[k],
+ mode_lib->vba.WritebackDestinationHeight[k],
+ mode_lib->vba.WritebackSourceHeight[k],
+ mode_lib->vba.HTotal[k])
+ / mode_lib->vba.RequiredDISPCLK[i][j];
+ } else {
+ mode_lib->vba.WritebackDelayTime[k] = 0.0;
+ }
+ for (m = 0; m <= mode_lib->vba.NumberOfActiveSurfaces - 1; m++) {
+ if (mode_lib->vba.BlendingAndTiming[m]
+ == k && mode_lib->vba.WritebackEnable[m] == true) {
+ mode_lib->vba.WritebackDelayTime[k] =
+ dml_max(mode_lib->vba.WritebackDelayTime[k],
+ mode_lib->vba.WritebackLatency
+ + dml32_CalculateWriteBackDelay(
+ mode_lib->vba.WritebackPixelFormat[m],
+ mode_lib->vba.WritebackHRatio[m],
+ mode_lib->vba.WritebackVRatio[m],
+ mode_lib->vba.WritebackVTaps[m],
+ mode_lib->vba.WritebackDestinationWidth[m],
+ mode_lib->vba.WritebackDestinationHeight[m],
+ mode_lib->vba.WritebackSourceHeight[m],
+ mode_lib->vba.HTotal[m]) /
+ mode_lib->vba.RequiredDISPCLK[i][j]);
+ }
+ }
+ }
+ }
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ for (m = 0; m <= mode_lib->vba.NumberOfActiveSurfaces - 1; m++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == m) {
+ mode_lib->vba.WritebackDelayTime[k] =
+ mode_lib->vba.WritebackDelayTime[m];
+ }
+ }
+ }
+ mode_lib->vba.MaxMaxVStartup[i][j] = 0;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.MaximumVStartup[i][j][k] = ((mode_lib->vba.Interlace[k] &&
+ !mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ?
+ dml_floor((mode_lib->vba.VTotal[k] -
+ mode_lib->vba.VActive[k]) / 2.0, 1.0) :
+ mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k])
+ - dml_max(1.0, dml_ceil(1.0 *
+ mode_lib->vba.WritebackDelayTime[k] /
+ (mode_lib->vba.HTotal[k] /
+ mode_lib->vba.PixelClock[k]), 1.0));
+
+ // Clamp to max OTG vstartup register limit
+ if (mode_lib->vba.MaximumVStartup[i][j][k] > 1023)
+ mode_lib->vba.MaximumVStartup[i][j][k] = 1023;
+
+ mode_lib->vba.MaxMaxVStartup[i][j] = dml_max(mode_lib->vba.MaxMaxVStartup[i][j],
+ mode_lib->vba.MaximumVStartup[i][j][k]);
+ }
+ }
+ }
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes = mode_lib->vba.NumberOfChannels
+ * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
+
+ dml32_CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowForPStateChangeOrStutterInVBlankFinal,
+ &mode_lib->vba.MinPrefetchMode,
+ &mode_lib->vba.MaxPrefetchMode);
+
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j)
+ mode_lib->vba.DCFCLKState[i][j] = mode_lib->vba.DCFCLKPerState[i];
+ }
+
+ /* Immediate Flip and MALL parameters */
+ mode_lib->vba.ImmediateFlipRequiredFinal = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.ImmediateFlipRequiredFinal = mode_lib->vba.ImmediateFlipRequiredFinal
+ || (mode_lib->vba.ImmediateFlipRequirement[k] == dm_immediate_flip_required);
+ }
+
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified =
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
+ || ((mode_lib->vba.ImmediateFlipRequirement[k]
+ != dm_immediate_flip_required)
+ && (mode_lib->vba.ImmediateFlipRequirement[k]
+ != dm_immediate_flip_not_required));
+ }
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified =
+ mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
+ && mode_lib->vba.ImmediateFlipRequiredFinal;
+
+ mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
+ mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
+ ((mode_lib->vba.HostVMEnable == true || mode_lib->vba.ImmediateFlipRequirement[k] !=
+ dm_immediate_flip_not_required) &&
+ (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame ||
+ mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe));
+ }
+
+ mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen =
+ mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen
+ || ((mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable
+ || mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_optimize)
+ && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe))
+ || ((mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_disable
+ || mode_lib->vba.UseMALLForStaticScreen[k] == dm_use_mall_static_screen_optimize)
+ && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame));
+ }
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = false;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = false;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = true;
+ if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = true;
+ if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = true;
+ }
+ mode_lib->vba.InvalidCombinationOfMALLUseForPState = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod
+ != v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod) || (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod);
+
+ if (mode_lib->vba.UseMinimumRequiredDCFCLK == true) {
+ dml32_UseMinimumDCFCLK(
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.DRRDisplay,
+ mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->vba.MaxInterDCNTileRepeaters,
+ mode_lib->vba.MaxPrefetchMode,
+ mode_lib->vba.DRAMClockChangeLatency,
+ mode_lib->vba.FCLKChangeLatency,
+ mode_lib->vba.SREnterPlusExitTime,
+ mode_lib->vba.ReturnBusWidth,
+ mode_lib->vba.RoundTripPingLatencyCycles,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes,
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.MetaChunkSize,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.DynamicMetadataVMEnabled,
+ mode_lib->vba.ImmediateFlipRequiredFinal,
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
+ mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
+ mode_lib->vba.PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
+ mode_lib->vba.VTotal,
+ mode_lib->vba.VActive,
+ mode_lib->vba.DynamicMetadataTransmittedBytes,
+ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired,
+ mode_lib->vba.Interlace,
+ mode_lib->vba.RequiredDPPCLK,
+ mode_lib->vba.RequiredDISPCLK,
+ mode_lib->vba.UrgLatency,
+ mode_lib->vba.NoOfDPP,
+ mode_lib->vba.ProjectedDCFCLKDeepSleep,
+ mode_lib->vba.MaximumVStartup,
+ mode_lib->vba.TotalNumberOfActiveDPP,
+ mode_lib->vba.TotalNumberOfDCCActiveDPP,
+ mode_lib->vba.dpte_group_bytes,
+ mode_lib->vba.PrefetchLinesY,
+ mode_lib->vba.PrefetchLinesC,
+ mode_lib->vba.swath_width_luma_ub_all_states,
+ mode_lib->vba.swath_width_chroma_ub_all_states,
+ mode_lib->vba.BytePerPixelY,
+ mode_lib->vba.BytePerPixelC,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrame,
+ mode_lib->vba.DPTEBytesPerRow,
+ mode_lib->vba.MetaRowBytes,
+ mode_lib->vba.DynamicMetadataEnable,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.DCFCLKPerState,
+
+ /* Output */
+ mode_lib->vba.DCFCLKState);
+ } // UseMinimumRequiredDCFCLK == true
+
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ mode_lib->vba.ReturnBWPerState[i][j] = dml32_get_return_bw_mbps(&mode_lib->vba.soc, i,
+ mode_lib->vba.HostVMEnable, mode_lib->vba.DCFCLKState[i][j],
+ mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]);
+ }
+ }
+
+ //Re-ordering Buffer Support Check
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024
+ / mode_lib->vba.ReturnBWPerState[i][j]
+ > (mode_lib->vba.RoundTripPingLatencyCycles + 32)
+ / mode_lib->vba.DCFCLKState[i][j]
+ + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) {
+ mode_lib->vba.ROBSupport[i][j] = true;
+ } else {
+ mode_lib->vba.ROBSupport[i][j] = false;
+ }
+ }
+ }
+
+ //Vertical Active BW support check
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth = 0;
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth += mode_lib->vba.ReadBandwidthLuma[k]
+ + mode_lib->vba.ReadBandwidthChroma[k];
+ }
+
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][j] =
+ dml_min3(mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKState[i][j]
+ * mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
+ mode_lib->vba.FabricClockPerState[i]
+ * mode_lib->vba.FabricDatapathToDCNDataReturn
+ * mode_lib->vba.MaxAveragePercentOfIdealFabricBWDisplayCanUseInNormalSystemOperation / 100,
+ mode_lib->vba.DRAMSpeedPerState[i]
+ * mode_lib->vba.NumberOfChannels
+ * mode_lib->vba.DRAMChannelWidth
+ * (i < 2 ? mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE : mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation) / 100);
+
+ if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MaxTotalVActiveRDBandwidth
+ <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
+ mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] = true;
+ } else {
+ mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] = false;
+ }
+ }
+ }
+
+ /* Prefetch Check */
+
+ for (i = 0; i < (int) v->soc.num_states; ++i) {
+ for (j = 0; j <= 1; ++j) {
+
+ mode_lib->vba.TimeCalc = 24 / mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j];
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.NoOfDPPThisState[k] = mode_lib->vba.NoOfDPP[i][j][k];
+ mode_lib->vba.swath_width_luma_ub_this_state[k] =
+ mode_lib->vba.swath_width_luma_ub_all_states[i][j][k];
+ mode_lib->vba.swath_width_chroma_ub_this_state[k] =
+ mode_lib->vba.swath_width_chroma_ub_all_states[i][j][k];
+ mode_lib->vba.SwathWidthYThisState[k] = mode_lib->vba.SwathWidthYAllStates[i][j][k];
+ mode_lib->vba.SwathWidthCThisState[k] = mode_lib->vba.SwathWidthCAllStates[i][j][k];
+ mode_lib->vba.SwathHeightYThisState[k] = mode_lib->vba.SwathHeightYAllStates[i][j][k];
+ mode_lib->vba.SwathHeightCThisState[k] = mode_lib->vba.SwathHeightCAllStates[i][j][k];
+ mode_lib->vba.UnboundedRequestEnabledThisState =
+ mode_lib->vba.UnboundedRequestEnabledAllStates[i][j];
+ mode_lib->vba.CompressedBufferSizeInkByteThisState =
+ mode_lib->vba.CompressedBufferSizeInkByteAllStates[i][j];
+ mode_lib->vba.DETBufferSizeInKByteThisState[k] =
+ mode_lib->vba.DETBufferSizeInKByteAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeYThisState[k] =
+ mode_lib->vba.DETBufferSizeYAllStates[i][j][k];
+ mode_lib->vba.DETBufferSizeCThisState[k] =
+ mode_lib->vba.DETBufferSizeCAllStates[i][j][k];
+ }
+
+ mode_lib->vba.VActiveBandwithSupport[i][j] = dml32_CalculateVActiveBandwithSupport(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.NoUrgentLatencyHiding,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.cursor_bw,
+ mode_lib->vba.meta_row_bandwidth_this_state,
+ mode_lib->vba.dpte_row_bandwidth_this_state,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.UrgentBurstFactorLuma,
+ mode_lib->vba.UrgentBurstFactorChroma,
+ mode_lib->vba.UrgentBurstFactorCursor);
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i,
+ mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i],
+ mode_lib->vba.DRAMSpeedPerState[i]);
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = 1;
+
+ if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable)
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j]
+ / v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState;
+
+ mode_lib->vba.ExtraLatency = dml32_CalculateExtraLatency(
+ mode_lib->vba.RoundTripPingLatencyCycles, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes,
+ mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.TotalNumberOfActiveDPP[i][j],
+ mode_lib->vba.PixelChunkSizeInKByte,
+ mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j], mode_lib->vba.MetaChunkSize,
+ mode_lib->vba.ReturnBWPerState[i][j], mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMEnable, mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.NoOfDPPThisState, mode_lib->vba.dpte_group_bytes,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels);
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = mode_lib->vba.MinPrefetchMode;
+
+ mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j];
+
+ do {
+ mode_lib->vba.PrefetchModePerState[i][j] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState;
+ mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup;
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.TWait = dml32_CalculateTWait(
+ mode_lib->vba.PrefetchModePerState[i][j],
+ mode_lib->vba.UsesMALLForPStateChange[k],
+ mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->vba.DRRDisplay[k],
+ mode_lib->vba.DRAMClockChangeLatency,
+ mode_lib->vba.FCLKChangeLatency, mode_lib->vba.UrgLatency[i],
+ mode_lib->vba.SREnterPlusExitTime);
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.PixelClock = mode_lib->vba.PixelClock[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HTotal = mode_lib->vba.HTotal[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HActive = mode_lib->vba.HActive[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ODMMode = mode_lib->vba.ODMCombineEnablePerState[i][k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ProgressiveToInterlaceUnitInOPP =
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
+
+ mode_lib->vba.NoTimeForPrefetch[i][j][k] =
+ dml32_CalculatePrefetchSchedule(
+ &v->dummy_vars.dml32_CalculatePrefetchSchedule,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor,
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe,
+ mode_lib->vba.DSCDelayPerState[i][k],
+ mode_lib->vba.DPPCLKDelaySubtotal +
+ mode_lib->vba.DPPCLKDelayCNVCFormater,
+ mode_lib->vba.DPPCLKDelaySCL,
+ mode_lib->vba.DPPCLKDelaySCLLBOnly,
+ mode_lib->vba.DPPCLKDelayCNVCCursor,
+ mode_lib->vba.DISPCLKDelaySubtotal,
+ mode_lib->vba.SwathWidthYThisState[k] /
+ mode_lib->vba.HRatio[k],
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.MaxInterDCNTileRepeaters,
+ dml_min(mode_lib->vba.MaxVStartup,
+ mode_lib->vba.MaximumVStartup[i][j][k]),
+ mode_lib->vba.MaximumVStartup[i][j][k],
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.GPUVMEnable, mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.DynamicMetadataEnable[k],
+ mode_lib->vba.DynamicMetadataVMEnabled,
+ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
+ mode_lib->vba.DynamicMetadataTransmittedBytes[k],
+ mode_lib->vba.UrgLatency[i],
+ mode_lib->vba.ExtraLatency,
+ mode_lib->vba.TimeCalc,
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k],
+ mode_lib->vba.MetaRowBytes[i][j][k],
+ mode_lib->vba.DPTEBytesPerRow[i][j][k],
+ mode_lib->vba.PrefetchLinesY[i][j][k],
+ mode_lib->vba.SwathWidthYThisState[k],
+ mode_lib->vba.PrefillY[k],
+ mode_lib->vba.MaxNumSwY[k],
+ mode_lib->vba.PrefetchLinesC[i][j][k],
+ mode_lib->vba.SwathWidthCThisState[k],
+ mode_lib->vba.PrefillC[k],
+ mode_lib->vba.MaxNumSwC[k],
+ mode_lib->vba.swath_width_luma_ub_this_state[k],
+ mode_lib->vba.swath_width_chroma_ub_this_state[k],
+ mode_lib->vba.SwathHeightYThisState[k],
+ mode_lib->vba.SwathHeightCThisState[k], mode_lib->vba.TWait,
+
+ /* Output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k],
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler[k],
+ &mode_lib->vba.LineTimesForPrefetch[k],
+ &mode_lib->vba.PrefetchBW[k],
+ &mode_lib->vba.LinesForMetaPTE[k],
+ &mode_lib->vba.LinesForMetaAndDPTERow[k],
+ &mode_lib->vba.VRatioPreY[i][j][k],
+ &mode_lib->vba.VRatioPreC[i][j][k],
+ &mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0][k],
+ &mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0][k],
+ &mode_lib->vba.NoTimeForDynamicMetadata[i][j][k],
+ &mode_lib->vba.Tno_bw[k],
+ &mode_lib->vba.prefetch_vmrow_bw[k],
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // double *Tdmdl_vm
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // double *Tdmdl
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[2], // double *TSetup
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // unsigned int *VUpdateOffsetPix
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[3], // unsigned int *VUpdateWidthPix
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[4]); // unsigned int *VReadyOffsetPix
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ dml32_CalculateUrgentBurstFactor(
+ mode_lib->vba.UsesMALLForPStateChange[k],
+ mode_lib->vba.swath_width_luma_ub_this_state[k],
+ mode_lib->vba.swath_width_chroma_ub_this_state[k],
+ mode_lib->vba.SwathHeightYThisState[k],
+ mode_lib->vba.SwathHeightCThisState[k],
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ mode_lib->vba.UrgLatency[i], mode_lib->vba.CursorBufferSize,
+ mode_lib->vba.CursorWidth[k][0], mode_lib->vba.CursorBPP[k][0],
+ mode_lib->vba.VRatioPreY[i][j][k],
+ mode_lib->vba.VRatioPreC[i][j][k],
+ mode_lib->vba.BytePerPixelInDETY[k],
+ mode_lib->vba.BytePerPixelInDETC[k],
+ mode_lib->vba.DETBufferSizeYThisState[k],
+ mode_lib->vba.DETBufferSizeCThisState[k],
+ /* Output */
+ &mode_lib->vba.UrgentBurstFactorCursorPre[k],
+ &mode_lib->vba.UrgentBurstFactorLumaPre[k],
+ &mode_lib->vba.UrgentBurstFactorChroma[k],
+ &mode_lib->vba.NotUrgentLatencyHidingPre[k]);
+ }
+
+ {
+ dml32_CalculatePrefetchBandwithSupport(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.NotUrgentLatencyHidingPre,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0],
+ mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0],
+ mode_lib->vba.cursor_bw,
+ mode_lib->vba.meta_row_bandwidth_this_state,
+ mode_lib->vba.dpte_row_bandwidth_this_state,
+ mode_lib->vba.cursor_bw_pre,
+ mode_lib->vba.prefetch_vmrow_bw,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.UrgentBurstFactorLuma,
+ mode_lib->vba.UrgentBurstFactorChroma,
+ mode_lib->vba.UrgentBurstFactorCursor,
+ mode_lib->vba.UrgentBurstFactorLumaPre,
+ mode_lib->vba.UrgentBurstFactorChromaPre,
+ mode_lib->vba.UrgentBurstFactorCursorPre,
+
+ /* output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // Single *PrefetchBandwidth
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // Single *FractionOfUrgentBandwidth
+ &mode_lib->vba.PrefetchSupported[i][j]);
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.LineTimesForPrefetch[k]
+ < 2.0 || mode_lib->vba.LinesForMetaPTE[k] >= 32.0
+ || mode_lib->vba.LinesForMetaAndDPTERow[k] >= 16.0
+ || mode_lib->vba.NoTimeForPrefetch[i][j][k] == true) {
+ mode_lib->vba.PrefetchSupported[i][j] = false;
+ }
+ }
+
+ mode_lib->vba.DynamicMetadataSupported[i][j] = true;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.NoTimeForDynamicMetadata[i][j][k] == true)
+ mode_lib->vba.DynamicMetadataSupported[i][j] = false;
+ }
+
+ mode_lib->vba.VRatioInPrefetchSupported[i][j] = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.VRatioPreY[i][j][k] > __DML_MAX_VRATIO_PRE__
+ || mode_lib->vba.VRatioPreC[i][j][k] > __DML_MAX_VRATIO_PRE__
+ || mode_lib->vba.NoTimeForPrefetch[i][j][k] == true) {
+ mode_lib->vba.VRatioInPrefetchSupported[i][j] = false;
+ }
+ }
+ mode_lib->vba.AnyLinesForVMOrRowTooLarge = false;
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ if (mode_lib->vba.LinesForMetaAndDPTERow[k] >= 16
+ || mode_lib->vba.LinesForMetaPTE[k] >= 32) {
+ mode_lib->vba.AnyLinesForVMOrRowTooLarge = true;
+ }
+ }
+
+ if (mode_lib->vba.PrefetchSupported[i][j] == true
+ && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) {
+ mode_lib->vba.BandwidthAvailableForImmediateFlip =
+ dml32_CalculateBandwidthAvailableForImmediateFlip(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0],
+ mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0],
+ mode_lib->vba.cursor_bw,
+ mode_lib->vba.cursor_bw_pre,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.UrgentBurstFactorLuma,
+ mode_lib->vba.UrgentBurstFactorChroma,
+ mode_lib->vba.UrgentBurstFactorCursor,
+ mode_lib->vba.UrgentBurstFactorLumaPre,
+ mode_lib->vba.UrgentBurstFactorChromaPre,
+ mode_lib->vba.UrgentBurstFactorCursorPre);
+
+ mode_lib->vba.TotImmediateFlipBytes = 0.0;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (!(mode_lib->vba.ImmediateFlipRequirement[k] ==
+ dm_immediate_flip_not_required)) {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes
+ + mode_lib->vba.NoOfDPP[i][j][k]
+ * mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k]
+ + mode_lib->vba.MetaRowBytes[i][j][k];
+ if (mode_lib->vba.use_one_row_for_frame_flip[i][j][k]) {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes + 2
+ * mode_lib->vba.DPTEBytesPerRow[i][j][k];
+ } else {
+ mode_lib->vba.TotImmediateFlipBytes =
+ mode_lib->vba.TotImmediateFlipBytes
+ + mode_lib->vba.DPTEBytesPerRow[i][j][k];
+ }
+ }
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ dml32_CalculateFlipSchedule(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor,
+ mode_lib->vba.ExtraLatency,
+ mode_lib->vba.UrgLatency[i],
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.HostVMEnable,
+ mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
+ mode_lib->vba.GPUVMEnable,
+ mode_lib->vba.HostVMMinPageSize,
+ mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k],
+ mode_lib->vba.MetaRowBytes[i][j][k],
+ mode_lib->vba.DPTEBytesPerRow[i][j][k],
+ mode_lib->vba.BandwidthAvailableForImmediateFlip,
+ mode_lib->vba.TotImmediateFlipBytes,
+ mode_lib->vba.SourcePixelFormat[k],
+ (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]),
+ mode_lib->vba.VRatio[k],
+ mode_lib->vba.VRatioChroma[k],
+ mode_lib->vba.Tno_bw[k],
+ mode_lib->vba.DCCEnable[k],
+ mode_lib->vba.dpte_row_height[k],
+ mode_lib->vba.meta_row_height[k],
+ mode_lib->vba.dpte_row_height_chroma[k],
+ mode_lib->vba.meta_row_height_chroma[k],
+ mode_lib->vba.use_one_row_for_frame_flip[i][j][k], // 24
+
+ /* Output */
+ &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k],
+ &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k],
+ &mode_lib->vba.final_flip_bw[k],
+ &mode_lib->vba.ImmediateFlipSupportedForPipe[k]);
+ }
+
+ {
+ dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.ImmediateFlipRequirement,
+ mode_lib->vba.final_flip_bw,
+ mode_lib->vba.ReadBandwidthLuma,
+ mode_lib->vba.ReadBandwidthChroma,
+ mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0],
+ mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0],
+ mode_lib->vba.cursor_bw,
+ mode_lib->vba.meta_row_bandwidth_this_state,
+ mode_lib->vba.dpte_row_bandwidth_this_state,
+ mode_lib->vba.cursor_bw_pre,
+ mode_lib->vba.prefetch_vmrow_bw,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.UrgentBurstFactorLuma,
+ mode_lib->vba.UrgentBurstFactorChroma,
+ mode_lib->vba.UrgentBurstFactorCursor,
+ mode_lib->vba.UrgentBurstFactorLumaPre,
+ mode_lib->vba.UrgentBurstFactorChromaPre,
+ mode_lib->vba.UrgentBurstFactorCursorPre,
+
+ /* output */
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // Single *TotalBandwidth
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // Single *FractionOfUrgentBandwidth
+ &mode_lib->vba.ImmediateFlipSupportedForState[i][j]); // Boolean *ImmediateFlipBandwidthSupport
+ }
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (!(mode_lib->vba.ImmediateFlipRequirement[k]
+ == dm_immediate_flip_not_required)
+ && (mode_lib->vba.ImmediateFlipSupportedForPipe[k]
+ == false))
+ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
+ }
+ } else { // if prefetch not support, assume iflip not supported
+ mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false;
+ }
+
+ if (mode_lib->vba.MaxVStartup <= __DML_VBA_MIN_VSTARTUP__
+ || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) {
+ mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState + 1;
+ } else {
+ mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1;
+ }
+ } while (!((mode_lib->vba.PrefetchSupported[i][j] == true
+ && mode_lib->vba.DynamicMetadataSupported[i][j] == true
+ && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true &&
+ // consider flip support is okay if when there is no hostvm and the
+ // user does't require a iflip OR the flip bw is ok
+ // If there is hostvm, DCN needs to support iflip for invalidation
+ ((mode_lib->vba.HostVMEnable == false
+ && !mode_lib->vba.ImmediateFlipRequiredFinal)
+ || mode_lib->vba.ImmediateFlipSupportedForState[i][j] == true))
+ || (mode_lib->vba.NextMaxVStartup == mode_lib->vba.MaxMaxVStartup[i][j]
+ && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode)));
+
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
+ mode_lib->vba.use_one_row_for_frame_this_state[k] =
+ mode_lib->vba.use_one_row_for_frame[i][j][k];
+ }
+
+
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.UrgentLatency = mode_lib->vba.UrgLatency[i];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.ExtraLatency = mode_lib->vba.ExtraLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SRExitTime = mode_lib->vba.SRExitTime;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency;
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters.SMNLatency = mode_lib->vba.SMNLatency;
+
+ {
+ dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport,
+ mode_lib->vba.USRRetrainingRequiredFinal,
+ mode_lib->vba.UsesMALLForPStateChange,
+ mode_lib->vba.PrefetchModePerState[i][j],
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.MaxLineBufferLines,
+ mode_lib->vba.LineBufferSizeFinal,
+ mode_lib->vba.WritebackInterfaceBufferSize,
+ mode_lib->vba.DCFCLKState[i][j],
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.SynchronizeTimingsFinal,
+ mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->vba.DRRDisplay,
+ mode_lib->vba.dpte_group_bytes,
+ mode_lib->vba.meta_row_height,
+ mode_lib->vba.meta_row_height_chroma,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters,
+ mode_lib->vba.WritebackChunkSize,
+ mode_lib->vba.SOCCLKPerState[i],
+ mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j],
+ mode_lib->vba.DETBufferSizeYThisState,
+ mode_lib->vba.DETBufferSizeCThisState,
+ mode_lib->vba.SwathHeightYThisState,
+ mode_lib->vba.SwathHeightCThisState,
+ mode_lib->vba.LBBitPerPixel,
+ mode_lib->vba.SwathWidthYThisState, // 24
+ mode_lib->vba.SwathWidthCThisState,
+ mode_lib->vba.HRatio,
+ mode_lib->vba.HRatioChroma,
+ mode_lib->vba.vtaps,
+ mode_lib->vba.VTAPsChroma,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.VTotal,
+ mode_lib->vba.VActive,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.BlendingAndTiming,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.BytePerPixelInDETY,
+ mode_lib->vba.BytePerPixelInDETC,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler,
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler,
+ mode_lib->vba.WritebackEnable,
+ mode_lib->vba.WritebackPixelFormat,
+ mode_lib->vba.WritebackDestinationWidth,
+ mode_lib->vba.WritebackDestinationHeight,
+ mode_lib->vba.WritebackSourceHeight,
+ mode_lib->vba.UnboundedRequestEnabledThisState,
+ mode_lib->vba.CompressedBufferSizeInkByteThisState,
+
+ /* Output */
+ &mode_lib->vba.Watermark, // Store the values in vba
+ &mode_lib->vba.DRAMClockChangeSupport[i][j],
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[0], // double *MaxActiveDRAMClockChangeLatencySupported
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // Long SubViewportLinesNeededInMALL[]
+ &mode_lib->vba.FCLKChangeSupport[i][j],
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[1], // double *MinActiveFCLKChangeLatencySupported
+ &mode_lib->vba.USRRetrainingSupport[i][j],
+ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin);
+ }
+ }
+ } // End of Prefetch Check
+
+ /*Cursor Support Check*/
+ mode_lib->vba.CursorSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.CursorWidth[k][0] > 0.0) {
+ if (mode_lib->vba.CursorBPP[k][0] == 64 && mode_lib->vba.Cursor64BppSupport == false)
+ mode_lib->vba.CursorSupport = false;
+ }
+ }
+
+ /*Valid Pitch Check*/
+ mode_lib->vba.PitchSupport = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.AlignedYPitch[k] = dml_ceil(
+ dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.SurfaceWidthY[k]),
+ mode_lib->vba.MacroTileWidthY[k]);
+ if (mode_lib->vba.DCCEnable[k] == true) {
+ mode_lib->vba.AlignedDCCMetaPitchY[k] = dml_ceil(
+ dml_max(mode_lib->vba.DCCMetaPitchY[k], mode_lib->vba.SurfaceWidthY[k]),
+ 64.0 * mode_lib->vba.Read256BlockWidthY[k]);
+ } else {
+ mode_lib->vba.AlignedDCCMetaPitchY[k] = mode_lib->vba.DCCMetaPitchY[k];
+ }
+ if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe
+ && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) {
+ mode_lib->vba.AlignedCPitch[k] = dml_ceil(
+ dml_max(mode_lib->vba.PitchC[k], mode_lib->vba.SurfaceWidthC[k]),
+ mode_lib->vba.MacroTileWidthC[k]);
+ if (mode_lib->vba.DCCEnable[k] == true) {
+ mode_lib->vba.AlignedDCCMetaPitchC[k] = dml_ceil(
+ dml_max(mode_lib->vba.DCCMetaPitchC[k],
+ mode_lib->vba.SurfaceWidthC[k]),
+ 64.0 * mode_lib->vba.Read256BlockWidthC[k]);
+ } else {
+ mode_lib->vba.AlignedDCCMetaPitchC[k] = mode_lib->vba.DCCMetaPitchC[k];
+ }
+ } else {
+ mode_lib->vba.AlignedCPitch[k] = mode_lib->vba.PitchC[k];
+ mode_lib->vba.AlignedDCCMetaPitchC[k] = mode_lib->vba.DCCMetaPitchC[k];
+ }
+ if (mode_lib->vba.AlignedYPitch[k] > mode_lib->vba.PitchY[k]
+ || mode_lib->vba.AlignedCPitch[k] > mode_lib->vba.PitchC[k]
+ || mode_lib->vba.AlignedDCCMetaPitchY[k] > mode_lib->vba.DCCMetaPitchY[k]
+ || mode_lib->vba.AlignedDCCMetaPitchC[k] > mode_lib->vba.DCCMetaPitchC[k]) {
+ mode_lib->vba.PitchSupport = false;
+ }
+ }
+
+ mode_lib->vba.ViewportExceedsSurface = false;
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.ViewportWidth[k] > mode_lib->vba.SurfaceWidthY[k]
+ || mode_lib->vba.ViewportHeight[k] > mode_lib->vba.SurfaceHeightY[k]) {
+ mode_lib->vba.ViewportExceedsSurface = true;
+ if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
+ && mode_lib->vba.SourcePixelFormat[k] != dm_444_8
+ && mode_lib->vba.SourcePixelFormat[k] != dm_rgbe) {
+ if (mode_lib->vba.ViewportWidthChroma[k] > mode_lib->vba.SurfaceWidthC[k]
+ || mode_lib->vba.ViewportHeightChroma[k]
+ > mode_lib->vba.SurfaceHeightC[k]) {
+ mode_lib->vba.ViewportExceedsSurface = true;
+ }
+ }
+ }
+ }
+
+ /*Mode Support, Voltage State and SOC Configuration*/
+ mode_support_configuration(v, mode_lib);
+
+ MaximumMPCCombine = 0;
+
+ for (i = v->soc.num_states; i >= 0; i--) {
+ if (i == v->soc.num_states || mode_lib->vba.ModeSupport[i][0] == true ||
+ mode_lib->vba.ModeSupport[i][1] == true) {
+ mode_lib->vba.VoltageLevel = i;
+ mode_lib->vba.ModeIsSupported = mode_lib->vba.ModeSupport[i][0] == true
+ || mode_lib->vba.ModeSupport[i][1] == true;
+
+ if (mode_lib->vba.ModeSupport[i][0] == true) {
+ MaximumMPCCombine = 0;
+ } else {
+ MaximumMPCCombine = 1;
+ }
+ }
+ }
+
+ mode_lib->vba.ImmediateFlipSupport =
+ mode_lib->vba.ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.UnboundedRequestEnabled =
+ mode_lib->vba.UnboundedRequestEnabledAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.CompressedBufferSizeInkByte =
+ mode_lib->vba.CompressedBufferSizeInkByteAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; // Not used, informational
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ mode_lib->vba.MPCCombineEnable[k] =
+ mode_lib->vba.MPCCombine[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.DPPPerPlane[k] = mode_lib->vba.NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.SwathHeightY[k] =
+ mode_lib->vba.SwathHeightYAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.SwathHeightC[k] =
+ mode_lib->vba.SwathHeightCAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.DETBufferSizeInKByte[k] =
+ mode_lib->vba.DETBufferSizeInKByteAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.DETBufferSizeY[k] =
+ mode_lib->vba.DETBufferSizeYAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.DETBufferSizeC[k] =
+ mode_lib->vba.DETBufferSizeCAllStates[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
+ mode_lib->vba.OutputType[k] = mode_lib->vba.OutputTypePerState[mode_lib->vba.VoltageLevel][k];
+ mode_lib->vba.OutputRate[k] = mode_lib->vba.OutputRatePerState[mode_lib->vba.VoltageLevel][k];
+ }
+
+ mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
+ mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
+ mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
+ mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.DISPCLK = mode_lib->vba.RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+ mode_lib->vba.maxMpcComb = MaximumMPCCombine;
+
+ for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ mode_lib->vba.ODMCombineEnabled[k] =
+ mode_lib->vba.ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
+ } else {
+ mode_lib->vba.ODMCombineEnabled[k] = dm_odm_combine_mode_disabled;
+ }
+
+ mode_lib->vba.DSCEnabled[k] = mode_lib->vba.RequiresDSC[mode_lib->vba.VoltageLevel][k];
+ mode_lib->vba.FECEnable[k] = mode_lib->vba.RequiresFEC[mode_lib->vba.VoltageLevel][k];
+ mode_lib->vba.OutputBpp[k] = mode_lib->vba.OutputBppPerState[mode_lib->vba.VoltageLevel][k];
+ }
+
+ mode_lib->vba.UrgentWatermark = mode_lib->vba.Watermark.UrgentWatermark;
+ mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.Watermark.StutterEnterPlusExitWatermark;
+ mode_lib->vba.StutterExitWatermark = mode_lib->vba.Watermark.StutterExitWatermark;
+ mode_lib->vba.WritebackDRAMClockChangeWatermark = mode_lib->vba.Watermark.WritebackDRAMClockChangeWatermark;
+ mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.Watermark.DRAMClockChangeWatermark;
+ mode_lib->vba.UrgentLatency = mode_lib->vba.UrgLatency[mode_lib->vba.VoltageLevel];
+ mode_lib->vba.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
+
+ /* VBA has Error type to Error Msg output here, but not necessary for DML-C */
+} // ModeSupportAndSystemConfigurationFull
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h
new file mode 100644
index 000000000000..c62e0991358b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML32_DISPLAY_MODE_VBA_H__
+#define __DML32_DISPLAY_MODE_VBA_H__
+
+#include "../display_mode_enums.h"
+
+// To enable a lot of debug msg
+//#define __DML_VBA_DEBUG__
+// For DML-C changes that hasn't been propagated to VBA yet
+//#define __DML_VBA_ALLOW_DELTA__
+
+// Move these to ip parameters/constant
+// At which vstartup the DML start to try if the mode can be supported
+#define __DML_VBA_MIN_VSTARTUP__ 9
+
+// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
+#define __DML_ARB_TO_RET_DELAY__ 7 + 95
+
+// fudge factor for min dcfclk calclation
+#define __DML_MIN_DCFCLK_FACTOR__ 1.15
+
+// Prefetch schedule max vratio
+#define __DML_MAX_VRATIO_PRE__ 4.0
+
+#define BPP_INVALID 0
+#define BPP_BLENDED_PIPE 0xffffffff
+
+struct display_mode_lib;
+
+void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib);
+void dml32_recalculate(struct display_mode_lib *mode_lib);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
new file mode 100644
index 000000000000..07f8f3b8626b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -0,0 +1,6175 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "display_mode_vba_util_32.h"
+#include "../dml_inline_defs.h"
+#include "display_mode_vba_32.h"
+#include "../display_mode_lib.h"
+
+unsigned int dml32_dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum output_format_class pixelFormat,
+ enum output_encoder_class Output)
+{
+ // valid bpc = source bits per component in the set of {8, 10, 12}
+ // valid bpp = increments of 1/16 of a bit
+ // min = 6/7/8 in N420/N422/444, respectively
+ // max = such that compression is 1:1
+ //valid sliceWidth = number of pixels per slice line,
+ // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
+ //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
+ //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
+
+ // fixed value
+ unsigned int rcModelSize = 8192;
+
+ // N422/N420 operate at 2 pixels per clock
+ unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
+ Delay, pixels;
+
+ if (pixelFormat == dm_420)
+ pixelsPerClock = 2;
+ else if (pixelFormat == dm_n422)
+ pixelsPerClock = 2;
+ // #all other modes operate at 1 pixel per clock
+ else
+ pixelsPerClock = 1;
+
+ //initial transmit delay as per PPS
+ initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
+
+ //compute ssm delay
+ if (bpc == 8)
+ D = 81;
+ else if (bpc == 10)
+ D = 89;
+ else
+ D = 113;
+
+ //divide by pixel per cycle to compute slice width as seen by DSC
+ w = sliceWidth / pixelsPerClock;
+
+ //422 mode has an additional cycle of delay
+ if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
+ s = 0;
+ else
+ s = 1;
+
+ //main calculation for the dscce
+ ix = initalXmitDelay + 45;
+ wx = (w + 2) / 3;
+ p = 3 * wx - w;
+ l0 = ix / w;
+ a = ix + p * l0;
+ ax = (a + 2) / 3 + D + 6 + 1;
+ L = (ax + wx - 1) / wx;
+ if ((ix % w) == 0 && p != 0)
+ lstall = 1;
+ else
+ lstall = 0;
+ Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
+
+ //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
+ pixels = Delay * 3 * pixelsPerClock;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: bpc: %d\n", __func__, bpc);
+ dml_print("DML::%s: BPP: %f\n", __func__, BPP);
+ dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
+ dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
+ dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
+ dml_print("DML::%s: Output: %d\n", __func__, Output);
+ dml_print("DML::%s: pixels: %d\n", __func__, pixels);
+#endif
+
+ return pixels;
+}
+
+unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
+{
+ unsigned int Delay = 0;
+
+ if (pixelFormat == dm_420) {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 0;
+ // dscc - input deserializer
+ Delay = Delay + 3;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 2;
+ // dscc - input cdc fifo
+ Delay = Delay + 12;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 13;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 7;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 3;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // sft
+ Delay = Delay + 1;
+ } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 1;
+ // dscc - input deserializer
+ Delay = Delay + 5;
+ // dscc - input cdc fifo
+ Delay = Delay + 25;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 10;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // sft
+ Delay = Delay + 1;
+ } else {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 0;
+ // dscc - input deserializer
+ Delay = Delay + 3;
+ // dscc - input cdc fifo
+ Delay = Delay + 12;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 7;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // sft
+ Delay = Delay + 1;
+ }
+
+ return Delay;
+}
+
+
+bool IsVertical(enum dm_rotation_angle Scan)
+{
+ bool is_vert = false;
+
+ if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
+ is_vert = true;
+ else
+ is_vert = false;
+ return is_vert;
+}
+
+void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
+ double HRatio,
+ double HRatioChroma,
+ double VRatio,
+ double VRatioChroma,
+ double MaxDCHUBToPSCLThroughput,
+ double MaxPSCLToLBThroughput,
+ double PixelClock,
+ enum source_format_class SourcePixelFormat,
+ unsigned int HTaps,
+ unsigned int HTapsChroma,
+ unsigned int VTaps,
+ unsigned int VTapsChroma,
+
+ /* output */
+ double *PSCL_THROUGHPUT,
+ double *PSCL_THROUGHPUT_CHROMA,
+ double *DPPCLKUsingSingleDPP)
+{
+ double DPPCLKUsingSingleDPPLuma;
+ double DPPCLKUsingSingleDPPChroma;
+
+ if (HRatio > 1) {
+ *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
+ dml_ceil((double) HTaps / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+
+ DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
+ *PSCL_THROUGHPUT, 1);
+
+ if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
+ DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
+
+ if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
+ SourcePixelFormat != dm_rgbe_alpha)) {
+ *PSCL_THROUGHPUT_CHROMA = 0;
+ *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
+ } else {
+ if (HRatioChroma > 1) {
+ *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
+ HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+ DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
+ HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
+ if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
+ DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
+ *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
+ }
+}
+
+void dml32_CalculateBytePerPixelAndBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+
+ /* Output */
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC,
+ unsigned int *MacroTileHeightY,
+ unsigned int *MacroTileHeightC,
+ unsigned int *MacroTileWidthY,
+ unsigned int *MacroTileWidthC)
+{
+ if (SourcePixelFormat == dm_444_64) {
+ *BytePerPixelDETY = 8;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 8;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_16) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 1;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 1;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 2;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 2;
+ } else if (SourcePixelFormat == dm_420_12) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 4;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ } else {
+ *BytePerPixelDETY = 4.0 / 3;
+ *BytePerPixelDETC = 8.0 / 3;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
+ dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
+ dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
+ dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
+#endif
+ if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
+ || SourcePixelFormat == dm_444_16
+ || SourcePixelFormat == dm_444_8
+ || SourcePixelFormat == dm_mono_16
+ || SourcePixelFormat == dm_mono_8
+ || SourcePixelFormat == dm_rgbe)) {
+ if (SurfaceTiling == dm_sw_linear)
+ *BlockHeight256BytesY = 1;
+ else if (SourcePixelFormat == dm_444_64)
+ *BlockHeight256BytesY = 4;
+ else if (SourcePixelFormat == dm_444_8)
+ *BlockHeight256BytesY = 16;
+ else
+ *BlockHeight256BytesY = 8;
+
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockHeight256BytesC = 0;
+ *BlockWidth256BytesC = 0;
+ } else {
+ if (SurfaceTiling == dm_sw_linear) {
+ *BlockHeight256BytesY = 1;
+ *BlockHeight256BytesC = 1;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 16;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BlockHeight256BytesY = 16;
+ *BlockHeight256BytesC = 8;
+ } else {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 8;
+ }
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
+ dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
+ dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
+ dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
+#endif
+
+ if (SurfaceTiling == dm_sw_linear) {
+ *MacroTileHeightY = *BlockHeight256BytesY;
+ *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
+ *MacroTileHeightC = *BlockHeight256BytesC;
+ if (*MacroTileHeightC == 0)
+ *MacroTileWidthC = 0;
+ else
+ *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
+ } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
+ SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
+ *MacroTileHeightY = 16 * *BlockHeight256BytesY;
+ *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
+ *MacroTileHeightC = 16 * *BlockHeight256BytesC;
+ if (*MacroTileHeightC == 0)
+ *MacroTileWidthC = 0;
+ else
+ *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
+ } else {
+ *MacroTileHeightY = 32 * *BlockHeight256BytesY;
+ *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
+ *MacroTileHeightC = 32 * *BlockHeight256BytesC;
+ if (*MacroTileHeightC == 0)
+ *MacroTileWidthC = 0;
+ else
+ *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
+ dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
+ dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
+ dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
+#endif
+} // CalculateBytePerPixelAndBlockSizes
+
+void dml32_CalculateSwathAndDETConfiguration(
+ struct dml32_CalculateSwathAndDETConfiguration *st_vars,
+ unsigned int DETSizeOverride[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ double ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int nomDETInKByte,
+ enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ unsigned int PixelChunkSizeKBytes,
+ unsigned int ROBSizeKBytes,
+ unsigned int CompressedBufferSegmentSizeInkByteFinal,
+ enum output_encoder_class Output[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double MaximumSwathWidthLuma[],
+ double MaximumSwathWidthChroma[],
+ enum dm_rotation_angle SourceRotation[],
+ bool ViewportStationary[],
+ enum source_format_class SourcePixelFormat[],
+ enum dm_swizzle_mode SurfaceTiling[],
+ unsigned int ViewportWidth[],
+ unsigned int ViewportHeight[],
+ unsigned int ViewportXStart[],
+ unsigned int ViewportYStart[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ enum odm_combine_mode ODMMode[],
+ unsigned int BlendingAndTiming[],
+ unsigned int BytePerPixY[],
+ unsigned int BytePerPixC[],
+ double BytePerPixDETY[],
+ double BytePerPixDETC[],
+ unsigned int HActive[],
+ double HRatio[],
+ double HRatioChroma[],
+ unsigned int DPPPerSurface[],
+
+ /* Output */
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ double SwathWidth[],
+ double SwathWidthChroma[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ bool *UnboundedRequestEnabled,
+ unsigned int *CompressedBufferSizeInkByte,
+ unsigned int *CompBufReservedSpaceKBytes,
+ bool *CompBufReservedSpaceNeedAdjustment,
+ bool ViewportSizeSupportPerSurface[],
+ bool *ViewportSizeSupport)
+{
+ unsigned int k;
+
+ st_vars->TotalActiveDPP = 0;
+ st_vars->NoChromaSurfaces = true;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
+ dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
+ dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
+#endif
+ dml32_CalculateSwathWidth(ForceSingleDPP,
+ NumberOfActiveSurfaces,
+ SourcePixelFormat,
+ SourceRotation,
+ ViewportStationary,
+ ViewportWidth,
+ ViewportHeight,
+ ViewportXStart,
+ ViewportYStart,
+ ViewportXStartC,
+ ViewportYStartC,
+ SurfaceWidthY,
+ SurfaceWidthC,
+ SurfaceHeightY,
+ SurfaceHeightC,
+ ODMMode,
+ BytePerPixY,
+ BytePerPixC,
+ Read256BytesBlockHeightY,
+ Read256BytesBlockHeightC,
+ Read256BytesBlockWidthY,
+ Read256BytesBlockWidthC,
+ BlendingAndTiming,
+ HActive,
+ HRatio,
+ DPPPerSurface,
+
+ /* Output */
+ st_vars->SwathWidthdoubleDPP,
+ st_vars->SwathWidthdoubleDPPChroma,
+ SwathWidth,
+ SwathWidthChroma,
+ st_vars->MaximumSwathHeightY,
+ st_vars->MaximumSwathHeightC,
+ swath_width_luma_ub,
+ swath_width_chroma_ub);
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k];
+ st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
+ dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
+ st_vars->RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
+ dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
+ st_vars->RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+
+ if (SourcePixelFormat[k] == dm_420_10) {
+ st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256);
+ st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256);
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
+ if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
+ SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
+ st_vars->NoChromaSurfaces = false;
+ }
+ }
+
+ // By default, just set the reserved space to 2 pixel chunks size
+ *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
+
+ // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
+ // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
+ // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
+ *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512);
+
+ if (*CompBufReservedSpaceNeedAdjustment == 1) {
+ *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512;
+ }
+
+ #ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes);
+ dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
+ #endif
+
+ *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+
+ dml32_CalculateDETBufferSize(DETSizeOverride,
+ UseMALLForPStateChange,
+ ForceSingleDPP,
+ NumberOfActiveSurfaces,
+ *UnboundedRequestEnabled,
+ nomDETInKByte,
+ MaxTotalDETInKByte,
+ ConfigReturnBufferSizeInKByte,
+ MinCompressedBufferSizeInKByte,
+ CompressedBufferSegmentSizeInkByteFinal,
+ SourcePixelFormat,
+ ReadBandwidthLuma,
+ ReadBandwidthChroma,
+ st_vars->RoundedUpMaxSwathSizeBytesY,
+ st_vars->RoundedUpMaxSwathSizeBytesC,
+ DPPPerSurface,
+
+ /* Output */
+ DETBufferSizeInKByte, // per hubp pipe
+ CompressedBufferSizeInkByte);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP);
+ dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
+ dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
+#endif
+
+ *ViewportSizeSupport = true;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
+ dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
+ st_vars->DETBufferSizeInKByteForSwathCalculation);
+#endif
+
+ if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
+ st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
+ SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
+ st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
+ st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
+ } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
+ st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
+ st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
+ SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
+ st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
+ st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
+ } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
+ st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <=
+ st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
+ SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
+ st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
+ st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
+ } else {
+ SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
+ SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
+ st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
+ st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
+ }
+
+ if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 >
+ st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
+ || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
+ SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
+ *ViewportSizeSupport = false;
+ ViewportSizeSupportPerSurface[k] = false;
+ } else {
+ ViewportSizeSupportPerSurface[k] = true;
+ }
+
+ if (SwathHeightC[k] == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
+#endif
+ DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
+ DETBufferSizeC[k] = 0;
+ } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
+#endif
+ DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
+ DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
+ } else {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
+#endif
+ DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
+ DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
+ dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
+ k, st_vars->RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
+ k, st_vars->RoundedUpMaxSwathSizeBytesC[k]);
+ dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY);
+ dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC);
+ dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
+ dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
+ ViewportSizeSupportPerSurface[k]);
+#endif
+
+ }
+} // CalculateSwathAndDETConfiguration
+
+void dml32_CalculateSwathWidth(
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ enum source_format_class SourcePixelFormat[],
+ enum dm_rotation_angle SourceRotation[],
+ bool ViewportStationary[],
+ unsigned int ViewportWidth[],
+ unsigned int ViewportHeight[],
+ unsigned int ViewportXStart[],
+ unsigned int ViewportYStart[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ enum odm_combine_mode ODMMode[],
+ unsigned int BytePerPixY[],
+ unsigned int BytePerPixC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int BlendingAndTiming[],
+ unsigned int HActive[],
+ double HRatio[],
+ unsigned int DPPPerSurface[],
+
+ /* Output */
+ double SwathWidthdoubleDPPY[],
+ double SwathWidthdoubleDPPC[],
+ double SwathWidthY[], // per-pipe
+ double SwathWidthC[], // per-pipe
+ unsigned int MaximumSwathHeightY[],
+ unsigned int MaximumSwathHeightC[],
+ unsigned int swath_width_luma_ub[], // per-pipe
+ unsigned int swath_width_chroma_ub[]) // per-pipe
+{
+ unsigned int k, j;
+ enum odm_combine_mode MainSurfaceODMMode;
+
+ unsigned int surface_width_ub_l;
+ unsigned int surface_height_ub_l;
+ unsigned int surface_width_ub_c;
+ unsigned int surface_height_ub_c;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
+ dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
+#endif
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!IsVertical(SourceRotation[k]))
+ SwathWidthdoubleDPPY[k] = ViewportWidth[k];
+ else
+ SwathWidthdoubleDPPY[k] = ViewportHeight[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
+ dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
+#endif
+
+ MainSurfaceODMMode = ODMMode[k];
+ for (j = 0; j < NumberOfActiveSurfaces; ++j) {
+ if (BlendingAndTiming[k] == j)
+ MainSurfaceODMMode = ODMMode[j];
+ }
+
+ if (ForceSingleDPP) {
+ SwathWidthY[k] = SwathWidthdoubleDPPY[k];
+ } else {
+ if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
+ SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
+ dml_round(HActive[k] / 4.0 * HRatio[k]));
+ } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
+ SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
+ dml_round(HActive[k] / 2.0 * HRatio[k]));
+ } else if (DPPPerSurface[k] == 2) {
+ SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
+ } else {
+ SwathWidthY[k] = SwathWidthdoubleDPPY[k];
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
+ dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
+ dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
+ dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
+ dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
+#endif
+
+ if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
+ SourcePixelFormat[k] == dm_420_12) {
+ SwathWidthC[k] = SwathWidthY[k] / 2;
+ SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
+ } else {
+ SwathWidthC[k] = SwathWidthY[k];
+ SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
+ }
+
+ if (ForceSingleDPP == true) {
+ SwathWidthY[k] = SwathWidthdoubleDPPY[k];
+ SwathWidthC[k] = SwathWidthdoubleDPPC[k];
+ }
+
+ surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
+ surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
+ surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+ surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
+ dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
+ dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
+ dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
+ dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
+ dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
+ dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
+ dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
+ dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
+ dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
+#endif
+
+ if (!IsVertical(SourceRotation[k])) {
+ MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
+ dml_floor(ViewportXStart[k] +
+ SwathWidthY[k] +
+ Read256BytesBlockWidthY[k] - 1,
+ Read256BytesBlockWidthY[k]) -
+ dml_floor(ViewportXStart[k],
+ Read256BytesBlockWidthY[k]));
+ } else {
+ swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
+ dml_ceil(SwathWidthY[k] - 1,
+ Read256BytesBlockWidthY[k]) +
+ Read256BytesBlockWidthY[k]);
+ }
+ if (BytePerPixC[k] > 0) {
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
+ dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
+ Read256BytesBlockWidthC[k] - 1,
+ Read256BytesBlockWidthC[k]) -
+ dml_floor(ViewportXStartC[k],
+ Read256BytesBlockWidthC[k]));
+ } else {
+ swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
+ dml_ceil(SwathWidthC[k] - 1,
+ Read256BytesBlockWidthC[k]) +
+ Read256BytesBlockWidthC[k]);
+ }
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ } else {
+ MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
+
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
+ SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
+ Read256BytesBlockHeightY[k]) -
+ dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
+ } else {
+ swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
+ Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
+ }
+ if (BytePerPixC[k] > 0) {
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
+ dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
+ Read256BytesBlockHeightC[k] - 1,
+ Read256BytesBlockHeightC[k]) -
+ dml_floor(ViewportYStartC[k],
+ Read256BytesBlockHeightC[k]));
+ } else {
+ swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
+ dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
+ Read256BytesBlockHeightC[k]);
+ }
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
+#endif
+
+ }
+} // CalculateSwathWidth
+
+bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ unsigned int TotalNumberOfActiveDPP,
+ bool NoChroma,
+ enum output_encoder_class Output,
+ enum dm_swizzle_mode SurfaceTiling,
+ bool CompBufReservedSpaceNeedAdjustment,
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
+{
+ bool ret_val = false;
+
+ ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
+ TotalNumberOfActiveDPP == 1 && NoChroma);
+ if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
+ ret_val = false;
+
+ if (SurfaceTiling == dm_sw_linear)
+ ret_val = false;
+
+ if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
+ ret_val = false;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment);
+ dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+ dml_print("DML::%s: ret_val = %d\n", __func__, ret_val);
+#endif
+
+ return (ret_val);
+}
+
+void dml32_CalculateDETBufferSize(
+ unsigned int DETSizeOverride[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ bool UnboundedRequestEnabled,
+ unsigned int nomDETInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ unsigned int CompressedBufferSegmentSizeInkByteFinal,
+ enum source_format_class SourcePixelFormat[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int RoundedUpMaxSwathSizeBytesY[],
+ unsigned int RoundedUpMaxSwathSizeBytesC[],
+ unsigned int DPPPerSurface[],
+ /* Output */
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int *CompressedBufferSizeInkByte)
+{
+ unsigned int DETBufferSizePoolInKByte;
+ unsigned int NextDETBufferPieceInKByte;
+ bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
+ bool NextPotentialSurfaceToAssignDETPieceFound;
+ unsigned int NextSurfaceToAssignDETPiece;
+ double TotalBandwidth;
+ double BandwidthOfSurfacesNotAssignedDETPiece;
+ unsigned int max_minDET;
+ unsigned int minDET;
+ unsigned int minDET_pipe;
+ unsigned int j, k;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
+ dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
+ dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
+ dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
+ dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
+ dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
+ CompressedBufferSegmentSizeInkByteFinal);
+#endif
+
+ // Note: Will use default det size if that fits 2 swaths
+ if (UnboundedRequestEnabled) {
+ if (DETSizeOverride[0] > 0) {
+ DETBufferSizeInKByte[0] = DETSizeOverride[0];
+ } else {
+ DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
+ ((double) RoundedUpMaxSwathSizeBytesY[0] +
+ (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
+ }
+ *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
+ } else {
+ DETBufferSizePoolInKByte = MaxTotalDETInKByte;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ DETBufferSizeInKByte[k] = nomDETInKByte;
+ if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
+ SourcePixelFormat[k] == dm_420_12) {
+ max_minDET = nomDETInKByte - 64;
+ } else {
+ max_minDET = nomDETInKByte;
+ }
+ minDET = 128;
+ minDET_pipe = 0;
+
+ // add DET resource until can hold 2 full swaths
+ while (minDET <= max_minDET && minDET_pipe == 0) {
+ if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
+ (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
+ minDET_pipe = minDET;
+ minDET = minDET + 64;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
+ dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
+ dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
+ RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
+ RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+
+ if (minDET_pipe == 0) {
+ minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
+ (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
+ __func__, k, minDET_pipe);
+#endif
+ }
+
+ if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
+ DETBufferSizeInKByte[k] = 0;
+ } else if (DETSizeOverride[k] > 0) {
+ DETBufferSizeInKByte[k] = DETSizeOverride[k];
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
+ (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
+ } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
+ DETBufferSizeInKByte[k] = minDET_pipe;
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
+ (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
+ dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
+#endif
+ }
+
+ TotalBandwidth = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
+ TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
+ for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
+ dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
+ dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
+#endif
+ BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
+ DETPieceAssignedToThisSurfaceAlready[k] = true;
+ } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
+ (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
+ ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
+ DETPieceAssignedToThisSurfaceAlready[k] = true;
+ BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
+ ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
+ } else {
+ DETPieceAssignedToThisSurfaceAlready[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
+ DETPieceAssignedToThisSurfaceAlready[k]);
+ dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
+ BandwidthOfSurfacesNotAssignedDETPiece);
+#endif
+ }
+
+ for (j = 0; j < NumberOfActiveSurfaces; ++j) {
+ NextPotentialSurfaceToAssignDETPieceFound = false;
+ NextSurfaceToAssignDETPiece = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
+ ReadBandwidthLuma[k]);
+ dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
+ ReadBandwidthChroma[k]);
+ dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
+ ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
+ ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
+ NextSurfaceToAssignDETPiece);
+#endif
+ if (!DETPieceAssignedToThisSurfaceAlready[k] &&
+ (!NextPotentialSurfaceToAssignDETPieceFound ||
+ ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
+ ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
+ NextSurfaceToAssignDETPiece = k;
+ NextPotentialSurfaceToAssignDETPieceFound = true;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
+ __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
+ dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
+ __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
+#endif
+ }
+
+ if (NextPotentialSurfaceToAssignDETPieceFound) {
+ // Note: To show the banker's rounding behavior in VBA and also the fact
+ // that the DET buffer size varies due to precision issue
+ //
+ //double tmp1 = ((double) DETBufferSizePoolInKByte *
+ // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
+ // BandwidthOfSurfacesNotAssignedDETPiece /
+ // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+ //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
+ // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
+ //BandwidthOfSurfacesNotAssignedDETPiece /
+ // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+ //
+ //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
+ //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
+
+ NextDETBufferPieceInKByte = dml_min(
+ dml_round((double) DETBufferSizePoolInKByte *
+ (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
+ BandwidthOfSurfacesNotAssignedDETPiece /
+ ((ForceSingleDPP ? 1 :
+ DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
+ (ForceSingleDPP ? 1 :
+ DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
+ dml_floor((double) DETBufferSizePoolInKByte,
+ (ForceSingleDPP ? 1 :
+ DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+
+ // Above calculation can assign the entire DET buffer allocation to a single pipe.
+ // We should limit the per-pipe DET size to the nominal / max per pipe.
+ if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
+ if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
+ nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
+ NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
+ } else {
+ // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
+ // already has the max per-pipe value
+ NextDETBufferPieceInKByte = 0;
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
+ DETBufferSizePoolInKByte);
+ dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
+ NextSurfaceToAssignDETPiece);
+ dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
+ NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
+ NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
+ __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
+ dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
+ NextDETBufferPieceInKByte);
+ dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
+ __func__, j, NextSurfaceToAssignDETPiece,
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
+#endif
+
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
+ + NextDETBufferPieceInKByte
+ / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
+#endif
+
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
+ DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
+ BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
+ (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ }
+ }
+ *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
+ }
+ *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
+ for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
+ dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
+ __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
+ }
+#endif
+} // CalculateDETBufferSize
+
+void dml32_CalculateODMMode(
+ unsigned int MaximumPixelsPerLinePerDSCUnit,
+ unsigned int HActive,
+ enum output_encoder_class Output,
+ enum odm_combine_policy ODMUse,
+ double StateDispclk,
+ double MaxDispclk,
+ bool DSCEnable,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int MaxNumDPP,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+
+ /* Output */
+ bool *TotalAvailablePipesSupport,
+ unsigned int *NumberOfDPP,
+ enum odm_combine_mode *ODMMode,
+ double *RequiredDISPCLKPerSurface)
+{
+
+ double SurfaceRequiredDISPCLKWithoutODMCombine;
+ double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+
+ SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
+ PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
+ MaxDispclk);
+ SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
+ PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
+ MaxDispclk);
+ SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
+ PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
+ MaxDispclk);
+ *TotalAvailablePipesSupport = true;
+ *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
+
+ if (ODMUse == dm_odm_combine_policy_none)
+ *ODMMode = dm_odm_combine_mode_disabled;
+
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
+ *NumberOfDPP = 0;
+
+ // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
+ // (ODMUse == "" || ODMUse == "CombineAsNeeded")
+
+ if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
+ ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
+ (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) {
+ if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
+ *ODMMode = dm_odm_combine_mode_4to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+ *NumberOfDPP = 4;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
+ (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
+ SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
+ (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)))))) {
+ if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
+ *ODMMode = dm_odm_combine_mode_2to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ *NumberOfDPP = 2;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ } else {
+ if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
+ *NumberOfDPP = 1;
+ else
+ *TotalAvailablePipesSupport = false;
+ }
+}
+
+double dml32_CalculateRequiredDispclk(
+ enum odm_combine_mode ODMMode,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+ double MaxDispclk)
+{
+ double RequiredDispclk = 0.;
+ double PixelClockAfterODM;
+ double DISPCLKWithRampingRoundedToDFSGranularity;
+ double DISPCLKWithoutRampingRoundedToDFSGranularity;
+ double MaxDispclkRoundedDownToDFSGranularity;
+
+ if (ODMMode == dm_odm_combine_mode_4to1)
+ PixelClockAfterODM = PixelClock / 4;
+ else if (ODMMode == dm_odm_combine_mode_2to1)
+ PixelClockAfterODM = PixelClock / 2;
+ else
+ PixelClockAfterODM = PixelClock;
+
+
+ DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
+ PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
+ * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
+
+ DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
+ PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
+
+ MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
+
+ if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
+ RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
+ else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
+ RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
+ else
+ RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
+
+ return RequiredDispclk;
+}
+
+double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
+{
+ if (Clock <= 0.0)
+ return 0.0;
+
+ if (round_up)
+ return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0);
+ else
+ return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);
+}
+
+void dml32_CalculateOutputLink(
+ double PHYCLKPerState,
+ double PHYCLKD18PerState,
+ double PHYCLKD32PerState,
+ double Downspreading,
+ bool IsMainSurfaceUsingTheIndicatedTiming,
+ enum output_encoder_class Output,
+ enum output_format_class OutputFormat,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClockBackEnd,
+ double ForcedOutputLinkBPP,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int NumberOfDSCSlices,
+ double AudioSampleRate,
+ unsigned int AudioSampleLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ bool DSCEnable,
+ unsigned int OutputLinkDPLanes,
+ enum dm_output_link_dp_rate OutputLinkDPRate,
+
+ /* Output */
+ bool *RequiresDSC,
+ double *RequiresFEC,
+ double *OutBpp,
+ enum dm_output_type *OutputType,
+ enum dm_output_rate *OutputRate,
+ unsigned int *RequiredSlots)
+{
+ bool LinkDSCEnable;
+ unsigned int dummy;
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = 0;
+ *OutputType = dm_output_type_unknown;
+ *OutputRate = dm_output_rate_unknown;
+
+ if (IsMainSurfaceUsingTheIndicatedTiming) {
+ if (Output == dm_hdmi) {
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
+ PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = "HDMI";
+ *OutputType = dm_output_type_hdmi;
+
+ } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
+ if (DSCEnable == true) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp || Output == dm_dp2p0)
+ *RequiresFEC = true;
+ else
+ *RequiresFEC = false;
+ } else {
+ *RequiresDSC = false;
+ LinkDSCEnable = false;
+ if (Output == dm_dp2p0)
+ *RequiresFEC = true;
+ else
+ *RequiresFEC = false;
+ }
+ if (Output == dm_dp2p0) {
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
+ PHYCLKD32PerState >= 10000 / 32) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR10";
+ *OutputType = dm_output_type_dp2p0;
+ *OutputRate = dm_output_rate_dp_rate_uhbr10;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
+ *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR13p5";
+ *OutputType = dm_output_type_dp2p0;
+ *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
+ *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR20";
+ *OutputType = dm_output_type_dp2p0;
+ *OutputRate = dm_output_rate_dp_rate_uhbr20;
+ }
+ } else {
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
+ PHYCLKPerState >= 270) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp)
+ *RequiresFEC = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR";
+ *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
+ *OutputRate = dm_output_rate_dp_rate_hbr;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
+ *OutBpp == 0 && PHYCLKPerState >= 540) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp)
+ *RequiresFEC = true;
+
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR2";
+ *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
+ *OutputRate = dm_output_rate_dp_rate_hbr2;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
+ AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
+ RequiredSlots);
+
+ if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp)
+ *RequiresFEC = true;
+
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR3";
+ *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
+ *OutputRate = dm_output_rate_dp_rate_hbr3;
+ }
+ }
+ }
+ }
+}
+
+void dml32_CalculateDPPCLK(
+ unsigned int NumberOfActiveSurfaces,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKDPPCLKVCOSpeed,
+ double DPPCLKUsingSingleDPP[],
+ unsigned int DPPPerSurface[],
+
+ /* output */
+ double *GlobalDPPCLK,
+ double Dppclk[])
+{
+ unsigned int k;
+ *GlobalDPPCLK = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]);
+ }
+ *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
+ for (k = 0; k < NumberOfActiveSurfaces; ++k)
+ Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0);
+}
+
+double dml32_TruncToValidBPP(
+ double LinkBitRate,
+ unsigned int Lanes,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClock,
+ double DesiredBPP,
+ bool DSCEnable,
+ enum output_encoder_class Output,
+ enum output_format_class Format,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int DSCSlices,
+ unsigned int AudioRate,
+ unsigned int AudioLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ /* Output */
+ unsigned int *RequiredSlots)
+{
+ double MaxLinkBPP;
+ unsigned int MinDSCBPP;
+ double MaxDSCBPP;
+ unsigned int NonDSCBPP0;
+ unsigned int NonDSCBPP1;
+ unsigned int NonDSCBPP2;
+ unsigned int NonDSCBPP3;
+
+ if (Format == dm_420) {
+ NonDSCBPP0 = 12;
+ NonDSCBPP1 = 15;
+ NonDSCBPP2 = 18;
+ MinDSCBPP = 6;
+ MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
+ } else if (Format == dm_444) {
+ NonDSCBPP0 = 18;
+ NonDSCBPP1 = 24;
+ NonDSCBPP2 = 30;
+ NonDSCBPP3 = 36;
+ MinDSCBPP = 8;
+ MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
+ } else {
+ if (Output == dm_hdmi) {
+ NonDSCBPP0 = 24;
+ NonDSCBPP1 = 24;
+ NonDSCBPP2 = 24;
+ } else {
+ NonDSCBPP0 = 16;
+ NonDSCBPP1 = 20;
+ NonDSCBPP2 = 24;
+ }
+ if (Format == dm_n422) {
+ MinDSCBPP = 7;
+ MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
+ } else {
+ MinDSCBPP = 8;
+ MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
+ }
+ }
+ if (Output == dm_dp2p0) {
+ MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
+ } else if (DSCEnable && Output == dm_dp) {
+ MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
+ } else {
+ MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
+ }
+
+ if (DSCEnable) {
+ if (ODMModeDSC == dm_odm_combine_mode_4to1)
+ MaxLinkBPP = dml_min(MaxLinkBPP, 16);
+ else if (ODMModeDSC == dm_odm_combine_mode_2to1)
+ MaxLinkBPP = dml_min(MaxLinkBPP, 32);
+ else if (ODMModeDSC == dm_odm_split_mode_1to2)
+ MaxLinkBPP = 2 * MaxLinkBPP;
+ } else {
+ if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
+ MaxLinkBPP = dml_min(MaxLinkBPP, 16);
+ else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
+ MaxLinkBPP = dml_min(MaxLinkBPP, 32);
+ else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
+ MaxLinkBPP = 2 * MaxLinkBPP;
+ }
+
+ if (DesiredBPP == 0) {
+ if (DSCEnable) {
+ if (MaxLinkBPP < MinDSCBPP)
+ return BPP_INVALID;
+ else if (MaxLinkBPP >= MaxDSCBPP)
+ return MaxDSCBPP;
+ else
+ return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
+ } else {
+ if (MaxLinkBPP >= NonDSCBPP3)
+ return NonDSCBPP3;
+ else if (MaxLinkBPP >= NonDSCBPP2)
+ return NonDSCBPP2;
+ else if (MaxLinkBPP >= NonDSCBPP1)
+ return NonDSCBPP1;
+ else if (MaxLinkBPP >= NonDSCBPP0)
+ return 16.0;
+ else
+ return BPP_INVALID;
+ }
+ } else {
+ if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
+ DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
+ (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
+ return BPP_INVALID;
+ else
+ return DesiredBPP;
+ }
+
+ *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
+
+ return BPP_INVALID;
+} // TruncToValidBPP
+
+double dml32_RequiredDTBCLK(
+ bool DSCEnable,
+ double PixelClock,
+ enum output_format_class OutputFormat,
+ double OutputBpp,
+ unsigned int DSCSlices,
+ unsigned int HTotal,
+ unsigned int HActive,
+ unsigned int AudioRate,
+ unsigned int AudioLayout)
+{
+ double PixelWordRate;
+ double HCActive;
+ double HCBlank;
+ double AverageTribyteRate;
+ double HActiveTribyteRate;
+
+ if (DSCEnable != true)
+ return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
+
+ PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);
+ HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
+ dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
+ HCBlank = 64 + 32 *
+ dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
+ AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
+ HActiveTribyteRate = PixelWordRate * HCActive / HActive;
+ return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
+}
+
+unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
+ enum odm_combine_mode ODMMode,
+ unsigned int DSCInputBitPerComponent,
+ double OutputBpp,
+ unsigned int HActive,
+ unsigned int HTotal,
+ unsigned int NumberOfDSCSlices,
+ enum output_format_class OutputFormat,
+ enum output_encoder_class Output,
+ double PixelClock,
+ double PixelClockBackEnd)
+{
+ unsigned int DSCDelayRequirement_val;
+
+ if (DSCEnabled == true && OutputBpp != 0) {
+ if (ODMMode == dm_odm_combine_mode_4to1) {
+ DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
+ dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
+ OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
+ } else if (ODMMode == dm_odm_combine_mode_2to1) {
+ DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
+ dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
+ OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
+ } else {
+ DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
+ dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
+ OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
+ }
+
+ DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
+ dml_ceil(DSCDelayRequirement_val / HActive, 1);
+
+ DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
+
+ } else {
+ DSCDelayRequirement_val = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
+ dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
+ dml_print("DML::%s: HActive = %d\n", __func__, HActive);
+ dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
+ dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
+ dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
+ dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
+#endif
+
+ return DSCDelayRequirement_val;
+}
+
+void dml32_CalculateSurfaceSizeInMall(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCN,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ bool DCCEnable[],
+ bool ViewportStationary[],
+ unsigned int ViewportXStartY[],
+ unsigned int ViewportYStartY[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int ViewportWidthY[],
+ unsigned int ViewportHeightY[],
+ unsigned int BytesPerPixelY[],
+ unsigned int ViewportWidthC[],
+ unsigned int ViewportHeightC[],
+ unsigned int BytesPerPixelC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int ReadBlockWidthY[],
+ unsigned int ReadBlockWidthC[],
+ unsigned int ReadBlockHeightY[],
+ unsigned int ReadBlockHeightC[],
+
+ /* Output */
+ unsigned int SurfaceSizeInMALL[],
+ bool *ExceededMALLSize)
+{
+ unsigned int TotalSurfaceSizeInMALL = 0;
+ unsigned int k;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (ViewportStationary[k]) {
+ SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
+ dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
+ ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
+ ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
+ ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
+ ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
+ dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
+
+ if (ReadBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
+ dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
+ ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
+ dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
+ dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
+ dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
+ ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
+ dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
+ BytesPerPixelC[k];
+ }
+ if (DCCEnable[k] == true) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
+ dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
+ Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
+ - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
+ * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
+ Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
+ ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
+ Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
+ * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
+ if (Read256BytesBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthC[k], 8 *
+ Read256BytesBlockWidthC[k]),
+ dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
+ * Read256BytesBlockWidthC[k] - 1, 8 *
+ Read256BytesBlockWidthC[k]) -
+ dml_floor(ViewportXStartC[k], 8 *
+ Read256BytesBlockWidthC[k])) *
+ dml_min(dml_ceil(SurfaceHeightC[k], 8 *
+ Read256BytesBlockHeightC[k]),
+ dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
+ 8 * Read256BytesBlockHeightC[k] - 1, 8 *
+ Read256BytesBlockHeightC[k]) -
+ dml_floor(ViewportYStartC[k], 8 *
+ Read256BytesBlockHeightC[k])) *
+ BytesPerPixelC[k] / 256;
+ }
+ }
+ } else {
+ SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
+ ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
+ dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
+ ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
+ BytesPerPixelY[k];
+ if (ReadBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
+ ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
+ dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
+ ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
+ BytesPerPixelC[k];
+ }
+ if (DCCEnable[k] == true) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
+ Read256BytesBlockWidthY[k] - 1), 8 *
+ Read256BytesBlockWidthY[k]) *
+ dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
+ Read256BytesBlockHeightY[k] - 1), 8 *
+ Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
+
+ if (Read256BytesBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
+ Read256BytesBlockWidthC[k] - 1), 8 *
+ Read256BytesBlockWidthC[k]) *
+ dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
+ Read256BytesBlockHeightC[k] - 1), 8 *
+ Read256BytesBlockHeightC[k]) *
+ BytesPerPixelC[k] / 256;
+ }
+ }
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
+ TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
+ }
+ *ExceededMALLSize = (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true);
+} // CalculateSurfaceSizeInMall
+
+void dml32_CalculateVMRowAndSwath(
+ struct dml32_CalculateVMRowAndSwath *st_vars,
+ unsigned int NumberOfActiveSurfaces,
+ DmlPipe myPipe[],
+ unsigned int SurfaceSizeInMALL[],
+ unsigned int PTEBufferSizeInRequestsLuma,
+ unsigned int PTEBufferSizeInRequestsChroma,
+ unsigned int DCCMetaBufferSizeBytes,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int MALLAllocatedForDCN,
+ double SwathWidthY[],
+ double SwathWidthC[],
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes[],
+ unsigned int HostVMMinPageSize,
+
+ /* Output */
+ bool PTEBufferSizeNotExceeded[],
+ bool DCCMetaBufferSizeNotExceeded[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int dpte_row_height_luma[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
+ unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int PTERequestSizeC[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+ double PrefetchSourceLinesY[],
+ double PrefetchSourceLinesC[],
+ double VInitPreFillY[],
+ double VInitPreFillC[],
+ unsigned int MaxNumSwathY[],
+ unsigned int MaxNumSwathC[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double PixelPTEBytesPerRow[],
+ double PDEAndMetaPTEBytesFrame[],
+ double MetaRowByte[],
+ bool use_one_row_for_frame[],
+ bool use_one_row_for_frame_flip[],
+ bool UsesMALLForStaticScreen[],
+ bool PTE_BUFFER_MODE[],
+ unsigned int BIGK_FRAGMENT_SIZE[])
+{
+ unsigned int k;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (HostVMEnable == true) {
+ vm_group_bytes[k] = 512;
+ dpte_group_bytes[k] = 512;
+ } else if (GPUVMEnable == true) {
+ vm_group_bytes[k] = 2048;
+ if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
+ dpte_group_bytes[k] = 512;
+ else
+ dpte_group_bytes[k] = 2048;
+ } else {
+ vm_group_bytes[k] = 0;
+ dpte_group_bytes[k] = 0;
+ }
+
+ if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
+ myPipe[k].SourcePixelFormat == dm_420_12 ||
+ myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
+ if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
+ !IsVertical(myPipe[k].SourceRotation)) {
+ st_vars->PTEBufferSizeInRequestsForLuma[k] =
+ (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
+ st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k];
+ } else {
+ st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
+ st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
+ }
+
+ st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
+ myPipe[k].ViewportStationary,
+ myPipe[k].DCCEnable,
+ myPipe[k].DPPPerSurface,
+ myPipe[k].BlockHeight256BytesC,
+ myPipe[k].BlockWidth256BytesC,
+ myPipe[k].SourcePixelFormat,
+ myPipe[k].SurfaceTiling,
+ myPipe[k].BytePerPixelC,
+ myPipe[k].SourceRotation,
+ SwathWidthC[k],
+ myPipe[k].ViewportHeightChroma,
+ myPipe[k].ViewportXStartC,
+ myPipe[k].ViewportYStartC,
+ GPUVMEnable,
+ HostVMEnable,
+ HostVMMaxNonCachedPageTableLevels,
+ GPUVMMaxPageTableLevels,
+ GPUVMMinPageSizeKBytes[k],
+ HostVMMinPageSize,
+ st_vars->PTEBufferSizeInRequestsForChroma[k],
+ myPipe[k].PitchC,
+ myPipe[k].DCCMetaPitchC,
+ myPipe[k].BlockWidthC,
+ myPipe[k].BlockHeightC,
+
+ /* Output */
+ &st_vars->MetaRowByteC[k],
+ &st_vars->PixelPTEBytesPerRowC[k],
+ &dpte_row_width_chroma_ub[k],
+ &dpte_row_height_chroma[k],
+ &dpte_row_height_linear_chroma[k],
+ &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k],
+ &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k],
+ &st_vars->dpte_row_height_chroma_one_row_per_frame[k],
+ &meta_req_width_chroma[k],
+ &meta_req_height_chroma[k],
+ &meta_row_width_chroma[k],
+ &meta_row_height_chroma[k],
+ &PixelPTEReqWidthC[k],
+ &PixelPTEReqHeightC[k],
+ &PTERequestSizeC[k],
+ &dpde0_bytes_per_frame_ub_c[k],
+ &meta_pte_bytes_per_frame_ub_c[k]);
+
+ PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
+ myPipe[k].VRatioChroma,
+ myPipe[k].VTapsChroma,
+ myPipe[k].InterlaceEnable,
+ myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ myPipe[k].SwathHeightC,
+ myPipe[k].SourceRotation,
+ myPipe[k].ViewportStationary,
+ SwathWidthC[k],
+ myPipe[k].ViewportHeightChroma,
+ myPipe[k].ViewportXStartC,
+ myPipe[k].ViewportYStartC,
+
+ /* Output */
+ &VInitPreFillC[k],
+ &MaxNumSwathC[k]);
+ } else {
+ st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
+ st_vars->PTEBufferSizeInRequestsForChroma[k] = 0;
+ st_vars->PixelPTEBytesPerRowC[k] = 0;
+ st_vars->PDEAndMetaPTEBytesFrameC = 0;
+ st_vars->MetaRowByteC[k] = 0;
+ MaxNumSwathC[k] = 0;
+ PrefetchSourceLinesC[k] = 0;
+ st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0;
+ st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
+ st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
+ }
+
+ st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
+ myPipe[k].ViewportStationary,
+ myPipe[k].DCCEnable,
+ myPipe[k].DPPPerSurface,
+ myPipe[k].BlockHeight256BytesY,
+ myPipe[k].BlockWidth256BytesY,
+ myPipe[k].SourcePixelFormat,
+ myPipe[k].SurfaceTiling,
+ myPipe[k].BytePerPixelY,
+ myPipe[k].SourceRotation,
+ SwathWidthY[k],
+ myPipe[k].ViewportHeight,
+ myPipe[k].ViewportXStart,
+ myPipe[k].ViewportYStart,
+ GPUVMEnable,
+ HostVMEnable,
+ HostVMMaxNonCachedPageTableLevels,
+ GPUVMMaxPageTableLevels,
+ GPUVMMinPageSizeKBytes[k],
+ HostVMMinPageSize,
+ st_vars->PTEBufferSizeInRequestsForLuma[k],
+ myPipe[k].PitchY,
+ myPipe[k].DCCMetaPitchY,
+ myPipe[k].BlockWidthY,
+ myPipe[k].BlockHeightY,
+
+ /* Output */
+ &st_vars->MetaRowByteY[k],
+ &st_vars->PixelPTEBytesPerRowY[k],
+ &dpte_row_width_luma_ub[k],
+ &dpte_row_height_luma[k],
+ &dpte_row_height_linear_luma[k],
+ &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k],
+ &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k],
+ &st_vars->dpte_row_height_luma_one_row_per_frame[k],
+ &meta_req_width[k],
+ &meta_req_height[k],
+ &meta_row_width[k],
+ &meta_row_height[k],
+ &PixelPTEReqWidthY[k],
+ &PixelPTEReqHeightY[k],
+ &PTERequestSizeY[k],
+ &dpde0_bytes_per_frame_ub_l[k],
+ &meta_pte_bytes_per_frame_ub_l[k]);
+
+ PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
+ myPipe[k].VRatio,
+ myPipe[k].VTaps,
+ myPipe[k].InterlaceEnable,
+ myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ myPipe[k].SwathHeightY,
+ myPipe[k].SourceRotation,
+ myPipe[k].ViewportStationary,
+ SwathWidthY[k],
+ myPipe[k].ViewportHeight,
+ myPipe[k].ViewportXStart,
+ myPipe[k].ViewportYStart,
+
+ /* Output */
+ &VInitPreFillY[k],
+ &MaxNumSwathY[k]);
+
+ PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC;
+ MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k];
+
+ if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] &&
+ st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) {
+ PTEBufferSizeNotExceeded[k] = true;
+ } else {
+ PTEBufferSizeNotExceeded[k] = false;
+ }
+
+ st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
+ st_vars->PTEBufferSizeInRequestsForLuma[k] &&
+ st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]);
+ }
+
+ dml32_CalculateMALLUseForStaticScreen(
+ NumberOfActiveSurfaces,
+ MALLAllocatedForDCN,
+ UseMALLForStaticScreen, // mode
+ SurfaceSizeInMALL,
+ st_vars->one_row_per_frame_fits_in_buffer,
+ /* Output */
+ UsesMALLForStaticScreen); // boolen
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
+ (GPUVMMinPageSizeKBytes[k] > 64);
+ BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
+ dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
+#endif
+ use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
+ (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
+
+ use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
+ !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
+
+ if (use_one_row_for_frame[k]) {
+ dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k];
+ dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k];
+ st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k];
+ dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k];
+ dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k];
+ st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k];
+ PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k];
+ }
+
+ if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
+ DCCMetaBufferSizeNotExceeded[k] = true;
+ else
+ DCCMetaBufferSizeNotExceeded[k] = false;
+
+ PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k];
+ if (use_one_row_for_frame[k])
+ PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
+
+ dml32_CalculateRowBandwidth(
+ GPUVMEnable,
+ myPipe[k].SourcePixelFormat,
+ myPipe[k].VRatio,
+ myPipe[k].VRatioChroma,
+ myPipe[k].DCCEnable,
+ myPipe[k].HTotal / myPipe[k].PixelClock,
+ st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k],
+ meta_row_height[k],
+ meta_row_height_chroma[k],
+ st_vars->PixelPTEBytesPerRowY[k],
+ st_vars->PixelPTEBytesPerRowC[k],
+ dpte_row_height_luma[k],
+ dpte_row_height_chroma[k],
+
+ /* Output */
+ &meta_row_bw[k],
+ &dpte_row_bw[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
+ dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
+ __func__, k, use_one_row_for_frame_flip[k]);
+ dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
+ __func__, k, UseMALLForPStateChange[k]);
+ dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
+ dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
+ __func__, k, dpte_row_width_luma_ub[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]);
+ dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
+ __func__, k, dpte_row_height_chroma[k]);
+ dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
+ __func__, k, dpte_row_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
+ dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
+ __func__, k, PTEBufferSizeNotExceeded[k]);
+ dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
+ dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
+#endif
+ }
+} // CalculateVMRowAndSwath
+
+unsigned int dml32_CalculateVMAndRowBytes(
+ bool ViewportStationary,
+ bool DCCEnable,
+ unsigned int NumberOfDPPs,
+ unsigned int BlockHeight256Bytes,
+ unsigned int BlockWidth256Bytes,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceTiling,
+ unsigned int BytePerPixel,
+ enum dm_rotation_angle SourceRotation,
+ double SwathWidth,
+ unsigned int ViewportHeight,
+ unsigned int ViewportXStart,
+ unsigned int ViewportYStart,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes,
+ unsigned int HostVMMinPageSize,
+ unsigned int PTEBufferSizeInRequests,
+ unsigned int Pitch,
+ unsigned int DCCMetaPitch,
+ unsigned int MacroTileWidth,
+ unsigned int MacroTileHeight,
+
+ /* Output */
+ unsigned int *MetaRowByte,
+ unsigned int *PixelPTEBytesPerRow,
+ unsigned int *dpte_row_width_ub,
+ unsigned int *dpte_row_height,
+ unsigned int *dpte_row_height_linear,
+ unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
+ unsigned int *dpte_row_width_ub_one_row_per_frame,
+ unsigned int *dpte_row_height_one_row_per_frame,
+ unsigned int *MetaRequestWidth,
+ unsigned int *MetaRequestHeight,
+ unsigned int *meta_row_width,
+ unsigned int *meta_row_height,
+ unsigned int *PixelPTEReqWidth,
+ unsigned int *PixelPTEReqHeight,
+ unsigned int *PTERequestSize,
+ unsigned int *DPDE0BytesFrame,
+ unsigned int *MetaPTEBytesFrame)
+{
+ unsigned int MPDEBytesFrame;
+ unsigned int DCCMetaSurfaceBytes;
+ unsigned int ExtraDPDEBytesFrame;
+ unsigned int PDEAndMetaPTEBytesFrame;
+ unsigned int HostVMDynamicLevels = 0;
+ unsigned int MacroTileSizeBytes;
+ unsigned int vp_height_meta_ub;
+ unsigned int vp_height_dpte_ub;
+ unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ if (HostVMMinPageSize < 2048)
+ HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+ else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+ else
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+ }
+
+ *MetaRequestHeight = 8 * BlockHeight256Bytes;
+ *MetaRequestWidth = 8 * BlockWidth256Bytes;
+ if (SurfaceTiling == dm_sw_linear) {
+ *meta_row_height = 32;
+ *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
+ - dml_floor(ViewportXStart, *MetaRequestWidth);
+ } else if (!IsVertical(SourceRotation)) {
+ *meta_row_height = *MetaRequestHeight;
+ if (ViewportStationary && NumberOfDPPs == 1) {
+ *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
+ *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
+ } else {
+ *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
+ }
+ *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
+ } else {
+ *meta_row_height = *MetaRequestWidth;
+ if (ViewportStationary && NumberOfDPPs == 1) {
+ *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
+ *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
+ } else {
+ *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
+ }
+ *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
+ }
+
+ if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
+ vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
+ 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
+ } else if (!IsVertical(SourceRotation)) {
+ vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
+ } else {
+ vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
+ }
+
+ DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
+
+ if (GPUVMEnable == true) {
+ *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
+ (8 * 4.0 * 1024), 1) + 1) * 64;
+ MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
+ } else {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ }
+
+ if (DCCEnable != true) {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ *MetaRowByte = 0;
+ }
+
+ MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
+
+ if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
+ if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
+ vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
+ MacroTileHeight - 1, MacroTileHeight) -
+ dml_floor(ViewportYStart, MacroTileHeight);
+ } else if (!IsVertical(SourceRotation)) {
+ vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
+ } else {
+ vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
+ }
+ *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
+ (8 * 2097152), 1) + 1);
+ ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
+ } else {
+ *DPDE0BytesFrame = 0;
+ ExtraDPDEBytesFrame = 0;
+ vp_height_dpte_ub = 0;
+ }
+
+ PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
+ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+ dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
+ dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
+ dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
+ dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
+ dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
+ dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
+ dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
+ dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
+ dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
+ dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
+ dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
+ dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
+ dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
+#endif
+
+ if (HostVMEnable == true)
+ PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
+
+ if (SurfaceTiling == dm_sw_linear) {
+ *PixelPTEReqHeight = 1;
+ *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
+ PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
+ *PTERequestSize = 64;
+ } else if (GPUVMMinPageSizeKBytes == 4) {
+ *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
+ *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
+ *PTERequestSize = 128;
+ } else {
+ *PixelPTEReqHeight = MacroTileHeight;
+ *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
+ *PTERequestSize = 64;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
+ dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
+ dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
+ dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
+ dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
+#endif
+
+ *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
+ *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
+ (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
+ (double) *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
+ *PTERequestSize;
+
+ if (SurfaceTiling == dm_sw_linear) {
+ *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
+ *PixelPTEReqWidth / Pitch), 1));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
+ PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
+ dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
+ dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
+ dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
+ dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
+ 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
+ *PixelPTEReqWidth / Pitch), 1));
+ dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
+#endif
+ *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
+ (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
+
+ // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
+ *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
+ PixelPTEReqWidth_linear / Pitch), 1);
+ if (*dpte_row_height_linear > 128)
+ *dpte_row_height_linear = 128;
+
+ } else if (!IsVertical(SourceRotation)) {
+ *dpte_row_height = *PixelPTEReqHeight;
+
+ if (GPUVMMinPageSizeKBytes > 64) {
+ *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
+ *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
+ } else if (ViewportStationary && (NumberOfDPPs == 1)) {
+ *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
+ *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
+ dml_floor(ViewportXStart, *PixelPTEReqWidth);
+ } else {
+ *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
+ *PixelPTEReqWidth;
+ }
+
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else {
+ *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
+
+ if (ViewportStationary && (NumberOfDPPs == 1)) {
+ *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
+ *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
+ } else {
+ *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
+ * *PixelPTEReqHeight;
+ }
+
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
+ }
+
+ if (GPUVMEnable != true)
+ *PixelPTEBytesPerRow = 0;
+ if (HostVMEnable == true)
+ *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
+ dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
+ dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
+ dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
+ dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
+ dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
+ dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
+ __func__, *dpte_row_width_ub_one_row_per_frame);
+ dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
+ __func__, *PixelPTEBytesPerRow_one_row_per_frame);
+ dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
+ *MetaPTEBytesFrame);
+#endif
+
+ return PDEAndMetaPTEBytesFrame;
+} // CalculateVMAndRowBytes
+
+double dml32_CalculatePrefetchSourceLines(
+ double VRatio,
+ unsigned int VTaps,
+ bool Interlace,
+ bool ProgressiveToInterlaceUnitInOPP,
+ unsigned int SwathHeight,
+ enum dm_rotation_angle SourceRotation,
+ bool ViewportStationary,
+ double SwathWidth,
+ unsigned int ViewportHeight,
+ unsigned int ViewportXStart,
+ unsigned int ViewportYStart,
+
+ /* Output */
+ double *VInitPreFill,
+ unsigned int *MaxNumSwath)
+{
+
+ unsigned int vp_start_rot;
+ unsigned int sw0_tmp;
+ unsigned int MaxPartialSwath;
+ double numLines;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
+ dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
+ dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
+ dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
+ dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
+ dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
+#endif
+ if (ProgressiveToInterlaceUnitInOPP)
+ *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
+ else
+ *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
+
+ if (ViewportStationary) {
+ if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
+ vp_start_rot = SwathHeight -
+ (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
+ } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
+ vp_start_rot = ViewportXStart;
+ } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
+ vp_start_rot = SwathHeight -
+ (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
+ } else {
+ vp_start_rot = ViewportYStart;
+ }
+ sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
+ if (sw0_tmp < *VInitPreFill)
+ *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
+ else
+ *MaxNumSwath = 1;
+ MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
+ } else {
+ *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
+ if (*VInitPreFill > 1)
+ MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
+ else
+ MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
+ }
+ numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
+ dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
+ dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
+ dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
+ dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
+#endif
+ return numLines;
+
+} // CalculatePrefetchSourceLines
+
+void dml32_CalculateMALLUseForStaticScreen(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCNFinal,
+ enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
+ unsigned int SurfaceSizeInMALL[],
+ bool one_row_per_frame_fits_in_buffer[],
+
+ /* output */
+ bool UsesMALLForStaticScreen[])
+{
+ unsigned int k;
+ unsigned int SurfaceToAddToMALL;
+ bool CanAddAnotherSurfaceToMALL;
+ unsigned int TotalSurfaceSizeInMALL;
+
+ TotalSurfaceSizeInMALL = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
+ if (UsesMALLForStaticScreen[k])
+ TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
+ dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL);
+#endif
+ }
+
+ SurfaceToAddToMALL = 0;
+ CanAddAnotherSurfaceToMALL = true;
+ while (CanAddAnotherSurfaceToMALL) {
+ CanAddAnotherSurfaceToMALL = false;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
+ !UsesMALLForStaticScreen[k] &&
+ UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
+ one_row_per_frame_fits_in_buffer[k] &&
+ (!CanAddAnotherSurfaceToMALL ||
+ SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
+ CanAddAnotherSurfaceToMALL = true;
+ SurfaceToAddToMALL = k;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
+ __func__, k, UseMALLForStaticScreen[k]);
+#endif
+ }
+ }
+ if (CanAddAnotherSurfaceToMALL) {
+ UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
+ TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL);
+ dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL);
+#endif
+
+ }
+ }
+}
+
+void dml32_CalculateRowBandwidth(
+ bool GPUVMEnable,
+ enum source_format_class SourcePixelFormat,
+ double VRatio,
+ double VRatioChroma,
+ bool DCCEnable,
+ double LineTime,
+ unsigned int MetaRowByteLuma,
+ unsigned int MetaRowByteChroma,
+ unsigned int meta_row_height_luma,
+ unsigned int meta_row_height_chroma,
+ unsigned int PixelPTEBytesPerRowLuma,
+ unsigned int PixelPTEBytesPerRowChroma,
+ unsigned int dpte_row_height_luma,
+ unsigned int dpte_row_height_chroma,
+ /* Output */
+ double *meta_row_bw,
+ double *dpte_row_bw)
+{
+ if (DCCEnable != true) {
+ *meta_row_bw = 0;
+ } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
+ SourcePixelFormat == dm_rgbe_alpha) {
+ *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
+ MetaRowByteChroma / (meta_row_height_chroma * LineTime);
+ } else {
+ *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
+ }
+
+ if (GPUVMEnable != true) {
+ *dpte_row_bw = 0;
+ } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
+ SourcePixelFormat == dm_rgbe_alpha) {
+ *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
+ VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
+ } else {
+ *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
+ }
+}
+
+double dml32_CalculateUrgentLatency(
+ double UrgentLatencyPixelDataOnly,
+ double UrgentLatencyPixelMixedWithVMData,
+ double UrgentLatencyVMDataOnly,
+ bool DoUrgentLatencyAdjustment,
+ double UrgentLatencyAdjustmentFabricClockComponent,
+ double UrgentLatencyAdjustmentFabricClockReference,
+ double FabricClock)
+{
+ double ret;
+
+ ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
+ if (DoUrgentLatencyAdjustment == true) {
+ ret = ret + UrgentLatencyAdjustmentFabricClockComponent *
+ (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
+ }
+ return ret;
+}
+
+void dml32_CalculateUrgentBurstFactor(
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double LineTime,
+ double UrgentLatency,
+ double CursorBufferSize,
+ unsigned int CursorWidth,
+ unsigned int CursorBPP,
+ double VRatio,
+ double VRatioC,
+ double BytePerPixelInDETY,
+ double BytePerPixelInDETC,
+ unsigned int DETBufferSizeY,
+ unsigned int DETBufferSizeC,
+ /* Output */
+ double *UrgentBurstFactorCursor,
+ double *UrgentBurstFactorLuma,
+ double *UrgentBurstFactorChroma,
+ bool *NotEnoughUrgentLatencyHiding)
+{
+ double LinesInDETLuma;
+ double LinesInDETChroma;
+ unsigned int LinesInCursorBuffer;
+ double CursorBufferSizeInTime;
+ double DETBufferSizeInTimeLuma;
+ double DETBufferSizeInTimeChroma;
+
+ *NotEnoughUrgentLatencyHiding = 0;
+
+ if (CursorWidth > 0) {
+ LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
+ (CursorWidth * CursorBPP / 8.0)), 1.0);
+ if (VRatio > 0) {
+ CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
+ if (CursorBufferSizeInTime - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorCursor = 0;
+ } else {
+ *UrgentBurstFactorCursor = CursorBufferSizeInTime /
+ (CursorBufferSizeInTime - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorCursor = 1;
+ }
+ }
+
+ LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
+ DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
+
+ if (VRatio > 0) {
+ DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
+ if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorLuma = 0;
+ } else {
+ *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorLuma = 1;
+ }
+
+ if (BytePerPixelInDETC > 0) {
+ LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
+ 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
+ / swath_width_chroma_ub;
+
+ if (VRatio > 0) {
+ DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
+ if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorChroma = 0;
+ } else {
+ *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
+ / (DETBufferSizeInTimeChroma - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorChroma = 1;
+ }
+ }
+} // CalculateUrgentBurstFactor
+
+void dml32_CalculateDCFCLKDeepSleep(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ double VRatio[],
+ double VRatioChroma[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerSurface[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int ReturnBusWidth,
+
+ /* Output */
+ double *DCFClkDeepSleep)
+{
+ unsigned int k;
+ double DisplayPipeLineDeliveryTimeLuma;
+ double DisplayPipeLineDeliveryTimeChroma;
+ double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
+ double ReadBandwidth = 0.0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ if (VRatio[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
+ / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+ }
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChroma = 0;
+ } else {
+ if (VRatioChroma[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
+ DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
+ / Dppclk[k];
+ }
+ }
+
+ if (BytePerPixelC[k] > 0) {
+ DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
+ BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
+ __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
+ 32.0 / DisplayPipeLineDeliveryTimeChroma);
+ } else {
+ DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
+ 64.0 / DisplayPipeLineDeliveryTimeLuma;
+ }
+ DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
+#endif
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k)
+ ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+
+ *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
+ dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
+ dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
+ dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
+#endif
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k)
+ *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
+#endif
+} // CalculateDCFCLKDeepSleep
+
+double dml32_CalculateWriteBackDelay(
+ enum source_format_class WritebackPixelFormat,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackVTaps,
+ unsigned int WritebackDestinationWidth,
+ unsigned int WritebackDestinationHeight,
+ unsigned int WritebackSourceHeight,
+ unsigned int HTotal)
+{
+ double CalculateWriteBackDelay;
+ double Line_length;
+ double Output_lines_last_notclamped;
+ double WritebackVInit;
+
+ WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
+ Line_length = dml_max((double) WritebackDestinationWidth,
+ dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
+ Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
+ dml_ceil(((double)WritebackSourceHeight -
+ (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
+ if (Output_lines_last_notclamped < 0) {
+ CalculateWriteBackDelay = 0;
+ } else {
+ CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
+ (HTotal - WritebackDestinationWidth) + 80;
+ }
+ return CalculateWriteBackDelay;
+}
+
+void dml32_UseMinimumDCFCLK(
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool DRRDisplay[],
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int MaxPrefetchMode,
+ double DRAMClockChangeLatencyFinal,
+ double FCLKChangeLatency,
+ double SREnterPlusExitTime,
+ unsigned int ReturnBusWidth,
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int MetaChunkSize,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool DynamicMetadataVMEnabled,
+ bool ImmediateFlipRequirement,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
+ double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
+ unsigned int VTotal[],
+ unsigned int VActive[],
+ unsigned int DynamicMetadataTransmittedBytes[],
+ unsigned int DynamicMetadataLinesBeforeActiveRequired[],
+ bool Interlace[],
+ double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
+ double RequiredDISPCLK[][2],
+ double UrgLatency[],
+ unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
+ double ProjectedDCFClkDeepSleep[][2],
+ double MaximumVStartup[][2][DC__NUM_DPP__MAX],
+ unsigned int TotalNumberOfActiveDPP[][2],
+ unsigned int TotalNumberOfDCCActiveDPP[][2],
+ unsigned int dpte_group_bytes[],
+ double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
+ double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ unsigned int HTotal[],
+ double PixelClock[],
+ double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
+ double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
+ double MetaRowBytes[][2][DC__NUM_DPP__MAX],
+ bool DynamicMetadataEnable[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double DCFCLKPerState[],
+ /* Output */
+ double DCFCLKState[][2])
+{
+ unsigned int i, j, k;
+ unsigned int dummy1;
+ double dummy2, dummy3;
+ double NormalEfficiency;
+ double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
+
+ NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
+ for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
+ double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
+ double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
+ double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
+ double MinimumTWait = 0.0;
+ double DPTEBandwidth;
+ double DCFCLKRequiredForAverageBandwidth;
+ unsigned int ExtraLatencyBytes;
+ double ExtraLatencyCycles;
+ double DCFCLKRequiredForPeakBandwidth;
+ unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
+ double MinimumTvmPlus2Tr0;
+
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
+ + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
+ / (15.75 * HTotal[k] / PixelClock[k]);
+ }
+
+ for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
+ NoOfDPPState[k] = NoOfDPP[i][j][k];
+
+ DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
+ DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
+
+ ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
+ TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
+ TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
+ NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
+ HostVMMaxNonCachedPageTableLevels);
+ ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
+ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ double DCFCLKCyclesRequiredInPrefetch;
+ double PrefetchTime;
+
+ PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
+ * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
+ + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
+ * BytePerPixelC[k]) / NormalEfficiency
+ / ReturnBusWidth;
+ DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
+ + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
+ / NormalEfficiency / ReturnBusWidth
+ * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
+ + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
+ / ReturnBusWidth
+ + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
+ + PixelDCFCLKCyclesRequiredInPrefetch[k];
+ PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
+ * HTotal[k] / PixelClock[k];
+ DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
+ DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
+ UrgLatency[i] * GPUVMMaxPageTableLevels *
+ (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+
+ MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
+ UseMALLForPStateChange[k],
+ SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ DRRDisplay[k],
+ DRAMClockChangeLatencyFinal,
+ FCLKChangeLatency,
+ UrgLatency[i],
+ SREnterPlusExitTime);
+
+ PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
+ MinimumTWait - UrgLatency[i] *
+ ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
+ GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
+ HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
+ DynamicMetadataVMExtraLatency[k];
+
+ if (PrefetchTime > 0) {
+ double ExpectedVRatioPrefetch;
+
+ ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
+ PixelDCFCLKCyclesRequiredInPrefetch[k] /
+ DCFCLKCyclesRequiredInPrefetch);
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
+ PixelDCFCLKCyclesRequiredInPrefetch[k] /
+ PrefetchPixelLinesTime[k] *
+ dml_max(1.0, ExpectedVRatioPrefetch) *
+ dml_max(1.0, ExpectedVRatioPrefetch / 4);
+ if (HostVMEnable == true || ImmediateFlipRequirement == true) {
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] =
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] +
+ NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
+ NormalEfficiency / ReturnBusWidth;
+ }
+ } else {
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
+ }
+ if (DynamicMetadataEnable[k] == true) {
+ double TSetupPipe;
+ double TdmbfPipe;
+ double TdmsksPipe;
+ double TdmecPipe;
+ double AllowedTimeForUrgentExtraLatency;
+
+ dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ MaxInterDCNTileRepeaters,
+ RequiredDPPCLKPerSurface[i][j][k],
+ RequiredDISPCLK[i][j],
+ ProjectedDCFClkDeepSleep[i][j],
+ PixelClock[k],
+ HTotal[k],
+ VTotal[k] - VActive[k],
+ DynamicMetadataTransmittedBytes[k],
+ DynamicMetadataLinesBeforeActiveRequired[k],
+ Interlace[k],
+ ProgressiveToInterlaceUnitInOPP,
+
+ /* output */
+ &TSetupPipe,
+ &TdmbfPipe,
+ &TdmecPipe,
+ &TdmsksPipe,
+ &dummy1,
+ &dummy2,
+ &dummy3);
+ AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
+ PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
+ TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
+ if (AllowedTimeForUrgentExtraLatency > 0)
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] =
+ dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
+ ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
+ else
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
+ }
+ }
+ DCFCLKRequiredForPeakBandwidth = 0;
+ for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
+ DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
+ DCFCLKRequiredForPeakBandwidthPerSurface[k];
+ }
+ MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
+ (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
+ (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ double MaximumTvmPlus2Tr0PlusTsw;
+
+ MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
+ PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
+ if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
+ DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
+ } else {
+ DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
+ 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
+ MinimumTvmPlus2Tr0 -
+ PrefetchPixelLinesTime[k] / 4),
+ (2 * ExtraLatencyCycles +
+ PixelDCFCLKCyclesRequiredInPrefetch[k]) /
+ (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
+ }
+ }
+ DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
+ dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
+ }
+ }
+}
+
+unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int TotalNumberOfDCCActiveDPP,
+ unsigned int MetaChunkSize,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels)
+{
+ unsigned int k;
+ double ret;
+ unsigned int HostVMDynamicLevels;
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ if (HostVMMinPageSize < 2048)
+ HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+ else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+ else
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+ } else {
+ HostVMDynamicLevels = 0;
+ }
+
+ ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
+ TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
+
+ if (GPUVMEnable == true) {
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] *
+ (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
+ }
+ }
+ return ret;
+}
+
+void dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ unsigned int MaxInterDCNTileRepeaters,
+ double Dppclk,
+ double Dispclk,
+ double DCFClkDeepSleep,
+ double PixelClock,
+ unsigned int HTotal,
+ unsigned int VBlank,
+ unsigned int DynamicMetadataTransmittedBytes,
+ unsigned int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int InterlaceEnable,
+ bool ProgressiveToInterlaceUnitInOPP,
+
+ /* output */
+ double *TSetup,
+ double *Tdmbf,
+ double *Tdmec,
+ double *Tdmsks,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix)
+{
+ double TotalRepeaterDelayTime;
+
+ TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
+ *VUpdateWidthPix =
+ dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
+ *VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
+ TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
+ *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
+ *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
+ *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
+ *Tdmec = HTotal / PixelClock;
+
+ if (DynamicMetadataLinesBeforeActiveRequired == 0)
+ *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
+ else
+ *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
+
+ if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
+ *Tdmsks = *Tdmsks / 2;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix);
+ dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix);
+ dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
+
+ dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
+ __func__, DynamicMetadataLinesBeforeActiveRequired);
+ dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
+ dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
+ dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
+ dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
+#endif
+}
+
+double dml32_CalculateTWait(
+ unsigned int PrefetchMode,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ bool DRRDisplay,
+ double DRAMClockChangeLatency,
+ double FCLKChangeLatency,
+ double UrgentLatency,
+ double SREnterPlusExitTime)
+{
+ double TWait = 0.0;
+
+ if (PrefetchMode == 0 &&
+ !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
+ !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
+ !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
+ !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
+ TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
+ } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
+ TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
+ } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
+ TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
+ } else {
+ TWait = UrgentLatency;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
+ dml_print("DML::%s: TWait = %f\n", __func__, TWait);
+#endif
+ return TWait;
+} // CalculateTWait
+
+// Function: get_return_bw_mbps
+// Megabyte per second
+double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
+ const int VoltageLevel,
+ const bool HostVMEnable,
+ const double DCFCLK,
+ const double FabricClock,
+ const double DRAMSpeed)
+{
+ double ReturnBW = 0.;
+ double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
+ double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
+ double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
+ double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
+ IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
+ IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
+ soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
+ double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
+ IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
+ IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
+ soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
+
+ if (HostVMEnable != true)
+ ReturnBW = PixelDataOnlyReturnBW;
+ else
+ ReturnBW = PixelMixedWithVMDataReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
+ dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+ dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+ dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+ dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
+ dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
+ dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
+ dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
+ dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
+ dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
+#endif
+ return ReturnBW;
+}
+
+// Function: get_return_bw_mbps_vm_only
+// Megabyte per second
+double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
+ const int VoltageLevel,
+ const double DCFCLK,
+ const double FabricClock,
+ const double DRAMSpeed)
+{
+ double VMDataOnlyReturnBW = dml_min3(
+ soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
+ FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
+ * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
+ DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
+ * (VoltageLevel < 2 ?
+ soc->pct_ideal_dram_bw_after_urgent_strobe :
+ soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+ dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+ dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+ dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
+#endif
+ return VMDataOnlyReturnBW;
+}
+
+double dml32_CalculateExtraLatency(
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ double DCFCLK,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int TotalNumberOfDCCActiveDPP,
+ unsigned int MetaChunkSize,
+ double ReturnBW,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels)
+{
+ double ExtraLatencyBytes;
+ double ExtraLatency;
+
+ ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
+ ReorderingBytes,
+ TotalNumberOfActiveDPP,
+ PixelChunkSizeInKByte,
+ TotalNumberOfDCCActiveDPP,
+ MetaChunkSize,
+ GPUVMEnable,
+ HostVMEnable,
+ NumberOfActiveSurfaces,
+ NumberOfDPP,
+ dpte_group_bytes,
+ HostVMInefficiencyFactor,
+ HostVMMinPageSize,
+ HostVMMaxNonCachedPageTableLevels);
+
+ ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
+ dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
+ dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
+ dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
+ dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
+#endif
+
+ return ExtraLatency;
+} // CalculateExtraLatency
+
+bool dml32_CalculatePrefetchSchedule(
+ struct dml32_CalculatePrefetchSchedule *st_vars,
+ double HostVMInefficiencyFactor,
+ DmlPipe *myPipe,
+ unsigned int DSCDelay,
+ double DPPCLKDelaySubtotalPlusCNVCFormater,
+ double DPPCLKDelaySCL,
+ double DPPCLKDelaySCLLBOnly,
+ double DPPCLKDelayCNVCCursor,
+ double DISPCLKDelaySubtotal,
+ unsigned int DPP_RECOUT_WIDTH,
+ enum output_format_class OutputFormat,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int VStartup,
+ unsigned int MaxVStartup,
+ unsigned int GPUVMPageTableLevels,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ double HostVMMinPageSize,
+ bool DynamicMetadataEnable,
+ bool DynamicMetadataVMEnabled,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int DynamicMetadataTransmittedBytes,
+ double UrgentLatency,
+ double UrgentExtraLatency,
+ double TCalc,
+ unsigned int PDEAndMetaPTEBytesFrame,
+ unsigned int MetaRowByte,
+ unsigned int PixelPTEBytesPerRow,
+ double PrefetchSourceLinesY,
+ unsigned int SwathWidthY,
+ unsigned int VInitPreFillY,
+ unsigned int MaxNumSwathY,
+ double PrefetchSourceLinesC,
+ unsigned int SwathWidthC,
+ unsigned int VInitPreFillC,
+ unsigned int MaxNumSwathC,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double TWait,
+ /* Output */
+ double *DSTXAfterScaler,
+ double *DSTYAfterScaler,
+ double *DestinationLinesForPrefetch,
+ double *PrefetchBandwidth,
+ double *DestinationLinesToRequestVMInVBlank,
+ double *DestinationLinesToRequestRowInVBlank,
+ double *VRatioPrefetchY,
+ double *VRatioPrefetchC,
+ double *RequiredPrefetchPixDataBWLuma,
+ double *RequiredPrefetchPixDataBWChroma,
+ bool *NotEnoughTimeForDynamicMetadata,
+ double *Tno_bw,
+ double *prefetch_vmrow_bw,
+ double *Tdmdl_vm,
+ double *Tdmdl,
+ double *TSetup,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix)
+{
+ bool MyError = false;
+
+ st_vars->TimeForFetchingMetaPTE = 0;
+ st_vars->TimeForFetchingRowInVBlank = 0;
+ st_vars->LinesToRequestPrefetchPixelData = 0;
+ st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__;
+ st_vars->Tsw_est1 = 0;
+ st_vars->Tsw_est3 = 0;
+
+ if (GPUVMEnable == true && HostVMEnable == true)
+ st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ else
+ st_vars->HostVMDynamicLevelsTrips = 0;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+ dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
+ dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
+ dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
+ __func__, HostVMEnable, HostVMInefficiencyFactor);
+#endif
+ dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ MaxInterDCNTileRepeaters,
+ myPipe->Dppclk,
+ myPipe->Dispclk,
+ myPipe->DCFClkDeepSleep,
+ myPipe->PixelClock,
+ myPipe->HTotal,
+ myPipe->VBlank,
+ DynamicMetadataTransmittedBytes,
+ DynamicMetadataLinesBeforeActiveRequired,
+ myPipe->InterlaceEnable,
+ myPipe->ProgressiveToInterlaceUnitInOPP,
+ TSetup,
+
+ /* output */
+ &st_vars->Tdmbf,
+ &st_vars->Tdmec,
+ &st_vars->Tdmsks,
+ VUpdateOffsetPix,
+ VUpdateWidthPix,
+ VReadyOffsetPix);
+
+ st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock;
+ st_vars->trip_to_mem = UrgentLatency;
+ st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
+
+ if (DynamicMetadataVMEnabled == true)
+ *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem;
+ else
+ *Tdmdl = TWait + UrgentExtraLatency;
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ if (DynamicMetadataEnable == false)
+ *Tdmdl = 0.0;
+#endif
+
+ if (DynamicMetadataEnable == true) {
+ if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) {
+ *NotEnoughTimeForDynamicMetadata = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
+ __func__, st_vars->Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
+ dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
+ __func__, st_vars->Tdmsks);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
+ __func__, *Tdmdl);
+#endif
+ } else {
+ *NotEnoughTimeForDynamicMetadata = false;
+ }
+ } else {
+ *NotEnoughTimeForDynamicMetadata = false;
+ }
+
+ *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
+ GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0);
+
+ if (myPipe->ScalerEnabled)
+ st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
+ else
+ st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
+
+ st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
+
+ st_vars->DISPCLKCycles = DISPCLKDelaySubtotal;
+
+ if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
+ return true;
+
+ *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles *
+ myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
+
+ *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
+ + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
+ + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
+ myPipe->HActive / 2 : 0)
+ + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles);
+ dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
+ dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
+ dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles);
+ dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
+ dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
+ dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
+ dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
+ dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
+#endif
+
+ if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
+ *DSTYAfterScaler = 1;
+ else
+ *DSTYAfterScaler = 0;
+
+ st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
+ *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
+ *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
+ dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
+#endif
+
+ MyError = false;
+
+ st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1);
+
+ if (GPUVMEnable == true) {
+ st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
+ st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
+ if (GPUVMPageTableLevels >= 3) {
+ *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem *
+ (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
+ } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
+ st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) /
+ 4.0 * st_vars->LineTime; // VBA_ERROR
+ *Tno_bw = UrgentExtraLatency;
+ } else {
+ *Tno_bw = 0;
+ }
+ } else if (myPipe->DCCEnable == true) {
+ st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
+ st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
+ *Tno_bw = 0;
+ } else {
+ st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
+ st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0;
+ *Tno_bw = 0;
+ }
+ st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0);
+ st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0);
+
+ if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
+ || myPipe->SourcePixelFormat == dm_420_12) {
+ st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
+ } else {
+ st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
+ }
+
+ st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
+ + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
+ st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
+ st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime));
+
+ st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre;
+ st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0);
+ st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0;
+
+ if (GPUVMEnable == true) {
+ st_vars->Tvm_oto = dml_max3(
+ st_vars->Tvm_trips,
+ *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto,
+ st_vars->LineTime / 4.0);
+ } else
+ st_vars->Tvm_oto = st_vars->LineTime / 4.0;
+
+ if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ st_vars->Tr0_oto = dml_max4(
+ st_vars->Tr0_trips,
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto,
+ (st_vars->LineTime - st_vars->Tvm_oto)/2.0,
+ st_vars->LineTime / 4.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips);
+ dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto);
+ dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4);
+#endif
+ } else
+ st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0;
+
+ st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0;
+ st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0;
+ st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto;
+
+ st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime -
+ (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
+ dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw);
+ dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
+ dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
+ dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem);
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+ dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
+ dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
+ dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes);
+ dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+ dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips);
+ dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips);
+ dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto);
+ dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto);
+ dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines);
+ dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines);
+ dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto);
+ dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto);
+ dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ);
+#endif
+
+ st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ);
+ dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
+ dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
+ __func__, VStartup * st_vars->LineTime);
+ dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
+ dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
+ dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
+ dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
+ __func__, *DSTYAfterScaler);
+#endif
+ st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
+ MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
+
+ if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes)
+ st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes;
+
+ *PrefetchBandwidth = 0;
+ *DestinationLinesToRequestVMInVBlank = 0;
+ *DestinationLinesToRequestRowInVBlank = 0;
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ if (st_vars->dst_y_prefetch_equ > 1) {
+ double PrefetchBandwidth1;
+ double PrefetchBandwidth2;
+ double PrefetchBandwidth3;
+ double PrefetchBandwidth4;
+
+ if (st_vars->Tpre_rounded - *Tno_bw > 0) {
+ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+ + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw);
+ st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1;
+ } else
+ PrefetchBandwidth1 = 0;
+
+ if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw)
+ && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) {
+ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw);
+ }
+
+ if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0)
+ PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) /
+ (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded);
+ else
+ PrefetchBandwidth2 = 0;
+
+ if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) {
+ PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+ + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded);
+ st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3;
+ } else
+ PrefetchBandwidth3 = 0;
+
+
+ if (VStartup == MaxVStartup &&
+ (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 *
+ st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) {
+ PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded);
+ }
+
+ if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) {
+ PrefetchBandwidth4 = st_vars->prefetch_sw_bytes /
+ (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded);
+ } else {
+ PrefetchBandwidth4 = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded);
+ dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
+ dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded);
+ dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1);
+ dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3);
+ dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
+ dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
+ dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
+ dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
+#endif
+ {
+ bool Case1OK;
+ bool Case2OK;
+ bool Case3OK;
+
+ if (PrefetchBandwidth1 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
+ >= st_vars->Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) {
+ Case1OK = true;
+ } else {
+ Case1OK = false;
+ }
+ } else {
+ Case1OK = false;
+ }
+
+ if (PrefetchBandwidth2 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
+ >= st_vars->Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) {
+ Case2OK = true;
+ } else {
+ Case2OK = false;
+ }
+ } else {
+ Case2OK = false;
+ }
+
+ if (PrefetchBandwidth3 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
+ st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
+ HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
+ st_vars->Tr0_trips_rounded) {
+ Case3OK = true;
+ } else {
+ Case3OK = false;
+ }
+ } else {
+ Case3OK = false;
+ }
+
+ if (Case1OK)
+ st_vars->prefetch_bw_equ = PrefetchBandwidth1;
+ else if (Case2OK)
+ st_vars->prefetch_bw_equ = PrefetchBandwidth2;
+ else if (Case3OK)
+ st_vars->prefetch_bw_equ = PrefetchBandwidth3;
+ else
+ st_vars->prefetch_bw_equ = PrefetchBandwidth4;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
+ dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
+ dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
+ dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ);
+#endif
+
+ if (st_vars->prefetch_bw_equ > 0) {
+ if (GPUVMEnable == true) {
+ st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
+ HostVMInefficiencyFactor / st_vars->prefetch_bw_equ,
+ st_vars->Tvm_trips, st_vars->LineTime / 4);
+ } else {
+ st_vars->Tvm_equ = st_vars->LineTime / 4;
+ }
+
+ if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
+ HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips,
+ (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4);
+ } else {
+ st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2;
+ }
+ } else {
+ st_vars->Tvm_equ = 0;
+ st_vars->Tr0_equ = 0;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
+#endif
+ }
+ }
+
+ if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) {
+ *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto;
+ st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto;
+ st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto;
+ *PrefetchBandwidth = st_vars->prefetch_bw_oto;
+ } else {
+ *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ;
+ st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ;
+ st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ;
+ *PrefetchBandwidth = st_vars->prefetch_bw_equ;
+ }
+
+ *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0;
+
+ *DestinationLinesToRequestRowInVBlank =
+ dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0;
+
+ st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
+ *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData);
+#endif
+
+ if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) {
+ *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+ dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
+ dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
+#endif
+ if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
+ if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
+ *VRatioPrefetchY =
+ dml_max((double) PrefetchSourceLinesY /
+ st_vars->LinesToRequestPrefetchPixelData,
+ (double) MaxNumSwathY * SwathHeightY /
+ (st_vars->LinesToRequestPrefetchPixelData -
+ (VInitPreFillY - 3.0) / 2.0));
+ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+ } else {
+ MyError = true;
+ *VRatioPrefetchY = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
+#endif
+ }
+
+ *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+ dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
+ dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
+#endif
+ if ((SwathHeightC > 4)) {
+ if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
+ *VRatioPrefetchC =
+ dml_max(*VRatioPrefetchC,
+ (double) MaxNumSwathC * SwathHeightC /
+ (st_vars->LinesToRequestPrefetchPixelData -
+ (VInitPreFillC - 3.0) / 2.0));
+ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+ } else {
+ MyError = true;
+ *VRatioPrefetchC = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
+ dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
+#endif
+ }
+
+ *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
+ / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
+ / st_vars->LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
+ dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+ dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
+ __func__, *RequiredPrefetchPixDataBWLuma);
+#endif
+ *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
+ st_vars->LinesToRequestPrefetchPixelData
+ * myPipe->BytePerPixelC
+ * swath_width_chroma_ub / st_vars->LineTime;
+ } else {
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
+ __func__, st_vars->LinesToRequestPrefetchPixelData);
+#endif
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ *RequiredPrefetchPixDataBWChroma = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
+ (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime +
+ 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE);
+ dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE);
+ dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
+ (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime);
+ dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
+ dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime -
+ st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
+ ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup);
+ dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
+ PixelPTEBytesPerRow);
+#endif
+ } else {
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
+ __func__, st_vars->dst_y_prefetch_equ);
+#endif
+ }
+
+ {
+ double prefetch_vm_bw;
+ double prefetch_row_bw;
+
+ if (PDEAndMetaPTEBytesFrame == 0) {
+ prefetch_vm_bw = 0;
+ } else if (*DestinationLinesToRequestVMInVBlank > 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+#endif
+ prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
+ (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
+#endif
+ } else {
+ prefetch_vm_bw = 0;
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
+ __func__, *DestinationLinesToRequestVMInVBlank);
+#endif
+ }
+
+ if (MetaRowByte + PixelPTEBytesPerRow == 0) {
+ prefetch_row_bw = 0;
+ } else if (*DestinationLinesToRequestRowInVBlank > 0) {
+ prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
+ (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
+#endif
+ } else {
+ prefetch_row_bw = 0;
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
+ __func__, *DestinationLinesToRequestRowInVBlank);
+#endif
+ }
+
+ *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
+ }
+
+ if (MyError) {
+ *PrefetchBandwidth = 0;
+ st_vars->TimeForFetchingMetaPTE = 0;
+ st_vars->TimeForFetchingRowInVBlank = 0;
+ *DestinationLinesToRequestVMInVBlank = 0;
+ *DestinationLinesToRequestRowInVBlank = 0;
+ *DestinationLinesForPrefetch = 0;
+ st_vars->LinesToRequestPrefetchPixelData = 0;
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ *RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ return MyError;
+} // CalculatePrefetchSchedule
+
+void dml32_CalculateFlipSchedule(
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool GPUVMEnable,
+ double HostVMMinPageSize,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+ double DPTEBytesPerRow,
+ double BandwidthAvailableForImmediateFlip,
+ unsigned int TotImmediateFlipBytes,
+ enum source_format_class SourcePixelFormat,
+ double LineTime,
+ double VRatio,
+ double VRatioChroma,
+ double Tno_bw,
+ bool DCCEnable,
+ unsigned int dpte_row_height,
+ unsigned int meta_row_height,
+ unsigned int dpte_row_height_chroma,
+ unsigned int meta_row_height_chroma,
+ bool use_one_row_for_frame_flip,
+
+ /* Output */
+ double *DestinationLinesToRequestVMInImmediateFlip,
+ double *DestinationLinesToRequestRowInImmediateFlip,
+ double *final_flip_bw,
+ bool *ImmediateFlipSupportedForPipe)
+{
+ double min_row_time = 0.0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double TimeForFetchingMetaPTEImmediateFlip;
+ double TimeForFetchingRowInVBlankImmediateFlip;
+ double ImmediateFlipBW;
+
+ if (GPUVMEnable == true && HostVMEnable == true)
+ HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ else
+ HostVMDynamicLevelsTrips = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
+ dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
+#endif
+
+ if (TotImmediateFlipBytes > 0) {
+ if (use_one_row_for_frame_flip) {
+ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
+ BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
+ } else {
+ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
+ BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
+ }
+ if (GPUVMEnable == true) {
+ TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
+ HostVMInefficiencyFactor / ImmediateFlipBW,
+ UrgentExtraLatency + UrgentLatency *
+ (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
+ LineTime / 4.0);
+ } else {
+ TimeForFetchingMetaPTEImmediateFlip = 0;
+ }
+ if ((GPUVMEnable == true || DCCEnable == true)) {
+ TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
+ UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
+ } else {
+ TimeForFetchingRowInVBlankImmediateFlip = 0;
+ }
+
+ *DestinationLinesToRequestVMInImmediateFlip =
+ dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
+ *DestinationLinesToRequestRowInImmediateFlip =
+ dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
+
+ if (GPUVMEnable == true) {
+ *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
+ (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
+ (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
+ } else if ((GPUVMEnable == true || DCCEnable == true)) {
+ *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
+ (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
+ } else {
+ *final_flip_bw = 0;
+ }
+ } else {
+ TimeForFetchingMetaPTEImmediateFlip = 0;
+ TimeForFetchingRowInVBlankImmediateFlip = 0;
+ *DestinationLinesToRequestVMInImmediateFlip = 0;
+ *DestinationLinesToRequestRowInImmediateFlip = 0;
+ *final_flip_bw = 0;
+ }
+
+ if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
+ if (GPUVMEnable == true && DCCEnable != true) {
+ min_row_time = dml_min(dpte_row_height *
+ LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
+ } else if (GPUVMEnable != true && DCCEnable == true) {
+ min_row_time = dml_min(meta_row_height *
+ LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
+ } else {
+ min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
+ LineTime / VRatio, dpte_row_height_chroma * LineTime /
+ VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
+ }
+ } else {
+ if (GPUVMEnable == true && DCCEnable != true) {
+ min_row_time = dpte_row_height * LineTime / VRatio;
+ } else if (GPUVMEnable != true && DCCEnable == true) {
+ min_row_time = meta_row_height * LineTime / VRatio;
+ } else {
+ min_row_time =
+ dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
+ }
+ }
+
+ if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
+ || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
+ > min_row_time) {
+ *ImmediateFlipSupportedForPipe = false;
+ } else {
+ *ImmediateFlipSupportedForPipe = true;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+ dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
+ dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
+ __func__, *DestinationLinesToRequestVMInImmediateFlip);
+ dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
+ __func__, *DestinationLinesToRequestRowInImmediateFlip);
+ dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
+ dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
+ __func__, TimeForFetchingRowInVBlankImmediateFlip);
+ dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
+ dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
+#endif
+} // CalculateFlipSchedule
+
+void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
+ bool USRRetrainingRequiredFinal,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int PrefetchMode,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MaxLineBufferLines,
+ unsigned int LineBufferSize,
+ unsigned int WritebackInterfaceBufferSize,
+ double DCFCLK,
+ double ReturnBW,
+ bool SynchronizeTimingsFinal,
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ bool DRRDisplay[],
+ unsigned int dpte_group_bytes[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ SOCParametersList mmSOCParameters,
+ unsigned int WritebackChunkSize,
+ double SOCCLK,
+ double DCFClkDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int LBBitPerPixel[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ double HRatio[],
+ double HRatioChroma[],
+ unsigned int VTaps[],
+ unsigned int VTapsChroma[],
+ double VRatio[],
+ double VRatioChroma[],
+ unsigned int HTotal[],
+ unsigned int VTotal[],
+ unsigned int VActive[],
+ double PixelClock[],
+ unsigned int BlendingAndTiming[],
+ unsigned int DPPPerSurface[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+ double DSTXAfterScaler[],
+ double DSTYAfterScaler[],
+ bool WritebackEnable[],
+ enum source_format_class WritebackPixelFormat[],
+ double WritebackDestinationWidth[],
+ double WritebackDestinationHeight[],
+ double WritebackSourceHeight[],
+ bool UnboundedRequestEnabled,
+ unsigned int CompressedBufferSizeInkByte,
+
+ /* Output */
+ Watermarks *Watermark,
+ enum clock_change_support *DRAMClockChangeSupport,
+ double MaxActiveDRAMClockChangeLatencySupported[],
+ unsigned int SubViewportLinesNeededInMALL[],
+ enum dm_fclock_change_support *FCLKChangeSupport,
+ double *MinActiveFCLKChangeLatencySupported,
+ bool *USRRetrainingSupport,
+ double ActiveDRAMClockChangeLatencyMargin[])
+{
+ unsigned int i, j, k;
+
+ st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0;
+ st_vars->DRAMClockChangeSupportNumber = 0;
+ st_vars->DRAMClockChangeMethod = 0;
+ st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
+ st_vars->MinActiveFCLKChangeMargin = 0.;
+ st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
+ st_vars->TotalPixelBW = 0.0;
+ st_vars->TotalActiveWriteback = 0;
+
+ Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
+ Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
+ + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
+ Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark;
+ Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark;
+ Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
+ + 10 / DCFClkDeepSleep;
+ Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
+ + 10 / DCFClkDeepSleep;
+ Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
+ + 10 / DCFClkDeepSleep;
+ Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
+ + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
+ dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
+ dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
+ dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark);
+ dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark);
+ dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark);
+ dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark);
+ dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark);
+ dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
+ __func__, Watermark->Z8StutterEnterPlusExitWatermark);
+#endif
+
+
+ st_vars->TotalActiveWriteback = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (WritebackEnable[k] == true)
+ st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1;
+ }
+
+ if (st_vars->TotalActiveWriteback <= 1) {
+ Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
+ } else {
+ Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
+ + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ }
+ if (USRRetrainingRequiredFinal)
+ Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
+ + mmSOCParameters.USRRetrainingLatency;
+
+ if (st_vars->TotalActiveWriteback <= 1) {
+ Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ + mmSOCParameters.WritebackLatency;
+ Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
+ + mmSOCParameters.WritebackLatency;
+ } else {
+ Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
+ + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK;
+ }
+
+ if (USRRetrainingRequiredFinal)
+ Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark
+ + mmSOCParameters.USRRetrainingLatency;
+
+ if (USRRetrainingRequiredFinal)
+ Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark
+ + mmSOCParameters.USRRetrainingLatency;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
+ __func__, Watermark->WritebackDRAMClockChangeWatermark);
+ dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark);
+ dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark);
+ dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal);
+ dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
+#endif
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
+ SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
+ st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
+
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines);
+ dml_print("DML::%s: k=%d, LineBufferSize = %d\n", __func__, k, LineBufferSize);
+ dml_print("DML::%s: k=%d, LBBitPerPixel = %d\n", __func__, k, LBBitPerPixel[k]);
+ dml_print("DML::%s: k=%d, HRatio = %f\n", __func__, k, HRatio[k]);
+ dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]);
+#endif
+
+ st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
+ st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
+ st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k];
+
+ if (UnboundedRequestEnabled) {
+ st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY
+ + CompressedBufferSizeInkByte * 1024
+ * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
+ / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW;
+ }
+
+ st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+ st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]);
+ st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
+
+ st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY
+ - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
+
+ if (NumberOfActiveSurfaces > 1) {
+ st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY
+ - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
+ / PixelClock[k] / VRatio[k];
+ }
+
+ if (BytePerPixelDETC[k] > 0) {
+ st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
+ st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]);
+ st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
+ / VRatioChroma[k];
+ st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC
+ - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
+ / PixelClock[k];
+ if (NumberOfActiveSurfaces > 1) {
+ st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC
+ - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
+ / PixelClock[k] / VRatioChroma[k];
+ }
+ st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY,
+ st_vars->ActiveClockChangeLatencyHidingC);
+ } else {
+ st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY;
+ }
+
+ ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
+ - Watermark->DRAMClockChangeWatermark;
+ st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
+ - Watermark->FCLKChangeWatermark;
+ st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
+
+ if (WritebackEnable[k]) {
+ st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
+ / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
+ / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
+ if (WritebackPixelFormat[k] == dm_444_64)
+ st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2;
+
+ st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding
+ - Watermark->WritebackDRAMClockChangeWatermark;
+
+ st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding
+ - Watermark->WritebackFCLKChangeWatermark;
+
+ ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
+ st_vars->WritebackFCLKChangeLatencyMargin);
+ st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k],
+ st_vars->WritebackDRAMClockChangeLatencyMargin);
+ }
+ MaxActiveDRAMClockChangeLatencySupported[k] =
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
+ 0 :
+ (ActiveDRAMClockChangeLatencyMargin[k]
+ + mmSOCParameters.DRAMClockChangeLatency);
+ }
+
+ for (i = 0; i < NumberOfActiveSurfaces; ++i) {
+ for (j = 0; j < NumberOfActiveSurfaces; ++j) {
+ if (i == j ||
+ (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) ||
+ (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) ||
+ (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) ||
+ (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] &&
+ HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
+ VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
+ (DRRDisplay[i] || DRRDisplay[j]))) {
+ st_vars->SynchronizedSurfaces[i][j] = true;
+ } else {
+ st_vars->SynchronizedSurfaces[i][j] = false;
+ }
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
+ st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) {
+ st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
+ st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k];
+ st_vars->SurfaceWithMinActiveFCLKChangeMargin = k;
+ }
+ }
+
+ *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
+
+ st_vars->SameTimingForFCLKChange = true;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) {
+ if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (st_vars->SameTimingForFCLKChange ||
+ st_vars->ActiveFCLKChangeLatencyMargin[k] <
+ st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
+ st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k];
+ }
+ st_vars->SameTimingForFCLKChange = false;
+ }
+ }
+
+ if (st_vars->MinActiveFCLKChangeMargin > 0) {
+ *FCLKChangeSupport = dm_fclock_change_vactive;
+ } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
+ (PrefetchMode <= 1)) {
+ *FCLKChangeSupport = dm_fclock_change_vblank;
+ } else {
+ *FCLKChangeSupport = dm_fclock_change_unsupported;
+ }
+
+ *USRRetrainingSupport = true;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (st_vars->USRRetrainingLatencyMargin[k] < 0)) {
+ *USRRetrainingSupport = false;
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
+ UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
+ UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
+ ActiveDRAMClockChangeLatencyMargin[k] < 0) {
+ if (PrefetchMode > 0) {
+ st_vars->DRAMClockChangeSupportNumber = 2;
+ } else if (st_vars->DRAMClockChangeSupportNumber == 0) {
+ st_vars->DRAMClockChangeSupportNumber = 1;
+ st_vars->LastSurfaceWithoutMargin = k;
+ } else if (st_vars->DRAMClockChangeSupportNumber == 1 &&
+ !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) {
+ st_vars->DRAMClockChangeSupportNumber = 2;
+ }
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
+ st_vars->DRAMClockChangeMethod = 1;
+ else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
+ st_vars->DRAMClockChangeMethod = 2;
+ }
+
+ if (st_vars->DRAMClockChangeMethod == 0) {
+ if (st_vars->DRAMClockChangeSupportNumber == 0)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
+ else if (st_vars->DRAMClockChangeSupportNumber == 1)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
+ else
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ } else if (st_vars->DRAMClockChangeMethod == 1) {
+ if (st_vars->DRAMClockChangeSupportNumber == 0)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
+ else if (st_vars->DRAMClockChangeSupportNumber == 1)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
+ else
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ } else {
+ if (st_vars->DRAMClockChangeSupportNumber == 0)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
+ else if (st_vars->DRAMClockChangeSupportNumber == 1)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
+ else
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ unsigned int dst_y_pstate;
+ unsigned int src_y_pstate_l;
+ unsigned int src_y_pstate_c;
+ unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
+
+ dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
+ src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
+ src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k];
+ sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
+
+#ifdef __DML_VBA_DEBUG__
+dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
+dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
+dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
+dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
+dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]);
+dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
+dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
+dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
+dml_print("DML::%s: k=%d, meta_row_height = %d\n", __func__, k, meta_row_height[k]);
+dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l);
+#endif
+ SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
+
+ if (BytePerPixelDETC[k] > 0) {
+ src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
+ src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k];
+ sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
+ SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
+
+#ifdef __DML_VBA_DEBUG__
+dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c);
+dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c);
+dml_print("DML::%s: k=%d, meta_row_height_chroma = %d\n", __func__, k, meta_row_height_chroma[k]);
+dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c);
+#endif
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
+ dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
+ dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
+ __func__, *MinActiveFCLKChangeLatencySupported);
+ dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
+#endif
+} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
+
+double dml32_CalculateWriteBackDISPCLK(
+ enum source_format_class WritebackPixelFormat,
+ double PixelClock,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackHTaps,
+ unsigned int WritebackVTaps,
+ unsigned int WritebackSourceWidth,
+ unsigned int WritebackDestinationWidth,
+ unsigned int HTotal,
+ unsigned int WritebackLineBufferSize,
+ double DISPCLKDPPCLKVCOSpeed)
+{
+ double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
+
+ DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
+ DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
+ DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
+ WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
+ return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
+}
+
+void dml32_CalculateMinAndMaxPrefetchMode(
+ enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
+ unsigned int *MinPrefetchMode,
+ unsigned int *MaxPrefetchMode)
+{
+ if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
+ *MinPrefetchMode = 3;
+ *MaxPrefetchMode = 3;
+ } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
+ *MinPrefetchMode = 2;
+ *MaxPrefetchMode = 2;
+ } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
+ *MinPrefetchMode = 1;
+ *MaxPrefetchMode = 1;
+ } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
+ *MinPrefetchMode = 0;
+ *MaxPrefetchMode = 0;
+ } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
+ dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
+ *MinPrefetchMode = 0;
+ *MaxPrefetchMode = 3;
+ } else {
+ *MinPrefetchMode = 0;
+ *MaxPrefetchMode = 3;
+ }
+} // CalculateMinAndMaxPrefetchMode
+
+void dml32_CalculatePixelDeliveryTimes(
+ unsigned int NumberOfActiveSurfaces,
+ double VRatio[],
+ double VRatioChroma[],
+ double VRatioPrefetchY[],
+ double VRatioPrefetchC[],
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ unsigned int DPPPerSurface[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ unsigned int BytePerPixelC[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int NumberOfCursors[],
+ unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+ unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int BlockHeight256BytesC[],
+
+ /* Output */
+ double DisplayPipeLineDeliveryTimeLuma[],
+ double DisplayPipeLineDeliveryTimeChroma[],
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeLuma[],
+ double DisplayPipeRequestDeliveryTimeChroma[],
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ double CursorRequestDeliveryTime[],
+ double CursorRequestDeliveryTimePrefetch[])
+{
+ double req_per_swath_ub;
+ unsigned int k;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
+ dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
+ dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
+ dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
+ dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
+ dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
+ dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
+#endif
+
+ if (VRatio[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLuma[k] =
+ swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+ }
+
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChroma[k] = 0;
+ } else {
+ if (VRatioChroma[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChroma[k] =
+ swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChroma[k] =
+ swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+ }
+ }
+
+ if (VRatioPrefetchY[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
+ swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
+ swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+ }
+
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
+ } else {
+ if (VRatioPrefetchC[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
+ DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
+ swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
+ __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
+ __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
+ __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
+ __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
+#endif
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!IsVertical(SourceRotation[k]))
+ req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
+ else
+ req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
+#endif
+
+ DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
+ DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeRequestDeliveryTimeChroma[k] = 0;
+ DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
+ } else {
+ if (!IsVertical(SourceRotation[k]))
+ req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
+ else
+ req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
+#endif
+ DisplayPipeRequestDeliveryTimeChroma[k] =
+ DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
+ DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
+ __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
+ __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
+ __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
+ __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
+#endif
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ unsigned int cursor_req_per_width;
+
+ cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
+ 256.0 / 8.0, 1.0);
+ if (NumberOfCursors[k] > 0) {
+ if (VRatio[k] <= 1) {
+ CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
+ HRatio[k] / PixelClock[k] / cursor_req_per_width;
+ } else {
+ CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
+ PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
+ }
+ if (VRatioPrefetchY[k] <= 1) {
+ CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
+ HRatio[k] / PixelClock[k] / cursor_req_per_width;
+ } else {
+ CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
+ PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
+ }
+ } else {
+ CursorRequestDeliveryTime[k] = 0;
+ CursorRequestDeliveryTimePrefetch[k] = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
+ __func__, k, NumberOfCursors[k]);
+ dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
+ __func__, k, CursorRequestDeliveryTime[k]);
+ dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
+ __func__, k, CursorRequestDeliveryTimePrefetch[k]);
+#endif
+ }
+} // CalculatePixelDeliveryTimes
+
+void dml32_CalculateMetaAndPTETimes(
+ bool use_one_row_for_frame[],
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int MetaChunkSize,
+ unsigned int MinMetaChunkSizeBytes,
+ unsigned int HTotal[],
+ double VRatio[],
+ double VRatioChroma[],
+ double DestinationLinesToRequestRowInVBlank[],
+ double DestinationLinesToRequestRowInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int dpte_row_height[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PTERequestSizeC[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+
+ /* Output */
+ double DST_Y_PER_PTE_ROW_NOM_L[],
+ double DST_Y_PER_PTE_ROW_NOM_C[],
+ double DST_Y_PER_META_ROW_NOM_L[],
+ double DST_Y_PER_META_ROW_NOM_C[],
+ double TimePerMetaChunkNominal[],
+ double TimePerChromaMetaChunkNominal[],
+ double TimePerMetaChunkVBlank[],
+ double TimePerChromaMetaChunkVBlank[],
+ double TimePerMetaChunkFlip[],
+ double TimePerChromaMetaChunkFlip[],
+ double time_per_pte_group_nom_luma[],
+ double time_per_pte_group_vblank_luma[],
+ double time_per_pte_group_flip_luma[],
+ double time_per_pte_group_nom_chroma[],
+ double time_per_pte_group_vblank_chroma[],
+ double time_per_pte_group_flip_chroma[])
+{
+ unsigned int meta_chunk_width;
+ unsigned int min_meta_chunk_width;
+ unsigned int meta_chunk_per_row_int;
+ unsigned int meta_row_remainder;
+ unsigned int meta_chunk_threshold;
+ unsigned int meta_chunks_per_row_ub;
+ unsigned int meta_chunk_width_chroma;
+ unsigned int min_meta_chunk_width_chroma;
+ unsigned int meta_chunk_per_row_int_chroma;
+ unsigned int meta_row_remainder_chroma;
+ unsigned int meta_chunk_threshold_chroma;
+ unsigned int meta_chunks_per_row_ub_chroma;
+ unsigned int dpte_group_width_luma;
+ unsigned int dpte_groups_per_row_luma_ub;
+ unsigned int dpte_group_width_chroma;
+ unsigned int dpte_groups_per_row_chroma_ub;
+ unsigned int k;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0)
+ DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
+ else
+ DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
+ DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0)
+ DST_Y_PER_META_ROW_NOM_C[k] = 0;
+ else
+ DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (DCCEnable[k] == true) {
+ meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
+ min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
+ meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
+ meta_row_remainder = meta_row_width[k] % meta_chunk_width;
+ if (!IsVertical(SourceRotation[k]))
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
+ else
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
+
+ if (meta_row_remainder <= meta_chunk_threshold)
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+ else
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+
+ TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ if (BytePerPixelC[k] == 0) {
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ } else {
+ meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
+ meta_row_height_chroma[k];
+ min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
+ meta_row_height_chroma[k];
+ meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
+ meta_chunk_width_chroma;
+ meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
+ if (!IsVertical(SourceRotation[k])) {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
+ meta_req_width_chroma[k];
+ } else {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
+ meta_req_height_chroma[k];
+ }
+ if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
+ else
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
+
+ TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ }
+ } else {
+ TimePerMetaChunkNominal[k] = 0;
+ TimePerMetaChunkVBlank[k] = 0;
+ TimePerMetaChunkFlip[k] = 0;
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (GPUVMEnable == true) {
+ if (!IsVertical(SourceRotation[k])) {
+ dpte_group_width_luma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
+ } else {
+ dpte_group_width_luma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
+ }
+
+ if (use_one_row_for_frame[k]) {
+ dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
+ (double) dpte_group_width_luma / 2.0, 1.0);
+ } else {
+ dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
+ (double) dpte_group_width_luma, 1.0);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
+ __func__, k, use_one_row_for_frame[k]);
+ dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
+ __func__, k, dpte_group_bytes[k]);
+ dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
+ __func__, k, PTERequestSizeY[k]);
+ dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
+ __func__, k, PixelPTEReqWidthY[k]);
+ dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
+ __func__, k, PixelPTEReqHeightY[k]);
+ dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
+ __func__, k, dpte_row_width_luma_ub[k]);
+ dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
+ __func__, k, dpte_group_width_luma);
+ dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
+ __func__, k, dpte_groups_per_row_luma_ub);
+#endif
+
+ time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ if (BytePerPixelC[k] == 0) {
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ } else {
+ if (!IsVertical(SourceRotation[k])) {
+ dpte_group_width_chroma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
+ } else {
+ dpte_group_width_chroma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
+ }
+
+ if (use_one_row_for_frame[k]) {
+ dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
+ (double) dpte_group_width_chroma / 2.0, 1.0);
+ } else {
+ dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
+ (double) dpte_group_width_chroma, 1.0);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
+ __func__, k, dpte_row_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
+ __func__, k, dpte_group_width_chroma);
+ dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
+ __func__, k, dpte_groups_per_row_chroma_ub);
+#endif
+ time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ }
+ } else {
+ time_per_pte_group_nom_luma[k] = 0;
+ time_per_pte_group_vblank_luma[k] = 0;
+ time_per_pte_group_flip_luma[k] = 0;
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
+ __func__, k, DestinationLinesToRequestRowInVBlank[k]);
+ dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
+ __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
+ __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
+ __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
+ __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
+ __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
+ dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
+ __func__, k, TimePerMetaChunkNominal[k]);
+ dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
+ __func__, k, TimePerMetaChunkVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
+ __func__, k, TimePerMetaChunkFlip[k]);
+ dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
+ __func__, k, TimePerChromaMetaChunkNominal[k]);
+ dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
+ __func__, k, TimePerChromaMetaChunkVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
+ __func__, k, TimePerChromaMetaChunkFlip[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
+ __func__, k, time_per_pte_group_nom_luma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
+ __func__, k, time_per_pte_group_vblank_luma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
+ __func__, k, time_per_pte_group_flip_luma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
+ __func__, k, time_per_pte_group_nom_chroma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
+ __func__, k, time_per_pte_group_vblank_chroma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
+ __func__, k, time_per_pte_group_flip_chroma[k]);
+#endif
+ }
+} // CalculateMetaAndPTETimes
+
+void dml32_CalculateVMGroupAndRequestTimes(
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int HTotal[],
+ unsigned int BytePerPixelC[],
+ double DestinationLinesToRequestVMInVBlank[],
+ double DestinationLinesToRequestVMInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+
+ /* Output */
+ double TimePerVMGroupVBlank[],
+ double TimePerVMGroupFlip[],
+ double TimePerVMRequestVBlank[],
+ double TimePerVMRequestFlip[])
+{
+ unsigned int k;
+ unsigned int num_group_per_lower_vm_stage;
+ unsigned int num_req_per_lower_vm_stage;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
+ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+#endif
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
+ dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
+ dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
+ __func__, k, dpde0_bytes_per_frame_ub_l[k]);
+ dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
+ __func__, k, dpde0_bytes_per_frame_ub_c[k]);
+ dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
+ __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
+ dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
+ __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
+#endif
+
+ if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
+ if (DCCEnable[k] == false) {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = dml_ceil(
+ (double) (dpde0_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1.0) +
+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
+ (double) (vm_group_bytes[k]), 1.0);
+ } else {
+ num_group_per_lower_vm_stage = dml_ceil(
+ (double) (dpde0_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1.0);
+ }
+ } else {
+ if (GPUVMMaxPageTableLevels == 1) {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = dml_ceil(
+ (double) (meta_pte_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1.0) +
+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
+ (double) (vm_group_bytes[k]), 1.0);
+ } else {
+ num_group_per_lower_vm_stage = dml_ceil(
+ (double) (meta_pte_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1.0);
+ }
+ } else {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = 2 + dml_ceil(
+ (double) (dpde0_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1) +
+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
+ (double) (vm_group_bytes[k]), 1) +
+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1) +
+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
+ (double) (vm_group_bytes[k]), 1);
+ } else {
+ num_group_per_lower_vm_stage = 1 + dml_ceil(
+ (double) (dpde0_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1) + dml_ceil(
+ (double) (meta_pte_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1);
+ }
+ }
+ }
+
+ if (DCCEnable[k] == false) {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
+ dpde0_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
+ }
+ } else {
+ if (GPUVMMaxPageTableLevels == 1) {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
+ meta_pte_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
+ }
+ } else {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
+ 64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
+ meta_pte_bytes_per_frame_ub_l[k] / 64 +
+ meta_pte_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
+ 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
+ }
+ }
+ }
+
+ TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
+ HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+ TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+ TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
+ HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+ TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+
+ if (GPUVMMaxPageTableLevels > 2) {
+ TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
+ TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
+ TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
+ TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
+ }
+
+ } else {
+ TimePerVMGroupVBlank[k] = 0;
+ TimePerVMGroupFlip[k] = 0;
+ TimePerVMRequestVBlank[k] = 0;
+ TimePerVMRequestFlip[k] = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
+ dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
+#endif
+ }
+} // CalculateVMGroupAndRequestTimes
+
+void dml32_CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ unsigned int nomDETInKByte,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dm_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum dm_rotation_angle SourceRotation,
+ /* Output */
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma)
+{
+ typedef enum {
+ REQ_256Bytes,
+ REQ_128BytesNonContiguous,
+ REQ_128BytesContiguous,
+ REQ_NA
+ } RequestType;
+
+ RequestType RequestLuma;
+ RequestType RequestChroma;
+
+ unsigned int segment_order_horz_contiguous_luma;
+ unsigned int segment_order_horz_contiguous_chroma;
+ unsigned int segment_order_vert_contiguous_luma;
+ unsigned int segment_order_vert_contiguous_chroma;
+ unsigned int req128_horz_wc_l;
+ unsigned int req128_horz_wc_c;
+ unsigned int req128_vert_wc_l;
+ unsigned int req128_vert_wc_c;
+ unsigned int MAS_vp_horz_limit;
+ unsigned int MAS_vp_vert_limit;
+ unsigned int max_vp_horz_width;
+ unsigned int max_vp_vert_height;
+ unsigned int eff_surf_width_l;
+ unsigned int eff_surf_width_c;
+ unsigned int eff_surf_height_l;
+ unsigned int eff_surf_height_c;
+ unsigned int full_swath_bytes_horz_wc_l;
+ unsigned int full_swath_bytes_horz_wc_c;
+ unsigned int full_swath_bytes_vert_wc_l;
+ unsigned int full_swath_bytes_vert_wc_c;
+ unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
+
+ unsigned int yuv420;
+ unsigned int horz_div_l;
+ unsigned int horz_div_c;
+ unsigned int vert_div_l;
+ unsigned int vert_div_c;
+
+ unsigned int swath_buf_size;
+ double detile_buf_vp_horz_limit;
+ double detile_buf_vp_vert_limit;
+
+ yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
+ SourcePixelFormat == dm_420_12) ? 1 : 0);
+ horz_div_l = 1;
+ horz_div_c = 1;
+ vert_div_l = 1;
+ vert_div_c = 1;
+
+ if (BytePerPixelY == 1)
+ vert_div_l = 0;
+ if (BytePerPixelC == 1)
+ vert_div_c = 0;
+
+ if (BytePerPixelC == 0) {
+ swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
+ detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
+ BytePerPixelY / (1 + horz_div_l));
+ detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
+ (1 + vert_div_l));
+ } else {
+ swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
+ detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
+ BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
+ BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
+ detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
+ (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
+ (1 + vert_div_c) / (1 + yuv420));
+ }
+
+ if (SourcePixelFormat == dm_420_10) {
+ detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
+ detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
+ }
+
+ detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
+ detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
+
+ MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
+ MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
+ max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
+ max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
+ eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
+ eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
+ eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
+ eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
+
+ full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
+ full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
+ if (BytePerPixelC > 0) {
+ full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
+ full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
+ } else {
+ full_swath_bytes_horz_wc_c = 0;
+ full_swath_bytes_vert_wc_c = 0;
+ }
+
+ if (SourcePixelFormat == dm_420_10) {
+ full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
+ full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
+ full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
+ full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
+ }
+
+ if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 0;
+ } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
+ full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 1;
+ } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
+ full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 0;
+ } else {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 1;
+ }
+
+ if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 0;
+ } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
+ full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 1;
+ } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
+ full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 0;
+ } else {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 1;
+ }
+
+ if (BytePerPixelY == 2) {
+ segment_order_horz_contiguous_luma = 0;
+ segment_order_vert_contiguous_luma = 1;
+ } else {
+ segment_order_horz_contiguous_luma = 1;
+ segment_order_vert_contiguous_luma = 0;
+ }
+
+ if (BytePerPixelC == 2) {
+ segment_order_horz_contiguous_chroma = 0;
+ segment_order_vert_contiguous_chroma = 1;
+ } else {
+ segment_order_horz_contiguous_chroma = 1;
+ segment_order_vert_contiguous_chroma = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
+ dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
+ dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
+ dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
+ dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
+ dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
+ dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
+ dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
+ dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
+ __func__, segment_order_horz_contiguous_chroma);
+#endif
+
+ if (DCCProgrammingAssumesScanDirectionUnknown == true) {
+ if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
+ RequestLuma = REQ_256Bytes;
+ else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
+ (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
+ RequestLuma = REQ_128BytesNonContiguous;
+ else
+ RequestLuma = REQ_128BytesContiguous;
+
+ if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
+ RequestChroma = REQ_256Bytes;
+ else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
+ (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
+ RequestChroma = REQ_128BytesNonContiguous;
+ else
+ RequestChroma = REQ_128BytesContiguous;
+
+ } else if (!IsVertical(SourceRotation)) {
+ if (req128_horz_wc_l == 0)
+ RequestLuma = REQ_256Bytes;
+ else if (segment_order_horz_contiguous_luma == 0)
+ RequestLuma = REQ_128BytesNonContiguous;
+ else
+ RequestLuma = REQ_128BytesContiguous;
+
+ if (req128_horz_wc_c == 0)
+ RequestChroma = REQ_256Bytes;
+ else if (segment_order_horz_contiguous_chroma == 0)
+ RequestChroma = REQ_128BytesNonContiguous;
+ else
+ RequestChroma = REQ_128BytesContiguous;
+
+ } else {
+ if (req128_vert_wc_l == 0)
+ RequestLuma = REQ_256Bytes;
+ else if (segment_order_vert_contiguous_luma == 0)
+ RequestLuma = REQ_128BytesNonContiguous;
+ else
+ RequestLuma = REQ_128BytesContiguous;
+
+ if (req128_vert_wc_c == 0)
+ RequestChroma = REQ_256Bytes;
+ else if (segment_order_vert_contiguous_chroma == 0)
+ RequestChroma = REQ_128BytesNonContiguous;
+ else
+ RequestChroma = REQ_128BytesContiguous;
+ }
+
+ if (RequestLuma == REQ_256Bytes) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 256;
+ *IndependentBlockLuma = 0;
+ } else if (RequestLuma == REQ_128BytesContiguous) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 128;
+ *IndependentBlockLuma = 128;
+ } else {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 64;
+ *IndependentBlockLuma = 64;
+ }
+
+ if (RequestChroma == REQ_256Bytes) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 256;
+ *IndependentBlockChroma = 0;
+ } else if (RequestChroma == REQ_128BytesContiguous) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 128;
+ *IndependentBlockChroma = 128;
+ } else {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 64;
+ *IndependentBlockChroma = 64;
+ }
+
+ if (DCCEnabled != true || BytePerPixelC == 0) {
+ *MaxUncompressedBlockChroma = 0;
+ *MaxCompressedBlockChroma = 0;
+ *IndependentBlockChroma = 0;
+ }
+
+ if (DCCEnabled != true) {
+ *MaxUncompressedBlockLuma = 0;
+ *MaxCompressedBlockLuma = 0;
+ *IndependentBlockLuma = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
+ dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
+ dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
+ dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
+ dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
+ dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
+#endif
+
+} // CalculateDCCConfiguration
+
+void dml32_CalculateStutterEfficiency(
+ unsigned int CompressedBufferSizeInkByte,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool UnboundedRequestEnabled,
+ unsigned int MetaFIFOSizeInKEntries,
+ unsigned int ZeroSizeBufferEntries,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ unsigned int CompbufReservedSpace64B,
+ unsigned int CompbufReservedSpaceZs,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizeTimingsFinal,
+ unsigned int BlendingAndTiming[],
+ double StutterEnterPlusExitWatermark,
+ double Z8StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ unsigned int DPPPerSurface[],
+ unsigned int DETBufferSizeY[],
+ unsigned int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ unsigned int HTotal[],
+ unsigned int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesC[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int DCCYMaxUncompressedBlock[],
+ unsigned int DCCCMaxUncompressedBlock[],
+ unsigned int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthSurfaceLuma[],
+ double ReadBandwidthSurfaceChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+
+ /* Output */
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ unsigned int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ unsigned int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod,
+ bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
+{
+
+ bool FoundCriticalSurface = false;
+ unsigned int SwathSizeCriticalSurface = 0;
+ unsigned int LastChunkOfSwathSize;
+ unsigned int MissingPartOfLastSwathOfDETSize;
+ double LastZ8StutterPeriod = 0.0;
+ double LastStutterPeriod = 0.0;
+ unsigned int TotalNumberOfActiveOTG = 0;
+ double doublePixelClock;
+ unsigned int doubleHTotal;
+ unsigned int doubleVTotal;
+ bool SameTiming = true;
+ double DETBufferingTimeY;
+ double SwathWidthYCriticalSurface = 0.0;
+ double SwathHeightYCriticalSurface = 0.0;
+ double VActiveTimeCriticalSurface = 0.0;
+ double FrameTimeCriticalSurface = 0.0;
+ unsigned int BytePerPixelYCriticalSurface = 0;
+ double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
+ unsigned int DETBufferSizeYCriticalSurface = 0;
+ double MinTTUVBlankCriticalSurface = 0.0;
+ unsigned int BlockWidth256BytesYCriticalSurface = 0;
+ bool doublePlaneCriticalSurface = 0;
+ bool doublePipeCriticalSurface = 0;
+ double TotalCompressedReadBandwidth;
+ double TotalRowReadBandwidth;
+ double AverageDCCCompressionRate;
+ double EffectiveCompressedBufferSize;
+ double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
+ double StutterBurstTime;
+ unsigned int TotalActiveWriteback;
+ double LinesInDETY;
+ double LinesInDETYRoundedDownToSwath;
+ double MaximumEffectiveCompressionLuma;
+ double MaximumEffectiveCompressionChroma;
+ double TotalZeroSizeRequestReadBandwidth;
+ double TotalZeroSizeCompressedReadBandwidth;
+ double AverageDCCZeroSizeFraction;
+ double AverageZeroSizeCompressionRate;
+ unsigned int k;
+
+ TotalZeroSizeRequestReadBandwidth = 0;
+ TotalZeroSizeCompressedReadBandwidth = 0;
+ TotalRowReadBandwidth = 0;
+ TotalCompressedReadBandwidth = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ if (DCCEnable[k] == true) {
+ if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
+ || (!IsVertical(SourceRotation[k])
+ && BlockHeight256BytesY[k] > SwathHeightY[k])
+ || DCCYMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionLuma = 2;
+ } else {
+ MaximumEffectiveCompressionLuma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
+ + ReadBandwidthSurfaceLuma[k]
+ / dml_min(NetDCCRateLuma[k],
+ MaximumEffectiveCompressionLuma);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
+ __func__, k, ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
+ __func__, k, NetDCCRateLuma[k]);
+ dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
+ __func__, k, MaximumEffectiveCompressionLuma);
+#endif
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
+ + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
+ / MaximumEffectiveCompressionLuma;
+
+ if (ReadBandwidthSurfaceChroma[k] > 0) {
+ if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
+ || (!IsVertical(SourceRotation[k])
+ && BlockHeight256BytesC[k] > SwathHeightC[k])
+ || DCCCMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionChroma = 2;
+ } else {
+ MaximumEffectiveCompressionChroma = 4;
+ }
+ TotalCompressedReadBandwidth =
+ TotalCompressedReadBandwidth
+ + ReadBandwidthSurfaceChroma[k]
+ / dml_min(NetDCCRateChroma[k],
+ MaximumEffectiveCompressionChroma);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
+ __func__, k, ReadBandwidthSurfaceChroma[k]);
+ dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
+ __func__, k, NetDCCRateChroma[k]);
+ dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
+ __func__, k, MaximumEffectiveCompressionChroma);
+#endif
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
+ + ReadBandwidthSurfaceChroma[k]
+ * DCCFractionOfZeroSizeRequestsChroma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthSurfaceChroma[k]
+ * DCCFractionOfZeroSizeRequestsChroma[k]
+ / MaximumEffectiveCompressionChroma;
+ }
+ } else {
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
+ + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
+ }
+ TotalRowReadBandwidth = TotalRowReadBandwidth
+ + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
+ }
+ }
+
+ AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
+ AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
+ dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
+ __func__, TotalZeroSizeCompressedReadBandwidth);
+ dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
+ dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
+ dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
+ dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
+#endif
+ if (AverageDCCZeroSizeFraction == 1) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
+ / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
+ * AverageZeroSizeCompressionRate
+ + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
+ * AverageZeroSizeCompressionRate;
+ } else if (AverageDCCZeroSizeFraction > 0) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
+ / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = dml_min(
+ (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ (double) MetaFIFOSizeInKEntries * 1024 * 64
+ / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
+ + 1 / AverageDCCCompressionRate))
+ + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
+ * AverageDCCCompressionRate,
+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
+ / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: min 1 = %f\n", __func__,
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
+ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
+ AverageDCCCompressionRate));
+ dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
+ CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
+ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+#endif
+ } else {
+ EffectiveCompressedBufferSize = dml_min(
+ (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
+ + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
+ * AverageDCCCompressionRate;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: min 1 = %f\n", __func__,
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__,
+ MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
+#endif
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
+ dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+#endif
+
+ *StutterPeriod = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ LinesInDETY = ((double) DETBufferSizeY[k]
+ + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
+ * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
+ / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
+ DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
+ / VRatio[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
+ dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
+ dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
+ __func__, k, ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
+ dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
+ __func__, k, LinesInDETYRoundedDownToSwath);
+ dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
+ dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
+ dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
+ dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
+#endif
+
+ if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
+ bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
+
+ FoundCriticalSurface = true;
+ *StutterPeriod = DETBufferingTimeY;
+ FrameTimeCriticalSurface = (
+ isInterlaceTiming ?
+ dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
+ * (double) HTotal[k] / PixelClock[k];
+ VActiveTimeCriticalSurface = (
+ isInterlaceTiming ?
+ dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
+ * (double) HTotal[k] / PixelClock[k];
+ BytePerPixelYCriticalSurface = BytePerPixelY[k];
+ SwathWidthYCriticalSurface = SwathWidthY[k];
+ SwathHeightYCriticalSurface = SwathHeightY[k];
+ BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
+ LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
+ - (LinesInDETY - LinesInDETYRoundedDownToSwath);
+ DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
+ MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
+ doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
+ doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
+ __func__, k, FoundCriticalSurface);
+ dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
+ __func__, k, *StutterPeriod);
+ dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
+ __func__, k, MinTTUVBlankCriticalSurface);
+ dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
+ __func__, k, FrameTimeCriticalSurface);
+ dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
+ __func__, k, VActiveTimeCriticalSurface);
+ dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
+ __func__, k, BytePerPixelYCriticalSurface);
+ dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
+ __func__, k, SwathWidthYCriticalSurface);
+ dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
+ __func__, k, SwathHeightYCriticalSurface);
+ dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
+ __func__, k, BlockWidth256BytesYCriticalSurface);
+ dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
+ __func__, k, doublePlaneCriticalSurface);
+ dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
+ __func__, k, doublePipeCriticalSurface);
+ dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
+ __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
+#endif
+ }
+ }
+ }
+
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
+ EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
+ __func__, *StutterPeriod * TotalDataReadBandwidth);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+ dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+ dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
+ dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+#endif
+
+ StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
+ / ReturnBW
+ + (*StutterPeriod * TotalDataReadBandwidth
+ - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
+ + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
+ AverageDCCCompressionRate / ReturnBW);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
+ __func__, (*StutterPeriod * TotalDataReadBandwidth));
+ dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
+ dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+#endif
+ StutterBurstTime = dml_max(StutterBurstTime,
+ LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
+ * SwathWidthYCriticalSurface / ReturnBW);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Time to finish residue swath=%f\n",
+ __func__,
+ LinesToFinishSwathTransferStutterCriticalSurface *
+ BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
+#endif
+
+ TotalActiveWriteback = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (WritebackEnable[k])
+ TotalActiveWriteback = TotalActiveWriteback + 1;
+ }
+
+ if (TotalActiveWriteback == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
+ dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
+ dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+#endif
+ *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
+ 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
+ *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
+ 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
+ *NumberOfStutterBurstsPerFrame = (
+ *StutterEfficiencyNotIncludingVBlank > 0 ?
+ dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
+ *Z8NumberOfStutterBurstsPerFrame = (
+ *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
+ dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
+ } else {
+ *StutterEfficiencyNotIncludingVBlank = 0.;
+ *Z8StutterEfficiencyNotIncludingVBlank = 0.;
+ *NumberOfStutterBurstsPerFrame = 0;
+ *Z8NumberOfStutterBurstsPerFrame = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
+ __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
+ __func__, *Z8StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ if (BlendingAndTiming[k] == k) {
+ if (TotalNumberOfActiveOTG == 0) {
+ doublePixelClock = PixelClock[k];
+ doubleHTotal = HTotal[k];
+ doubleVTotal = VTotal[k];
+ } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
+ || doubleVTotal != VTotal[k]) {
+ SameTiming = false;
+ }
+ TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
+ }
+ }
+ }
+
+ if (*StutterEfficiencyNotIncludingVBlank > 0) {
+ LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+
+ if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
+ && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
+ *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
+ + StutterBurstTime * VActiveTimeCriticalSurface
+ / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
+ } else {
+ *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *StutterEfficiency = 0;
+ }
+
+ if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
+ LastZ8StutterPeriod = VActiveTimeCriticalSurface
+ - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+ if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
+ MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
+ *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
+ * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
+ } else {
+ *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *Z8StutterEfficiency = 0.;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+ dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
+ dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
+ __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
+ * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
+ LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
+ MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
+ - DETBufferSizeYCriticalSurface;
+
+ *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
+ && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
+ && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
+ && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
+ dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
+ dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
+ dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+#endif
+} // CalculateStutterEfficiency
+
+void dml32_CalculateMaxDETAndMinCompressedBufferSize(
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int ROBBufferSizeInKByte,
+ unsigned int MaxNumDPP,
+ bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
+ unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
+
+ /* Output */
+ unsigned int *MaxTotalDETInKByte,
+ unsigned int *nomDETInKByte,
+ unsigned int *MinCompressedBufferSizeInKByte)
+{
+ bool det_buff_size_override_en = nomDETInKByteOverrideEnable;
+ unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue;
+
+ *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte +
+ (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64);
+ *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
+ *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
+ dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
+ dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
+ dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
+ dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
+#endif
+
+ if (det_buff_size_override_en) {
+ *nomDETInKByte = det_buff_size_override_val;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
+#endif
+ }
+} // CalculateMaxDETAndMinCompressedBufferSize
+
+bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[])
+{
+ unsigned int k;
+ bool NotEnoughUrgentLatencyHiding = false;
+ bool CalculateVActiveBandwithSupport_val = false;
+ double VActiveBandwith = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (NotUrgentLatencyHiding[k]) {
+ NotEnoughUrgentLatencyHiding = true;
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k];
+ }
+
+ CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
+
+#ifdef __DML_VBA_DEBUG__
+dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, NotEnoughUrgentLatencyHiding);
+dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith);
+dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val);
+#endif
+ return CalculateVActiveBandwithSupport_val;
+}
+
+void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+
+ /* output */
+ double *PrefetchBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *PrefetchBandwidthSupport)
+{
+ unsigned int k;
+ bool NotEnoughUrgentLatencyHiding = false;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (NotUrgentLatencyHiding[k]) {
+ NotEnoughUrgentLatencyHiding = true;
+ }
+ }
+
+ *PrefetchBandwidth = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
+ ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]),
+ NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ }
+
+ *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
+ *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
+}
+
+double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double cursor_bw_pre[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[])
+{
+ unsigned int k;
+ double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
+ NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ }
+
+ return CalculateBandwidthAvailableForImmediateFlip_val;
+}
+
+void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ enum immediate_flip_requirement ImmediateFlipRequirement[],
+ double final_flip_bw[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+
+ /* output */
+ double *TotalBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *ImmediateFlipBandwidthSupport)
+{
+ unsigned int k;
+ *TotalBandwidth = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
+ *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
+ NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
+ NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ } else {
+ *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
+ NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
+ NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ }
+ }
+ *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
+ *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
new file mode 100644
index 000000000000..37a314ce284b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -0,0 +1,1188 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML_DCN32_DISPLAY_MODE_VBA_UTIL_32_H__
+#define __DML_DCN32_DISPLAY_MODE_VBA_UTIL_32_H__
+
+#include "../display_mode_enums.h"
+#include "os_types.h"
+#include "../dc_features.h"
+#include "../display_mode_structs.h"
+#include "dml/display_mode_vba.h"
+
+unsigned int dml32_dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum output_format_class pixelFormat,
+ enum output_encoder_class Output);
+
+unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
+
+bool IsVertical(enum dm_rotation_angle Scan);
+
+void dml32_CalculateBytePerPixelAndBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+
+ /*Output*/
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC,
+ unsigned int *MacroTileHeightY,
+ unsigned int *MacroTileHeightC,
+ unsigned int *MacroTileWidthY,
+ unsigned int *MacroTileWidthC);
+
+void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
+ double HRatio,
+ double HRatioChroma,
+ double VRatio,
+ double VRatioChroma,
+ double MaxDCHUBToPSCLThroughput,
+ double MaxPSCLToLBThroughput,
+ double PixelClock,
+ enum source_format_class SourcePixelFormat,
+ unsigned int HTaps,
+ unsigned int HTapsChroma,
+ unsigned int VTaps,
+ unsigned int VTapsChroma,
+
+ /* output */
+ double *PSCL_THROUGHPUT,
+ double *PSCL_THROUGHPUT_CHROMA,
+ double *DPPCLKUsingSingleDPP);
+
+void dml32_CalculateSwathAndDETConfiguration(
+ struct dml32_CalculateSwathAndDETConfiguration *st_vars,
+ unsigned int DETSizeOverride[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ double ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int nomDETInKByte,
+ enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+ unsigned int PixelChunkSizeKBytes,
+ unsigned int ROBSizeKBytes,
+ unsigned int CompressedBufferSegmentSizeInkByteFinal,
+ enum output_encoder_class Output[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double MaximumSwathWidthLuma[],
+ double MaximumSwathWidthChroma[],
+ enum dm_rotation_angle SourceRotation[],
+ bool ViewportStationary[],
+ enum source_format_class SourcePixelFormat[],
+ enum dm_swizzle_mode SurfaceTiling[],
+ unsigned int ViewportWidth[],
+ unsigned int ViewportHeight[],
+ unsigned int ViewportXStart[],
+ unsigned int ViewportYStart[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ enum odm_combine_mode ODMMode[],
+ unsigned int BlendingAndTiming[],
+ unsigned int BytePerPixY[],
+ unsigned int BytePerPixC[],
+ double BytePerPixDETY[],
+ double BytePerPixDETC[],
+ unsigned int HActive[],
+ double HRatio[],
+ double HRatioChroma[],
+ unsigned int DPPPerSurface[],
+
+ /* Output */
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ double SwathWidth[],
+ double SwathWidthChroma[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ bool *UnboundedRequestEnabled,
+ unsigned int *CompressedBufferSizeInkByte,
+ unsigned int *CompBufReservedSpaceKBytes,
+ bool *CompBufReservedSpaceNeedAdjustment,
+ bool ViewportSizeSupportPerSurface[],
+ bool *ViewportSizeSupport);
+
+void dml32_CalculateSwathWidth(
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ enum source_format_class SourcePixelFormat[],
+ enum dm_rotation_angle SourceScan[],
+ bool ViewportStationary[],
+ unsigned int ViewportWidth[],
+ unsigned int ViewportHeight[],
+ unsigned int ViewportXStart[],
+ unsigned int ViewportYStart[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ enum odm_combine_mode ODMMode[],
+ unsigned int BytePerPixY[],
+ unsigned int BytePerPixC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int BlendingAndTiming[],
+ unsigned int HActive[],
+ double HRatio[],
+ unsigned int DPPPerSurface[],
+
+ /* Output */
+ double SwathWidthdoubleDPPY[],
+ double SwathWidthdoubleDPPC[],
+ double SwathWidthY[], // per-pipe
+ double SwathWidthC[], // per-pipe
+ unsigned int MaximumSwathHeightY[],
+ unsigned int MaximumSwathHeightC[],
+ unsigned int swath_width_luma_ub[], // per-pipe
+ unsigned int swath_width_chroma_ub[]);
+
+bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ unsigned int TotalNumberOfActiveDPP,
+ bool NoChroma,
+ enum output_encoder_class Output,
+ enum dm_swizzle_mode SurfaceTiling,
+ bool CompBufReservedSpaceNeedAdjustment,
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+
+void dml32_CalculateDETBufferSize(
+ unsigned int DETSizeOverride[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ bool UnboundedRequestEnabled,
+ unsigned int nomDETInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ unsigned int CompressedBufferSegmentSizeInkByteFinal,
+ enum source_format_class SourcePixelFormat[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int RoundedUpMaxSwathSizeBytesY[],
+ unsigned int RoundedUpMaxSwathSizeBytesC[],
+ unsigned int DPPPerSurface[],
+ /* Output */
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int *CompressedBufferSizeInkByte);
+
+void dml32_CalculateODMMode(
+ unsigned int MaximumPixelsPerLinePerDSCUnit,
+ unsigned int HActive,
+ enum output_encoder_class Output,
+ enum odm_combine_policy ODMUse,
+ double StateDispclk,
+ double MaxDispclk,
+ bool DSCEnable,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int MaxNumDPP,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+
+ /* Output */
+ bool *TotalAvailablePipesSupport,
+ unsigned int *NumberOfDPP,
+ enum odm_combine_mode *ODMMode,
+ double *RequiredDISPCLKPerSurface);
+
+double dml32_CalculateRequiredDispclk(
+ enum odm_combine_mode ODMMode,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+ double MaxDispclk);
+
+double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed);
+
+void dml32_CalculateOutputLink(
+ double PHYCLKPerState,
+ double PHYCLKD18PerState,
+ double PHYCLKD32PerState,
+ double Downspreading,
+ bool IsMainSurfaceUsingTheIndicatedTiming,
+ enum output_encoder_class Output,
+ enum output_format_class OutputFormat,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClockBackEnd,
+ double ForcedOutputLinkBPP,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int NumberOfDSCSlices,
+ double AudioSampleRate,
+ unsigned int AudioSampleLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ bool DSCEnable,
+ unsigned int OutputLinkDPLanes,
+ enum dm_output_link_dp_rate OutputLinkDPRate,
+
+ /* Output */
+ bool *RequiresDSC,
+ double *RequiresFEC,
+ double *OutBpp,
+ enum dm_output_type *OutputType,
+ enum dm_output_rate *OutputRate,
+ unsigned int *RequiredSlots);
+
+void dml32_CalculateDPPCLK(
+ unsigned int NumberOfActiveSurfaces,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKDPPCLKVCOSpeed,
+ double DPPCLKUsingSingleDPP[],
+ unsigned int DPPPerSurface[],
+
+ /* output */
+ double *GlobalDPPCLK,
+ double Dppclk[]);
+
+double dml32_TruncToValidBPP(
+ double LinkBitRate,
+ unsigned int Lanes,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClock,
+ double DesiredBPP,
+ bool DSCEnable,
+ enum output_encoder_class Output,
+ enum output_format_class Format,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int DSCSlices,
+ unsigned int AudioRate,
+ unsigned int AudioLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ /* Output */
+ unsigned int *RequiredSlots);
+
+double dml32_RequiredDTBCLK(
+ bool DSCEnable,
+ double PixelClock,
+ enum output_format_class OutputFormat,
+ double OutputBpp,
+ unsigned int DSCSlices,
+ unsigned int HTotal,
+ unsigned int HActive,
+ unsigned int AudioRate,
+ unsigned int AudioLayout);
+
+unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
+ enum odm_combine_mode ODMMode,
+ unsigned int DSCInputBitPerComponent,
+ double OutputBpp,
+ unsigned int HActive,
+ unsigned int HTotal,
+ unsigned int NumberOfDSCSlices,
+ enum output_format_class OutputFormat,
+ enum output_encoder_class Output,
+ double PixelClock,
+ double PixelClockBackEnd);
+
+void dml32_CalculateSurfaceSizeInMall(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCN,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ bool DCCEnable[],
+ bool ViewportStationary[],
+ unsigned int ViewportXStartY[],
+ unsigned int ViewportYStartY[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int ViewportWidthY[],
+ unsigned int ViewportHeightY[],
+ unsigned int BytesPerPixelY[],
+ unsigned int ViewportWidthC[],
+ unsigned int ViewportHeightC[],
+ unsigned int BytesPerPixelC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int ReadBlockWidthY[],
+ unsigned int ReadBlockWidthC[],
+ unsigned int ReadBlockHeightY[],
+ unsigned int ReadBlockHeightC[],
+
+ /* Output */
+ unsigned int SurfaceSizeInMALL[],
+ bool *ExceededMALLSize);
+
+void dml32_CalculateVMRowAndSwath(
+ struct dml32_CalculateVMRowAndSwath *st_vars,
+ unsigned int NumberOfActiveSurfaces,
+ DmlPipe myPipe[],
+ unsigned int SurfaceSizeInMALL[],
+ unsigned int PTEBufferSizeInRequestsLuma,
+ unsigned int PTEBufferSizeInRequestsChroma,
+ unsigned int DCCMetaBufferSizeBytes,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int MALLAllocatedForDCN,
+ double SwathWidthY[],
+ double SwathWidthC[],
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes[],
+ unsigned int HostVMMinPageSize,
+
+ /* Output */
+ bool PTEBufferSizeNotExceeded[],
+ bool DCCMetaBufferSizeNotExceeded[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int dpte_row_height_luma[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
+ unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int PTERequestSizeC[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+ double PrefetchSourceLinesY[],
+ double PrefetchSourceLinesC[],
+ double VInitPreFillY[],
+ double VInitPreFillC[],
+ unsigned int MaxNumSwathY[],
+ unsigned int MaxNumSwathC[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double PixelPTEBytesPerRow[],
+ double PDEAndMetaPTEBytesFrame[],
+ double MetaRowByte[],
+ bool use_one_row_for_frame[],
+ bool use_one_row_for_frame_flip[],
+ bool UsesMALLForStaticScreen[],
+ bool PTE_BUFFER_MODE[],
+ unsigned int BIGK_FRAGMENT_SIZE[]);
+
+unsigned int dml32_CalculateVMAndRowBytes(
+ bool ViewportStationary,
+ bool DCCEnable,
+ unsigned int NumberOfDPPs,
+ unsigned int BlockHeight256Bytes,
+ unsigned int BlockWidth256Bytes,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceTiling,
+ unsigned int BytePerPixel,
+ enum dm_rotation_angle SourceScan,
+ double SwathWidth,
+ unsigned int ViewportHeight,
+ unsigned int ViewportXStart,
+ unsigned int ViewportYStart,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes,
+ unsigned int HostVMMinPageSize,
+ unsigned int PTEBufferSizeInRequests,
+ unsigned int Pitch,
+ unsigned int DCCMetaPitch,
+ unsigned int MacroTileWidth,
+ unsigned int MacroTileHeight,
+
+ /* Output */
+ unsigned int *MetaRowByte,
+ unsigned int *PixelPTEBytesPerRow,
+ unsigned int *dpte_row_width_ub,
+ unsigned int *dpte_row_height,
+ unsigned int *dpte_row_height_linear,
+ unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
+ unsigned int *dpte_row_width_ub_one_row_per_frame,
+ unsigned int *dpte_row_height_one_row_per_frame,
+ unsigned int *MetaRequestWidth,
+ unsigned int *MetaRequestHeight,
+ unsigned int *meta_row_width,
+ unsigned int *meta_row_height,
+ unsigned int *PixelPTEReqWidth,
+ unsigned int *PixelPTEReqHeight,
+ unsigned int *PTERequestSize,
+ unsigned int *DPDE0BytesFrame,
+ unsigned int *MetaPTEBytesFrame);
+
+double dml32_CalculatePrefetchSourceLines(
+ double VRatio,
+ unsigned int VTaps,
+ bool Interlace,
+ bool ProgressiveToInterlaceUnitInOPP,
+ unsigned int SwathHeight,
+ enum dm_rotation_angle SourceRotation,
+ bool ViewportStationary,
+ double SwathWidth,
+ unsigned int ViewportHeight,
+ unsigned int ViewportXStart,
+ unsigned int ViewportYStart,
+
+ /* Output */
+ double *VInitPreFill,
+ unsigned int *MaxNumSwath);
+
+void dml32_CalculateMALLUseForStaticScreen(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCNFinal,
+ enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
+ unsigned int SurfaceSizeInMALL[],
+ bool one_row_per_frame_fits_in_buffer[],
+
+ /* output */
+ bool UsesMALLForStaticScreen[]);
+
+void dml32_CalculateRowBandwidth(
+ bool GPUVMEnable,
+ enum source_format_class SourcePixelFormat,
+ double VRatio,
+ double VRatioChroma,
+ bool DCCEnable,
+ double LineTime,
+ unsigned int MetaRowByteLuma,
+ unsigned int MetaRowByteChroma,
+ unsigned int meta_row_height_luma,
+ unsigned int meta_row_height_chroma,
+ unsigned int PixelPTEBytesPerRowLuma,
+ unsigned int PixelPTEBytesPerRowChroma,
+ unsigned int dpte_row_height_luma,
+ unsigned int dpte_row_height_chroma,
+ /* Output */
+ double *meta_row_bw,
+ double *dpte_row_bw);
+
+double dml32_CalculateUrgentLatency(
+ double UrgentLatencyPixelDataOnly,
+ double UrgentLatencyPixelMixedWithVMData,
+ double UrgentLatencyVMDataOnly,
+ bool DoUrgentLatencyAdjustment,
+ double UrgentLatencyAdjustmentFabricClockComponent,
+ double UrgentLatencyAdjustmentFabricClockReference,
+ double FabricClock);
+
+void dml32_CalculateUrgentBurstFactor(
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double LineTime,
+ double UrgentLatency,
+ double CursorBufferSize,
+ unsigned int CursorWidth,
+ unsigned int CursorBPP,
+ double VRatio,
+ double VRatioC,
+ double BytePerPixelInDETY,
+ double BytePerPixelInDETC,
+ unsigned int DETBufferSizeY,
+ unsigned int DETBufferSizeC,
+ /* Output */
+ double *UrgentBurstFactorCursor,
+ double *UrgentBurstFactorLuma,
+ double *UrgentBurstFactorChroma,
+ bool *NotEnoughUrgentLatencyHiding);
+
+void dml32_CalculateDCFCLKDeepSleep(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ double VRatio[],
+ double VRatioChroma[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerSurface[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int ReturnBusWidth,
+
+ /* Output */
+ double *DCFClkDeepSleep);
+
+double dml32_CalculateWriteBackDelay(
+ enum source_format_class WritebackPixelFormat,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackVTaps,
+ unsigned int WritebackDestinationWidth,
+ unsigned int WritebackDestinationHeight,
+ unsigned int WritebackSourceHeight,
+ unsigned int HTotal);
+
+void dml32_UseMinimumDCFCLK(
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool DRRDisplay[],
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int MaxPrefetchMode,
+ double DRAMClockChangeLatencyFinal,
+ double FCLKChangeLatency,
+ double SREnterPlusExitTime,
+ unsigned int ReturnBusWidth,
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int MetaChunkSize,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool DynamicMetadataVMEnabled,
+ bool ImmediateFlipRequirement,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
+ double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
+ unsigned int VTotal[],
+ unsigned int VActive[],
+ unsigned int DynamicMetadataTransmittedBytes[],
+ unsigned int DynamicMetadataLinesBeforeActiveRequired[],
+ bool Interlace[],
+ double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
+ double RequiredDISPCLK[][2],
+ double UrgLatency[],
+ unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
+ double ProjectedDCFClkDeepSleep[][2],
+ double MaximumVStartup[][2][DC__NUM_DPP__MAX],
+ unsigned int TotalNumberOfActiveDPP[][2],
+ unsigned int TotalNumberOfDCCActiveDPP[][2],
+ unsigned int dpte_group_bytes[],
+ double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
+ double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ unsigned int HTotal[],
+ double PixelClock[],
+ double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
+ double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
+ double MetaRowBytes[][2][DC__NUM_DPP__MAX],
+ bool DynamicMetadataEnable[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double DCFCLKPerState[],
+ /* Output */
+ double DCFCLKState[][2]);
+
+unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int TotalNumberOfDCCActiveDPP,
+ unsigned int MetaChunkSize,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels);
+
+void dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ unsigned int MaxInterDCNTileRepeaters,
+ double Dppclk,
+ double Dispclk,
+ double DCFClkDeepSleep,
+ double PixelClock,
+ unsigned int HTotal,
+ unsigned int VBlank,
+ unsigned int DynamicMetadataTransmittedBytes,
+ unsigned int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int InterlaceEnable,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double *TSetup,
+ double *Tdmbf,
+ double *Tdmec,
+ double *Tdmsks,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix);
+
+double dml32_CalculateTWait(
+ unsigned int PrefetchMode,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ bool DRRDisplay,
+ double DRAMClockChangeLatency,
+ double FCLKChangeLatency,
+ double UrgentLatency,
+ double SREnterPlusExitTime);
+
+double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
+ const int VoltageLevel,
+ const bool HostVMEnable,
+ const double DCFCLK,
+ const double FabricClock,
+ const double DRAMSpeed);
+
+double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
+ const int VoltageLevel,
+ const double DCFCLK,
+ const double FabricClock,
+ const double DRAMSpeed);
+
+double dml32_CalculateExtraLatency(
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ double DCFCLK,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int TotalNumberOfDCCActiveDPP,
+ unsigned int MetaChunkSize,
+ double ReturnBW,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels);
+
+bool dml32_CalculatePrefetchSchedule(
+ struct dml32_CalculatePrefetchSchedule *st_vars,
+ double HostVMInefficiencyFactor,
+ DmlPipe *myPipe,
+ unsigned int DSCDelay,
+ double DPPCLKDelaySubtotalPlusCNVCFormater,
+ double DPPCLKDelaySCL,
+ double DPPCLKDelaySCLLBOnly,
+ double DPPCLKDelayCNVCCursor,
+ double DISPCLKDelaySubtotal,
+ unsigned int DPP_RECOUT_WIDTH,
+ enum output_format_class OutputFormat,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int VStartup,
+ unsigned int MaxVStartup,
+ unsigned int GPUVMPageTableLevels,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ double HostVMMinPageSize,
+ bool DynamicMetadataEnable,
+ bool DynamicMetadataVMEnabled,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int DynamicMetadataTransmittedBytes,
+ double UrgentLatency,
+ double UrgentExtraLatency,
+ double TCalc,
+ unsigned int PDEAndMetaPTEBytesFrame,
+ unsigned int MetaRowByte,
+ unsigned int PixelPTEBytesPerRow,
+ double PrefetchSourceLinesY,
+ unsigned int SwathWidthY,
+ unsigned int VInitPreFillY,
+ unsigned int MaxNumSwathY,
+ double PrefetchSourceLinesC,
+ unsigned int SwathWidthC,
+ unsigned int VInitPreFillC,
+ unsigned int MaxNumSwathC,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double TWait,
+ /* Output */
+ double *DSTXAfterScaler,
+ double *DSTYAfterScaler,
+ double *DestinationLinesForPrefetch,
+ double *PrefetchBandwidth,
+ double *DestinationLinesToRequestVMInVBlank,
+ double *DestinationLinesToRequestRowInVBlank,
+ double *VRatioPrefetchY,
+ double *VRatioPrefetchC,
+ double *RequiredPrefetchPixDataBWLuma,
+ double *RequiredPrefetchPixDataBWChroma,
+ bool *NotEnoughTimeForDynamicMetadata,
+ double *Tno_bw,
+ double *prefetch_vmrow_bw,
+ double *Tdmdl_vm,
+ double *Tdmdl,
+ double *TSetup,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix);
+
+void dml32_CalculateFlipSchedule(
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool GPUVMEnable,
+ double HostVMMinPageSize,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+ double DPTEBytesPerRow,
+ double BandwidthAvailableForImmediateFlip,
+ unsigned int TotImmediateFlipBytes,
+ enum source_format_class SourcePixelFormat,
+ double LineTime,
+ double VRatio,
+ double VRatioChroma,
+ double Tno_bw,
+ bool DCCEnable,
+ unsigned int dpte_row_height,
+ unsigned int meta_row_height,
+ unsigned int dpte_row_height_chroma,
+ unsigned int meta_row_height_chroma,
+ bool use_one_row_for_frame_flip,
+
+ /* Output */
+ double *DestinationLinesToRequestVMInImmediateFlip,
+ double *DestinationLinesToRequestRowInImmediateFlip,
+ double *final_flip_bw,
+ bool *ImmediateFlipSupportedForPipe);
+
+void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
+ bool USRRetrainingRequiredFinal,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int PrefetchMode,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MaxLineBufferLines,
+ unsigned int LineBufferSize,
+ unsigned int WritebackInterfaceBufferSize,
+ double DCFCLK,
+ double ReturnBW,
+ bool SynchronizeTimingsFinal,
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ bool DRRDisplay[],
+ unsigned int dpte_group_bytes[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ SOCParametersList mmSOCParameters,
+ unsigned int WritebackChunkSize,
+ double SOCCLK,
+ double DCFClkDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int LBBitPerPixel[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ double HRatio[],
+ double HRatioChroma[],
+ unsigned int VTaps[],
+ unsigned int VTapsChroma[],
+ double VRatio[],
+ double VRatioChroma[],
+ unsigned int HTotal[],
+ unsigned int VTotal[],
+ unsigned int VActive[],
+ double PixelClock[],
+ unsigned int BlendingAndTiming[],
+ unsigned int DPPPerSurface[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+ double DSTXAfterScaler[],
+ double DSTYAfterScaler[],
+ bool WritebackEnable[],
+ enum source_format_class WritebackPixelFormat[],
+ double WritebackDestinationWidth[],
+ double WritebackDestinationHeight[],
+ double WritebackSourceHeight[],
+ bool UnboundedRequestEnabled,
+ unsigned int CompressedBufferSizeInkByte,
+
+ /* Output */
+ Watermarks *Watermark,
+ enum clock_change_support *DRAMClockChangeSupport,
+ double MaxActiveDRAMClockChangeLatencySupported[],
+ unsigned int SubViewportLinesNeededInMALL[],
+ enum dm_fclock_change_support *FCLKChangeSupport,
+ double *MinActiveFCLKChangeLatencySupported,
+ bool *USRRetrainingSupport,
+ double ActiveDRAMClockChangeLatencyMargin[]);
+
+double dml32_CalculateWriteBackDISPCLK(
+ enum source_format_class WritebackPixelFormat,
+ double PixelClock,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackHTaps,
+ unsigned int WritebackVTaps,
+ unsigned int WritebackSourceWidth,
+ unsigned int WritebackDestinationWidth,
+ unsigned int HTotal,
+ unsigned int WritebackLineBufferSize,
+ double DISPCLKDPPCLKVCOSpeed);
+
+void dml32_CalculateMinAndMaxPrefetchMode(
+ enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
+ unsigned int *MinPrefetchMode,
+ unsigned int *MaxPrefetchMode);
+
+void dml32_CalculatePixelDeliveryTimes(
+ unsigned int NumberOfActiveSurfaces,
+ double VRatio[],
+ double VRatioChroma[],
+ double VRatioPrefetchY[],
+ double VRatioPrefetchC[],
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ unsigned int DPPPerSurface[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ unsigned int BytePerPixelC[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int NumberOfCursors[],
+ unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+ unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int BlockHeight256BytesC[],
+
+ /* Output */
+ double DisplayPipeLineDeliveryTimeLuma[],
+ double DisplayPipeLineDeliveryTimeChroma[],
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeLuma[],
+ double DisplayPipeRequestDeliveryTimeChroma[],
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ double CursorRequestDeliveryTime[],
+ double CursorRequestDeliveryTimePrefetch[]);
+
+void dml32_CalculateMetaAndPTETimes(
+ bool use_one_row_for_frame[],
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int MetaChunkSize,
+ unsigned int MinMetaChunkSizeBytes,
+ unsigned int HTotal[],
+ double VRatio[],
+ double VRatioChroma[],
+ double DestinationLinesToRequestRowInVBlank[],
+ double DestinationLinesToRequestRowInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int dpte_row_height[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PTERequestSizeC[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+
+ /* Output */
+ double DST_Y_PER_PTE_ROW_NOM_L[],
+ double DST_Y_PER_PTE_ROW_NOM_C[],
+ double DST_Y_PER_META_ROW_NOM_L[],
+ double DST_Y_PER_META_ROW_NOM_C[],
+ double TimePerMetaChunkNominal[],
+ double TimePerChromaMetaChunkNominal[],
+ double TimePerMetaChunkVBlank[],
+ double TimePerChromaMetaChunkVBlank[],
+ double TimePerMetaChunkFlip[],
+ double TimePerChromaMetaChunkFlip[],
+ double time_per_pte_group_nom_luma[],
+ double time_per_pte_group_vblank_luma[],
+ double time_per_pte_group_flip_luma[],
+ double time_per_pte_group_nom_chroma[],
+ double time_per_pte_group_vblank_chroma[],
+ double time_per_pte_group_flip_chroma[]);
+
+void dml32_CalculateVMGroupAndRequestTimes(
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int HTotal[],
+ unsigned int BytePerPixelC[],
+ double DestinationLinesToRequestVMInVBlank[],
+ double DestinationLinesToRequestVMInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+
+ /* Output */
+ double TimePerVMGroupVBlank[],
+ double TimePerVMGroupFlip[],
+ double TimePerVMRequestVBlank[],
+ double TimePerVMRequestFlip[]);
+
+void dml32_CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ unsigned int nomDETInKByte,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dm_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum dm_rotation_angle SourceRotation,
+ /* Output */
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma);
+
+void dml32_CalculateStutterEfficiency(
+ unsigned int CompressedBufferSizeInkByte,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool UnboundedRequestEnabled,
+ unsigned int MetaFIFOSizeInKEntries,
+ unsigned int ZeroSizeBufferEntries,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ unsigned int CompbufReservedSpace64B,
+ unsigned int CompbufReservedSpaceZs,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizeTimingsFinal,
+ unsigned int BlendingAndTiming[],
+ double StutterEnterPlusExitWatermark,
+ double Z8StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ unsigned int DPPPerSurface[],
+ unsigned int DETBufferSizeY[],
+ unsigned int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ unsigned int HTotal[],
+ unsigned int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesC[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int DCCYMaxUncompressedBlock[],
+ unsigned int DCCCMaxUncompressedBlock[],
+ unsigned int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthSurfaceLuma[],
+ double ReadBandwidthSurfaceChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+
+ /* Output */
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ unsigned int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ unsigned int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod,
+ bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+
+void dml32_CalculateMaxDETAndMinCompressedBufferSize(
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int ROBBufferSizeInKByte,
+ unsigned int MaxNumDPP,
+ bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
+ unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
+
+ /* Output */
+ unsigned int *MaxTotalDETInKByte,
+ unsigned int *nomDETInKByte,
+ unsigned int *MinCompressedBufferSizeInKByte);
+
+bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[]);
+
+void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+
+ /* output */
+ double *PrefetchBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *PrefetchBandwidthSupport);
+
+double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double cursor_bw_pre[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[]);
+
+void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ enum immediate_flip_requirement ImmediateFlipRequirement[],
+ double final_flip_bw[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+
+ /* output */
+ double *TotalBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *ImmediateFlipBandwidthSupport);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c
new file mode 100644
index 000000000000..a1276f6b9581
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c
@@ -0,0 +1,615 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "../display_mode_lib.h"
+#include "../display_mode_vba.h"
+#include "../dml_inline_defs.h"
+#include "display_rq_dlg_calc_32.h"
+
+static bool is_dual_plane(enum source_format_class source_format)
+{
+ bool ret_val = 0;
+
+ if ((source_format == dm_420_12) || (source_format == dm_420_8) || (source_format == dm_420_10)
+ || (source_format == dm_rgbe_alpha))
+ ret_val = 1;
+
+ return ret_val;
+}
+
+void dml32_rq_dlg_get_rq_reg(display_rq_regs_st *rq_regs,
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx)
+{
+ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
+ bool dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
+ double stored_swath_l_bytes;
+ double stored_swath_c_bytes;
+ bool is_phantom_pipe;
+ uint32_t pixel_chunk_bytes = 0;
+ uint32_t min_pixel_chunk_bytes = 0;
+ uint32_t meta_chunk_bytes = 0;
+ uint32_t min_meta_chunk_bytes = 0;
+ uint32_t dpte_group_bytes = 0;
+ uint32_t mpte_group_bytes = 0;
+
+ uint32_t p1_pixel_chunk_bytes = 0;
+ uint32_t p1_min_pixel_chunk_bytes = 0;
+ uint32_t p1_meta_chunk_bytes = 0;
+ uint32_t p1_min_meta_chunk_bytes = 0;
+ uint32_t p1_dpte_group_bytes = 0;
+ uint32_t p1_mpte_group_bytes = 0;
+
+ unsigned int detile_buf_size_in_bytes;
+ unsigned int detile_buf_plane1_addr;
+ unsigned int pte_row_height_linear;
+
+ memset(rq_regs, 0, sizeof(*rq_regs));
+
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] start, num_pipes=%d\n", __func__, pipe_idx, num_pipes);
+
+ pixel_chunk_bytes = get_pixel_chunk_size_in_kbyte(mode_lib, e2e_pipe_param, num_pipes) * 1024; // From VBA
+ min_pixel_chunk_bytes = get_min_pixel_chunk_size_in_byte(mode_lib, e2e_pipe_param, num_pipes); // From VBA
+
+ if (pixel_chunk_bytes == 64 * 1024)
+ min_pixel_chunk_bytes = 0;
+
+ meta_chunk_bytes = get_meta_chunk_size_in_kbyte(mode_lib, e2e_pipe_param, num_pipes) * 1024; // From VBA
+ min_meta_chunk_bytes = get_min_meta_chunk_size_in_byte(mode_lib, e2e_pipe_param, num_pipes); // From VBA
+
+ dpte_group_bytes = get_dpte_group_size_in_bytes(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ mpte_group_bytes = get_vm_group_size_in_bytes(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ p1_pixel_chunk_bytes = pixel_chunk_bytes;
+ p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes;
+ p1_meta_chunk_bytes = meta_chunk_bytes;
+ p1_min_meta_chunk_bytes = min_meta_chunk_bytes;
+ p1_dpte_group_bytes = dpte_group_bytes;
+ p1_mpte_group_bytes = mpte_group_bytes;
+
+ if ((enum source_format_class) src->source_format == dm_rgbe_alpha)
+ p1_pixel_chunk_bytes = get_alpha_pixel_chunk_size_in_kbyte(mode_lib, e2e_pipe_param, num_pipes) * 1024;
+
+ rq_regs->rq_regs_l.chunk_size = dml_log2(pixel_chunk_bytes) - 10;
+ rq_regs->rq_regs_c.chunk_size = dml_log2(p1_pixel_chunk_bytes) - 10;
+
+ if (min_pixel_chunk_bytes == 0)
+ rq_regs->rq_regs_l.min_chunk_size = 0;
+ else
+ rq_regs->rq_regs_l.min_chunk_size = dml_log2(min_pixel_chunk_bytes) - 8 + 1;
+
+ if (p1_min_pixel_chunk_bytes == 0)
+ rq_regs->rq_regs_c.min_chunk_size = 0;
+ else
+ rq_regs->rq_regs_c.min_chunk_size = dml_log2(p1_min_pixel_chunk_bytes) - 8 + 1;
+
+ rq_regs->rq_regs_l.meta_chunk_size = dml_log2(meta_chunk_bytes) - 10;
+ rq_regs->rq_regs_c.meta_chunk_size = dml_log2(p1_meta_chunk_bytes) - 10;
+
+ if (min_meta_chunk_bytes == 0)
+ rq_regs->rq_regs_l.min_meta_chunk_size = 0;
+ else
+ rq_regs->rq_regs_l.min_meta_chunk_size = dml_log2(min_meta_chunk_bytes) - 6 + 1;
+
+ if (min_meta_chunk_bytes == 0)
+ rq_regs->rq_regs_c.min_meta_chunk_size = 0;
+ else
+ rq_regs->rq_regs_c.min_meta_chunk_size = dml_log2(p1_min_meta_chunk_bytes) - 6 + 1;
+
+ rq_regs->rq_regs_l.dpte_group_size = dml_log2(dpte_group_bytes) - 6;
+ rq_regs->rq_regs_l.mpte_group_size = dml_log2(mpte_group_bytes) - 6;
+ rq_regs->rq_regs_c.dpte_group_size = dml_log2(p1_dpte_group_bytes) - 6;
+ rq_regs->rq_regs_c.mpte_group_size = dml_log2(p1_mpte_group_bytes) - 6;
+
+ detile_buf_size_in_bytes = get_det_buffer_size_kbytes(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * 1024;
+ detile_buf_plane1_addr = 0;
+ pte_row_height_linear = get_dpte_row_height_linear_l(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx);
+
+ if (src->sw_mode == dm_sw_linear)
+ ASSERT(pte_row_height_linear >= 8);
+
+ rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(pte_row_height_linear), 1) - 3;
+
+ if (dual_plane) {
+ unsigned int p1_pte_row_height_linear = get_dpte_row_height_linear_c(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx);
+ ;
+ if (src->sw_mode == dm_sw_linear)
+ ASSERT(p1_pte_row_height_linear >= 8);
+
+ rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(p1_pte_row_height_linear), 1) - 3;
+ }
+
+ rq_regs->rq_regs_l.swath_height = dml_log2(get_swath_height_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx));
+ rq_regs->rq_regs_c.swath_height = dml_log2(get_swath_height_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx));
+
+ // FIXME: take the max between luma, chroma chunk size?
+ // okay for now, as we are setting pixel_chunk_bytes to 8kb anyways
+ if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb
+ rq_regs->drq_expansion_mode = 0;
+ } else {
+ rq_regs->drq_expansion_mode = 2;
+ }
+ rq_regs->prq_expansion_mode = 1;
+ rq_regs->mrq_expansion_mode = 1;
+ rq_regs->crq_expansion_mode = 1;
+
+ stored_swath_l_bytes = get_det_stored_buffer_size_l_bytes(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx);
+ stored_swath_c_bytes = get_det_stored_buffer_size_c_bytes(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx);
+ is_phantom_pipe = get_is_phantom_pipe(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+
+ // Note: detile_buf_plane1_addr is in unit of 1KB
+ if (dual_plane) {
+ if (is_phantom_pipe) {
+ detile_buf_plane1_addr = ((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma
+ } else {
+ if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) {
+ detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n",
+ __func__, detile_buf_plane1_addr);
+#endif
+ } else {
+ detile_buf_plane1_addr =
+ dml_round_to_multiple(
+ (unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0),
+ 1024, 0) / 1024.0; // 2/3 to luma
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n",
+ __func__, detile_buf_plane1_addr);
+#endif
+ }
+ }
+ }
+ rq_regs->plane1_base_address = detile_buf_plane1_addr;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe);
+ dml_print("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes);
+ dml_print("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes);
+ dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes);
+ dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr);
+ dml_print("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
+#endif
+ print__rq_regs_st(mode_lib, rq_regs);
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] done, num_pipes=%d\n", __func__, pipe_idx, num_pipes);
+}
+
+void dml32_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx)
+{
+ const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
+ const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
+ const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
+ double refcyc_per_req_delivery_pre_cur0 = 0.;
+ double refcyc_per_req_delivery_cur0 = 0.;
+ double refcyc_per_req_delivery_pre_c = 0.;
+ double refcyc_per_req_delivery_c = 0.;
+ double refcyc_per_req_delivery_pre_l;
+ double refcyc_per_req_delivery_l;
+ double refcyc_per_line_delivery_pre_c = 0.;
+ double refcyc_per_line_delivery_c = 0.;
+ double refcyc_per_line_delivery_pre_l;
+ double refcyc_per_line_delivery_l;
+ double min_ttu_vblank;
+ double vratio_pre_l;
+ double vratio_pre_c;
+ unsigned int min_dst_y_next_start;
+ unsigned int htotal = dst->htotal;
+ unsigned int hblank_end = dst->hblank_end;
+ unsigned int vblank_end = dst->vblank_end;
+ bool interlaced = dst->interlaced;
+ double pclk_freq_in_mhz = dst->pixel_rate_mhz;
+ unsigned int vready_after_vcount0;
+ double refclk_freq_in_mhz = clks->refclk_mhz;
+ double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
+ bool dual_plane = 0;
+ unsigned int pipe_index_in_combine[DC__NUM_PIPES__MAX];
+ unsigned int dst_x_after_scaler;
+ unsigned int dst_y_after_scaler;
+ double dst_y_prefetch;
+ double dst_y_per_vm_vblank;
+ double dst_y_per_row_vblank;
+ double dst_y_per_vm_flip;
+ double dst_y_per_row_flip;
+ double max_dst_y_per_vm_vblank = 32.0;
+ double max_dst_y_per_row_vblank = 16.0;
+ double dst_y_per_pte_row_nom_l;
+ double dst_y_per_pte_row_nom_c;
+ double dst_y_per_meta_row_nom_l;
+ double dst_y_per_meta_row_nom_c;
+ double refcyc_per_pte_group_nom_l;
+ double refcyc_per_pte_group_nom_c;
+ double refcyc_per_pte_group_vblank_l;
+ double refcyc_per_pte_group_vblank_c;
+ double refcyc_per_pte_group_flip_l;
+ double refcyc_per_pte_group_flip_c;
+ double refcyc_per_meta_chunk_nom_l;
+ double refcyc_per_meta_chunk_nom_c;
+ double refcyc_per_meta_chunk_vblank_l;
+ double refcyc_per_meta_chunk_vblank_c;
+ double refcyc_per_meta_chunk_flip_l;
+ double refcyc_per_meta_chunk_flip_c;
+
+ memset(dlg_regs, 0, sizeof(*dlg_regs));
+ memset(ttu_regs, 0, sizeof(*ttu_regs));
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] starts, num_pipes=%d\n", __func__, pipe_idx, num_pipes);
+ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq);
+ dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced);
+ ASSERT(ref_freq_to_pix_freq < 4.0);
+
+ dlg_regs->ref_freq_to_pix_freq = (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19));
+ dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal * dml_pow(2, 8));
+ dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits
+
+ min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ min_dst_y_next_start = get_min_dst_y_next_start(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+
+ dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank);
+ dml_print("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, min_dst_y_next_start);
+ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq);
+
+ dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
+
+ vready_after_vcount0 = get_vready_at_or_after_vsync(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx); // From VBA
+ dlg_regs->vready_after_vcount0 = vready_after_vcount0;
+
+ dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, dlg_regs->vready_after_vcount0);
+
+ dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+
+ // do some adjustment on the dst_after scaler to account for odm combine mode
+ dml_print("DML_DLG: %s: input dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: input dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler);
+
+ // need to figure out which side of odm combine we're in
+ if (dst->odm_combine == dm_odm_combine_mode_2to1 || dst->odm_combine == dm_odm_combine_mode_4to1) {
+ // figure out which pipes go together
+ bool visited[DC__NUM_PIPES__MAX];
+ unsigned int i, j, k;
+
+ for (k = 0; k < num_pipes; ++k) {
+ visited[k] = false;
+ pipe_index_in_combine[k] = 0;
+ }
+
+ for (i = 0; i < num_pipes; i++) {
+ if (e2e_pipe_param[i].pipe.src.is_hsplit && !visited[i]) {
+
+ unsigned int grp = e2e_pipe_param[i].pipe.src.hsplit_grp;
+ unsigned int grp_idx = 0;
+
+ for (j = i; j < num_pipes; j++) {
+ if (e2e_pipe_param[j].pipe.src.hsplit_grp == grp
+ && e2e_pipe_param[j].pipe.src.is_hsplit && !visited[j]) {
+ pipe_index_in_combine[j] = grp_idx;
+ dml_print("DML_DLG: %s: pipe[%d] is in grp %d idx %d\n",
+ __func__, j, grp, grp_idx);
+ grp_idx++;
+ visited[j] = true;
+ }
+ }
+ }
+ }
+ }
+
+ if (dst->odm_combine == dm_odm_combine_mode_disabled) {
+ // FIXME how about ODM split??
+ dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end * ref_freq_to_pix_freq);
+ } else {
+ if (dst->odm_combine == dm_odm_combine_mode_2to1 || dst->odm_combine == dm_odm_combine_mode_4to1) {
+ // TODO: We should really check that 4to1 is supported before setting it to 4
+ unsigned int odm_combine_factor = (dst->odm_combine == dm_odm_combine_mode_2to1 ? 2 : 4);
+ unsigned int odm_pipe_index = pipe_index_in_combine[pipe_idx];
+
+ dlg_regs->refcyc_h_blank_end = (unsigned int) (((double) hblank_end
+ + odm_pipe_index * (double) dst->hactive / odm_combine_factor) * ref_freq_to_pix_freq);
+ }
+ }
+ ASSERT(dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13));
+
+ dml_print("DML_DLG: %s: htotal= %d\n", __func__, htotal);
+ dml_print("DML_DLG: %s: dst_x_after_scaler[%d]= %d\n", __func__, pipe_idx, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: dst_y_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_y_after_scaler);
+
+ dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ // From VBA
+ dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ // From VBA
+ dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ // magic!
+ if (htotal <= 75) {
+ max_dst_y_per_vm_vblank = 100.0;
+ max_dst_y_per_row_vblank = 100.0;
+ }
+
+ dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch);
+ dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip);
+ dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank);
+
+ ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank);
+ ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank);
+ ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank));
+
+ vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+ vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA
+
+ dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l);
+ dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c);
+
+ // Active
+ refcyc_per_line_delivery_pre_l = get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_line_delivery_l = get_refcyc_per_line_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l);
+
+ if (dual_plane) {
+ refcyc_per_line_delivery_pre_c = get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_line_delivery_c = get_refcyc_per_line_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n",
+ __func__, refcyc_per_line_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n",
+ __func__, refcyc_per_line_delivery_c);
+ }
+
+ if (src->dynamic_metadata_enable && src->gpuvm)
+ dlg_regs->refcyc_per_vm_dmdata = get_refcyc_per_vm_dmdata_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dlg_regs->dmdata_dl_delta = get_dmdata_dl_delta_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx)
+ * refclk_freq_in_mhz; // From VBA
+
+ refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l);
+
+ if (dual_plane) {
+ refcyc_per_req_delivery_pre_c = get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_req_delivery_c = get_refcyc_per_req_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n",
+ __func__, refcyc_per_req_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c);
+ }
+
+ // TTU - Cursor
+ ASSERT(src->num_cursors <= 1);
+ if (src->num_cursors > 0) {
+ refcyc_per_req_delivery_pre_cur0 = get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib,
+ e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_req_delivery_cur0 = get_refcyc_per_cursor_req_delivery_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n",
+ __func__, refcyc_per_req_delivery_pre_cur0);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n",
+ __func__, refcyc_per_req_delivery_cur0);
+ }
+
+ // Assign to register structures
+ dlg_regs->min_dst_y_next_start = min_dst_y_next_start * dml_pow(2, 2);
+ ASSERT(dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
+
+ dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line
+ dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk
+ dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2));
+ dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2));
+ dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2));
+ dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2));
+ dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2));
+
+ dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19));
+ dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19));
+
+ dml_print("DML_DLG: %s: dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, dlg_regs->dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, dlg_regs->dst_y_per_row_vblank);
+ dml_print("DML_DLG: %s: dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, dlg_regs->dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, dlg_regs->dst_y_per_row_flip);
+
+ dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA
+ dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA
+
+ // From VBA
+ dst_y_per_pte_row_nom_l = get_dst_y_per_pte_row_nom_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ // From VBA
+ dst_y_per_pte_row_nom_c = get_dst_y_per_pte_row_nom_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ // From VBA
+ dst_y_per_meta_row_nom_l = get_dst_y_per_meta_row_nom_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+ // From VBA
+ dst_y_per_meta_row_nom_c = get_dst_y_per_meta_row_nom_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+
+ refcyc_per_pte_group_nom_l = get_refcyc_per_pte_group_nom_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_nom_c = get_refcyc_per_pte_group_nom_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_vblank_l = get_refcyc_per_pte_group_vblank_l_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_vblank_c = get_refcyc_per_pte_group_vblank_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_flip_l = get_refcyc_per_pte_group_flip_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_pte_group_flip_c = get_refcyc_per_pte_group_flip_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ refcyc_per_meta_chunk_nom_l = get_refcyc_per_meta_chunk_nom_l_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_nom_c = get_refcyc_per_meta_chunk_nom_c_in_us(mode_lib, e2e_pipe_param, num_pipes,
+ pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_vblank_l = get_refcyc_per_meta_chunk_vblank_l_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_vblank_c = get_refcyc_per_meta_chunk_vblank_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_flip_l = get_refcyc_per_meta_chunk_flip_l_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+ refcyc_per_meta_chunk_flip_c = get_refcyc_per_meta_chunk_flip_c_in_us(mode_lib, e2e_pipe_param,
+ num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA
+
+ dlg_regs->dst_y_per_pte_row_nom_l = dst_y_per_pte_row_nom_l * dml_pow(2, 2);
+ dlg_regs->dst_y_per_pte_row_nom_c = dst_y_per_pte_row_nom_c * dml_pow(2, 2);
+ dlg_regs->dst_y_per_meta_row_nom_l = dst_y_per_meta_row_nom_l * dml_pow(2, 2);
+ dlg_regs->dst_y_per_meta_row_nom_c = dst_y_per_meta_row_nom_c * dml_pow(2, 2);
+ dlg_regs->refcyc_per_pte_group_nom_l = refcyc_per_pte_group_nom_l;
+ dlg_regs->refcyc_per_pte_group_nom_c = refcyc_per_pte_group_nom_c;
+ dlg_regs->refcyc_per_pte_group_vblank_l = refcyc_per_pte_group_vblank_l;
+ dlg_regs->refcyc_per_pte_group_vblank_c = refcyc_per_pte_group_vblank_c;
+ dlg_regs->refcyc_per_pte_group_flip_l = refcyc_per_pte_group_flip_l;
+ dlg_regs->refcyc_per_pte_group_flip_c = refcyc_per_pte_group_flip_c;
+ dlg_regs->refcyc_per_meta_chunk_nom_l = refcyc_per_meta_chunk_nom_l;
+ dlg_regs->refcyc_per_meta_chunk_nom_c = refcyc_per_meta_chunk_nom_c;
+ dlg_regs->refcyc_per_meta_chunk_vblank_l = refcyc_per_meta_chunk_vblank_l;
+ dlg_regs->refcyc_per_meta_chunk_vblank_c = refcyc_per_meta_chunk_vblank_c;
+ dlg_regs->refcyc_per_meta_chunk_flip_l = refcyc_per_meta_chunk_flip_l;
+ dlg_regs->refcyc_per_meta_chunk_flip_c = refcyc_per_meta_chunk_flip_c;
+ dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, 1);
+ dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, 1);
+ dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, 1);
+ dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, 1);
+
+ dlg_regs->chunk_hdl_adjust_cur0 = 3;
+ dlg_regs->dst_y_offset_cur0 = 0;
+ dlg_regs->chunk_hdl_adjust_cur1 = 3;
+ dlg_regs->dst_y_offset_cur1 = 0;
+
+ dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
+
+ ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_pre_cur0 =
+ (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 * dml_pow(2, 10));
+ ttu_regs->refcyc_per_req_delivery_pre_cur1 = 0;
+ ttu_regs->refcyc_per_req_delivery_cur1 = 0;
+ ttu_regs->qos_level_low_wm = 0;
+
+ ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal * ref_freq_to_pix_freq);
+
+ ttu_regs->qos_level_flip = 14;
+ ttu_regs->qos_level_fixed_l = 8;
+ ttu_regs->qos_level_fixed_c = 8;
+ ttu_regs->qos_level_fixed_cur0 = 8;
+ ttu_regs->qos_ramp_disable_l = 0;
+ ttu_regs->qos_ramp_disable_c = 0;
+ ttu_regs->qos_ramp_disable_cur0 = 0;
+ ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz;
+
+ // CHECK for HW registers' range, assert or clamp
+ ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13));
+ ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
+ ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13));
+ ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
+ if (dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1;
+
+ if (dlg_regs->refcyc_per_vm_group_flip >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1;
+
+ if (dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1;
+
+ if (dlg_regs->refcyc_per_vm_req_flip >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1;
+
+ ASSERT(dlg_regs->dst_y_after_scaler < (unsigned int) 8);
+ ASSERT(dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17));
+ if (dual_plane) {
+ if (dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) {
+ // FIXME what so special about chroma, can we just assert?
+ dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u > register max U15.2 %u\n",
+ __func__, dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)dml_pow(2, 17) - 1);
+ }
+ }
+ ASSERT(dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17));
+ ASSERT(dlg_regs->dst_y_per_meta_row_nom_c < (unsigned int)dml_pow(2, 17));
+
+ if (dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1;
+ if (dual_plane) {
+ if (dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1;
+ }
+ ASSERT(dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13));
+ if (dual_plane) {
+ ASSERT(dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)dml_pow(2, 13));
+ }
+
+ if (dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1;
+ if (dual_plane) {
+ if (dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23))
+ dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1;
+ }
+ ASSERT(dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_meta_chunk_vblank_c < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13));
+ ASSERT(dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13));
+ ASSERT(ttu_regs->qos_level_low_wm < dml_pow(2, 14));
+ ASSERT(ttu_regs->qos_level_high_wm < dml_pow(2, 14));
+ ASSERT(ttu_regs->min_ttu_vblank < dml_pow(2, 24));
+
+ print__ttu_regs_st(mode_lib, ttu_regs);
+ print__dlg_regs_st(mode_lib, dlg_regs);
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] done, num_pipes=%d\n", __func__, pipe_idx, num_pipes);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h
new file mode 100644
index 000000000000..ebee365293cd
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML32_DISPLAY_RQ_DLG_CALC_H__
+#define __DML32_DISPLAY_RQ_DLG_CALC_H__
+
+#include "../display_rq_dlg_helpers.h"
+
+struct display_mode_lib;
+
+/*
+* Function: dml_rq_dlg_get_rq_reg
+* Main entry point for test to get the register values out of this DML class.
+* This function calls <get_rq_param> and <extract_rq_regs> functions to calculate
+* and then populate the rq_regs struct
+* Input:
+* pipe_param - pipe source configuration (e.g. vp, pitch, scaling, dest, etc.)
+* Output:
+* rq_regs - struct that holds all the RQ registers field value.
+* See also: <display_rq_regs_st>
+*/
+void dml32_rq_dlg_get_rq_reg(display_rq_regs_st *rq_regs,
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx);
+
+/*
+* Function: dml_rq_dlg_get_dlg_reg
+* Calculate and return DLG and TTU register struct given the system setting
+* Output:
+* dlg_regs - output DLG register struct
+* ttu_regs - output DLG TTU register struct
+* Input:
+* e2e_pipe_param - "compacted" array of e2e pipe param struct
+* num_pipes - num of active "pipe" or "route"
+* pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg
+* cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered.
+* Added for legacy or unrealistic timing tests.
+*/
+void dml32_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
new file mode 100644
index 000000000000..84b4b00f29cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
@@ -0,0 +1,684 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "clk_mgr.h"
+#include "resource.h"
+#include "dcn321_fpu.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn321/dcn321_resource.h"
+
+#define DCN3_2_DEFAULT_DET_SIZE 256
+
+struct _vcs_dpi_ip_params_st dcn3_21_ip = {
+ .gpuvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_enable = 0,
+ .rob_buffer_size_kbytes = 128,
+ .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1280,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 22,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .alpha_pixel_chunk_size_kbytes = 4,
+ .min_pixel_chunk_size_bytes = 1024,
+ .dcc_meta_buffer_size_bytes = 6272,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,
+ .maximum_pixels_per_line_per_dsc_unit = 6016,
+ .dsc422_native_support = true,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 57,
+ .line_buffer_size_bits = 1171920,
+ .max_line_buffer_lines = 32,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 47,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 125,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+ .max_num_dp2p0_outputs = 2,
+ .max_num_dp2p0_streams = 4,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 1564.0,
+ .fabricclk_mhz = 400.0,
+ .dispclk_mhz = 2150.0,
+ .dppclk_mhz = 2150.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .phyclk_d32_mhz = 625.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 716.667,
+ .dram_speed_mts = 1600.0,
+ .dtbclk_mhz = 1564.0,
+ },
+ },
+ .num_states = 1,
+ .sr_exit_time_us = 12.36,
+ .sr_enter_plus_exit_time_us = 16.72,
+ .sr_exit_z8_time_us = 285.0,
+ .sr_enter_plus_exit_z8_time_us = 320,
+ .writeback_latency_us = 12.0,
+ .round_trip_ping_latency_dcfclk_cycles = 263,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .fclk_change_latency_us = 20,
+ .usr_retraining_latency_us = 2,
+ .smn_latency_us = 2,
+ .mall_allocated_for_dcn_mbytes = 64,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 100.0,
+ .pct_ideal_fabric_bw_after_urgent = 67.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented
+ .pct_ideal_dram_bw_after_urgent_strobe = 67.0,
+ .max_avg_sdp_bw_use_normal_percent = 80.0,
+ .max_avg_fabric_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_strobe_percent = 50.0,
+ .max_avg_dram_bw_use_normal_percent = 15.0,
+ .num_chans = 8,
+ .dram_channel_width_bytes = 2,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .dram_clock_change_latency_us = 400,
+ .dispclk_dppclk_vco_speed_mhz = 4300.0,
+ .do_urgent_latency_adjustment = true,
+ .urgent_latency_adjustment_fabric_clock_component_us = 1.0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+};
+
+static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+ if (entry->dcfclk_mhz > 0) {
+ float bw_on_sdp = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100);
+
+ entry->fabricclk_mhz = bw_on_sdp / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100));
+ entry->dram_speed_mts = bw_on_sdp / (dcn3_21_soc.num_chans *
+ dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
+ } else if (entry->fabricclk_mhz > 0) {
+ float bw_on_fabric = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100);
+
+ entry->dcfclk_mhz = bw_on_fabric / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100));
+ entry->dram_speed_mts = bw_on_fabric / (dcn3_21_soc.num_chans *
+ dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
+ } else if (entry->dram_speed_mts > 0) {
+ float bw_on_dram = entry->dram_speed_mts * dcn3_21_soc.num_chans *
+ dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+
+ entry->fabricclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100));
+ entry->dcfclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100));
+ }
+}
+
+static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry)
+{
+ float memory_bw_kbytes_sec;
+ float fabric_bw_kbytes_sec;
+ float sdp_bw_kbytes_sec;
+ float limiting_bw_kbytes_sec;
+
+ memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_21_soc.num_chans *
+ dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+
+ fabric_bw_kbytes_sec = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100);
+
+ sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100);
+
+ limiting_bw_kbytes_sec = memory_bw_kbytes_sec;
+
+ if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec)
+ limiting_bw_kbytes_sec = fabric_bw_kbytes_sec;
+
+ if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec)
+ limiting_bw_kbytes_sec = sdp_bw_kbytes_sec;
+
+ return limiting_bw_kbytes_sec;
+}
+
+void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+ unsigned int *num_entries,
+ struct _vcs_dpi_voltage_scaling_st *entry)
+{
+ int i = 0;
+ int index = 0;
+ float net_bw_of_new_state = 0;
+
+ dc_assert_fp_enabled();
+
+ get_optimal_ntuple(entry);
+
+ if (*num_entries == 0) {
+ table[0] = *entry;
+ (*num_entries)++;
+ } else {
+ net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry);
+ while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) {
+ index++;
+ if (index >= *num_entries)
+ break;
+ }
+
+ for (i = *num_entries; i > index; i--)
+ table[i] = table[i - 1];
+
+ table[index] = *entry;
+ (*num_entries)++;
+ }
+}
+
+static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
+ unsigned int index)
+{
+ int i;
+
+ if (*num_entries == 0)
+ return;
+
+ for (i = index; i < *num_entries - 1; i++) {
+ table[i] = table[i + 1];
+ }
+ memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st));
+}
+
+static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
+ struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+ int i, j;
+ struct _vcs_dpi_voltage_scaling_st entry = {0};
+
+ unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0,
+ max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0;
+
+ unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299;
+
+ static const unsigned int num_dcfclk_stas = 5;
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564};
+
+ unsigned int num_uclk_dpms = 0;
+ unsigned int num_fclk_dpms = 0;
+ unsigned int num_dcfclk_dpms = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz)
+ max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+ if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz)
+ max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz)
+ max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+
+ if (bw_params->clk_table.entries[i].memclk_mhz > 0)
+ num_uclk_dpms++;
+ if (bw_params->clk_table.entries[i].fclk_mhz > 0)
+ num_fclk_dpms++;
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > 0)
+ num_dcfclk_dpms++;
+ }
+
+ if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz)
+ return -1;
+
+ if (max_dppclk_mhz == 0)
+ max_dppclk_mhz = max_dispclk_mhz;
+
+ if (max_fclk_mhz == 0)
+ max_fclk_mhz = max_dcfclk_mhz * dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / dcn3_21_soc.pct_ideal_fabric_bw_after_urgent;
+
+ if (max_phyclk_mhz == 0)
+ max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz;
+
+ *num_entries = 0;
+ entry.dispclk_mhz = max_dispclk_mhz;
+ entry.dscclk_mhz = max_dispclk_mhz / 3;
+ entry.dppclk_mhz = max_dppclk_mhz;
+ entry.dtbclk_mhz = max_dtbclk_mhz;
+ entry.phyclk_mhz = max_phyclk_mhz;
+ entry.phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz;
+ entry.phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz;
+
+ // Insert all the DCFCLK STAs
+ for (i = 0; i < num_dcfclk_stas; i++) {
+ entry.dcfclk_mhz = dcfclk_sta_targets[i];
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = 0;
+
+ dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+ }
+
+ // Insert the max DCFCLK
+ entry.dcfclk_mhz = max_dcfclk_mhz;
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = 0;
+
+ dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+
+ // Insert the UCLK DPMS
+ for (i = 0; i < num_uclk_dpms; i++) {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = 0;
+ entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16;
+
+ dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+ }
+
+ // If FCLK is coarse grained, insert individual DPMs.
+ if (num_fclk_dpms > 2) {
+ for (i = 0; i < num_fclk_dpms; i++) {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+ entry.dram_speed_mts = 0;
+
+ dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+ }
+ }
+ // If FCLK fine grained, only insert max
+ else {
+ entry.dcfclk_mhz = 0;
+ entry.fabricclk_mhz = max_fclk_mhz;
+ entry.dram_speed_mts = 0;
+
+ dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+ }
+
+ // At this point, the table contains all "points of interest" based on
+ // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock
+ // ratios (by derate, are exact).
+
+ // Remove states that require higher clocks than are supported
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].dcfclk_mhz > max_dcfclk_mhz ||
+ table[i].fabricclk_mhz > max_fclk_mhz ||
+ table[i].dram_speed_mts > max_uclk_mhz * 16)
+ remove_entry_from_table_at_index(table, num_entries, i);
+ }
+
+ // At this point, the table only contains supported points of interest
+ // it could be used as is, but some states may be redundant due to
+ // coarse grained nature of some clocks, so we want to round up to
+ // coarse grained DPMs and remove duplicates.
+
+ // Round up UCLKs
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ for (j = 0; j < num_uclk_dpms; j++) {
+ if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) {
+ table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ // If FCLK is coarse grained, round up to next DPMs
+ if (num_fclk_dpms > 2) {
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ for (j = 0; j < num_fclk_dpms; j++) {
+ if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) {
+ table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz;
+ break;
+ }
+ }
+ }
+ }
+ // Otherwise, round up to minimum.
+ else {
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].fabricclk_mhz < min_fclk_mhz) {
+ table[i].fabricclk_mhz = min_fclk_mhz;
+ break;
+ }
+ }
+ }
+
+ // Round DCFCLKs up to minimum
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ if (table[i].dcfclk_mhz < min_dcfclk_mhz) {
+ table[i].dcfclk_mhz = min_dcfclk_mhz;
+ break;
+ }
+ }
+
+ // Remove duplicate states, note duplicate states are always neighbouring since table is sorted.
+ i = 0;
+ while (i < *num_entries - 1) {
+ if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz &&
+ table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz &&
+ table[i].dram_speed_mts == table[i + 1].dram_speed_mts)
+ remove_entry_from_table_at_index(table, num_entries, i + 1);
+ else
+ i++;
+ }
+
+ // Fix up the state indicies
+ for (i = *num_entries - 1; i >= 0 ; i--) {
+ table[i].state = i;
+ }
+
+ return 0;
+}
+
+static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+ unsigned int *optimal_dcfclk,
+ unsigned int *optimal_fclk)
+{
+ double bw_from_dram, bw_from_dram1, bw_from_dram2;
+
+ bw_from_dram1 = uclk_mts * dcn3_21_soc.num_chans *
+ dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_dram_bw_use_normal_percent / 100);
+ bw_from_dram2 = uclk_mts * dcn3_21_soc.num_chans *
+ dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100);
+
+ bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
+
+ if (optimal_fclk)
+ *optimal_fclk = bw_from_dram /
+ (dcn3_21_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100));
+
+ if (optimal_dcfclk)
+ *optimal_dcfclk = bw_from_dram /
+ (dcn3_21_soc.return_bus_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+/** dcn321_update_bw_bounding_box
+ * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from spreadsheet
+ * with actual values as per dGPU SKU:
+ * -with passed few options from dc->config
+ * -with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW)
+ * -with passed latency values (passed in ns units) in dc-> bb override for debugging purposes
+ * -with passed latencies from VBIOS (in 100_ns units) if available for certain dGPU SKU
+ * -with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC)
+ * -clocks levels with passed clk_table entries from Clk Mgr as reported by PM FW for different
+ * clocks (which might differ for certain dGPU SKU of the same ASIC)
+ */
+void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ dc_assert_fp_enabled();
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
+ /* Overrides from dc->config options */
+ dcn3_21_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk;
+
+ /* Override from passed dc->bb_overrides if available*/
+ if ((int)(dcn3_21_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
+ && dc->bb_overrides.sr_exit_time_ns) {
+ dcn3_21_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_21_soc.sr_enter_plus_exit_time_us * 1000)
+ != dc->bb_overrides.sr_enter_plus_exit_time_ns
+ && dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+ dcn3_21_soc.sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_21_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns
+ && dc->bb_overrides.urgent_latency_ns) {
+ dcn3_21_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_21_soc.dram_clock_change_latency_us * 1000)
+ != dc->bb_overrides.dram_clock_change_latency_ns
+ && dc->bb_overrides.dram_clock_change_latency_ns) {
+ dcn3_21_soc.dram_clock_change_latency_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+ }
+
+ if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000)
+ != dc->bb_overrides.dummy_clock_change_latency_ns
+ && dc->bb_overrides.dummy_clock_change_latency_ns) {
+ dcn3_21_soc.dummy_pstate_latency_us =
+ dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0;
+ }
+
+ /* Override from VBIOS if VBIOS bb_info available */
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info) {
+ struct bp_soc_bb_info bb_info = {0};
+
+ if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
+ if (bb_info.dram_clock_change_latency_100ns > 0)
+ dcn3_21_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10;
+
+ if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dcn3_21_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+ if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dcn3_21_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
+ }
+ }
+
+ /* Override from VBIOS for num_chan */
+ if (dc->ctx->dc_bios->vram_info.num_chans)
+ dcn3_21_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
+
+ if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+ dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+
+ }
+
+ /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */
+ dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+ /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */
+ if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) {
+ if (dc->debug.use_legacy_soc_bb_mechanism) {
+ unsigned int i = 0, j = 0, num_states = 0;
+
+ unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
+ unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
+ unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
+
+ unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {615, 906, 1324, 1564};
+ unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0;
+ unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
+
+ for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+ if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+ if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+ if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+ if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+ }
+ if (!max_dcfclk_mhz)
+ max_dcfclk_mhz = dcn3_21_soc.clock_limits[0].dcfclk_mhz;
+ if (!max_dispclk_mhz)
+ max_dispclk_mhz = dcn3_21_soc.clock_limits[0].dispclk_mhz;
+ if (!max_dppclk_mhz)
+ max_dppclk_mhz = dcn3_21_soc.clock_limits[0].dppclk_mhz;
+ if (!max_phyclk_mhz)
+ max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz;
+
+ if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
+ dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+ num_dcfclk_sta_targets++;
+ } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+ // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
+ dcfclk_sta_targets[i] = max_dcfclk_mhz;
+ break;
+ }
+ }
+ // Update size of array since we "removed" duplicates
+ num_dcfclk_sta_targets = i + 1;
+ }
+
+ num_uclk_states = bw_params->clk_table.num_entries;
+
+ // Calculate optimal dcfclk for each uclk
+ for (i = 0; i < num_uclk_states; i++) {
+ dcn321_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
+ &optimal_dcfclk_for_uclk[i], NULL);
+ if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
+ optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
+ }
+ }
+
+ // Calculate optimal uclk for each dcfclk sta target
+ for (i = 0; i < num_dcfclk_sta_targets; i++) {
+ for (j = 0; j < num_uclk_states; j++) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ break;
+ }
+ }
+ }
+
+ i = 0;
+ j = 0;
+ // create the final dcfclk and uclk table
+ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ } else {
+ if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ } else {
+ j = num_uclk_states;
+ }
+ }
+ }
+
+ while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
+ dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+ dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+ }
+
+ while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
+ optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+ dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+ dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+ }
+
+ dcn3_21_soc.num_states = num_states;
+ for (i = 0; i < dcn3_21_soc.num_states; i++) {
+ dcn3_21_soc.clock_limits[i].state = i;
+ dcn3_21_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+ dcn3_21_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+
+ /* Fill all states with max values of all these clocks */
+ dcn3_21_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+ dcn3_21_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+ dcn3_21_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
+ dcn3_21_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3;
+
+ /* Populate from bw_params for DTBCLK, SOCCLK */
+ if (i > 0) {
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz) {
+ dcn3_21_soc.clock_limits[i].dtbclk_mhz = dcn3_21_soc.clock_limits[i-1].dtbclk_mhz;
+ } else {
+ dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+ } else if (bw_params->clk_table.entries[i].dtbclk_mhz) {
+ dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+
+ if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+ dcn3_21_soc.clock_limits[i].socclk_mhz = dcn3_21_soc.clock_limits[i-1].socclk_mhz;
+ else
+ dcn3_21_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
+
+ if (!dram_speed_mts[i] && i > 0)
+ dcn3_21_soc.clock_limits[i].dram_speed_mts = dcn3_21_soc.clock_limits[i-1].dram_speed_mts;
+ else
+ dcn3_21_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+ /* These clocks cannot come from bw_params, always fill from dcn3_21_soc[0] */
+ /* PHYCLK_D18, PHYCLK_D32 */
+ dcn3_21_soc.clock_limits[i].phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz;
+ dcn3_21_soc.clock_limits[i].phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz;
+ }
+ } else {
+ build_synthetic_soc_states(bw_params, dcn3_21_soc.clock_limits, &dcn3_21_soc.num_states);
+ }
+
+ /* Re-init DML with updated bb */
+ dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32);
+ if (dc->current_state)
+ dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32);
+ }
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h
new file mode 100644
index 000000000000..e8fad9b4be69
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN32_FPU_H__
+#define __DCN32_FPU_H__
+
+#include "dml/display_mode_vba.h"
+
+void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+ unsigned int *num_entries,
+ struct _vcs_dpi_voltage_scaling_st *entry);
+
+void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
index edb9f7567d6d..f394b3f3922a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
@@ -26,7 +26,11 @@
#define __DISPLAY_MODE_ENUMS_H__
enum output_encoder_class {
- dm_dp = 0, dm_hdmi = 1, dm_wb = 2, dm_edp
+ dm_dp = 0,
+ dm_hdmi = 1,
+ dm_wb = 2,
+ dm_edp = 3,
+ dm_dp2p0 = 5,
};
enum output_format_class {
dm_444 = 0, dm_420 = 1, dm_n422, dm_s422
@@ -105,6 +109,10 @@ enum clock_change_support {
dm_dram_clock_change_uninitialized = 0,
dm_dram_clock_change_vactive,
dm_dram_clock_change_vblank,
+ dm_dram_clock_change_vactive_w_mall_full_frame,
+ dm_dram_clock_change_vactive_w_mall_sub_vp,
+ dm_dram_clock_change_vblank_w_mall_full_frame,
+ dm_dram_clock_change_vblank_w_mall_sub_vp,
dm_dram_clock_change_unsupported
};
@@ -169,6 +177,9 @@ enum odm_combine_mode {
dm_odm_combine_mode_disabled,
dm_odm_combine_mode_2to1,
dm_odm_combine_mode_4to1,
+ dm_odm_split_mode_1to2,
+ dm_odm_mode_mso_1to2,
+ dm_odm_mode_mso_1to4
};
enum odm_combine_policy {
@@ -176,11 +187,15 @@ enum odm_combine_policy {
dm_odm_combine_policy_none,
dm_odm_combine_policy_2to1,
dm_odm_combine_policy_4to1,
+ dm_odm_split_policy_1to2,
+ dm_odm_mso_policy_1to2,
+ dm_odm_mso_policy_1to4,
};
enum immediate_flip_requirement {
dm_immediate_flip_not_required,
dm_immediate_flip_required,
+ dm_immediate_flip_opportunistic,
};
enum unbounded_requesting_policy {
@@ -189,4 +204,75 @@ enum unbounded_requesting_policy {
dm_unbounded_requesting_disable
};
+enum dm_rotation_angle {
+ dm_rotation_0,
+ dm_rotation_90,
+ dm_rotation_180,
+ dm_rotation_270,
+ dm_rotation_0m,
+ dm_rotation_90m,
+ dm_rotation_180m,
+ dm_rotation_270m,
+};
+
+enum dm_use_mall_for_pstate_change_mode {
+ dm_use_mall_pstate_change_disable,
+ dm_use_mall_pstate_change_full_frame,
+ dm_use_mall_pstate_change_sub_viewport,
+ dm_use_mall_pstate_change_phantom_pipe
+};
+
+enum dm_use_mall_for_static_screen_mode {
+ dm_use_mall_static_screen_disable,
+ dm_use_mall_static_screen_optimize,
+ dm_use_mall_static_screen_enable,
+};
+
+enum dm_output_link_dp_rate {
+ dm_dp_rate_na,
+ dm_dp_rate_hbr,
+ dm_dp_rate_hbr2,
+ dm_dp_rate_hbr3,
+ dm_dp_rate_uhbr10,
+ dm_dp_rate_uhbr13p5,
+ dm_dp_rate_uhbr20,
+};
+
+enum dm_fclock_change_support {
+ dm_fclock_change_vactive,
+ dm_fclock_change_vblank,
+ dm_fclock_change_unsupported,
+};
+
+enum dm_prefetch_modes {
+ dm_prefetch_support_uclk_fclk_and_stutter_if_possible,
+ dm_prefetch_support_uclk_fclk_and_stutter,
+ dm_prefetch_support_fclk_and_stutter,
+ dm_prefetch_support_stutter,
+ dm_prefetch_support_none,
+};
+enum dm_output_type {
+ dm_output_type_unknown,
+ dm_output_type_dp,
+ dm_output_type_edp,
+ dm_output_type_dp2p0,
+ dm_output_type_hdmi,
+ dm_output_type_hdmifrl,
+};
+
+enum dm_output_rate {
+ dm_output_rate_unknown,
+ dm_output_rate_dp_rate_hbr,
+ dm_output_rate_dp_rate_hbr2,
+ dm_output_rate_dp_rate_hbr3,
+ dm_output_rate_dp_rate_uhbr10,
+ dm_output_rate_dp_rate_uhbr13p5,
+ dm_output_rate_dp_rate_uhbr20,
+ dm_output_rate_hdmi_rate_3x3,
+ dm_output_rate_hdmi_rate_6x3,
+ dm_output_rate_hdmi_rate_6x4,
+ dm_output_rate_hdmi_rate_8x4,
+ dm_output_rate_hdmi_rate_10x4,
+ dm_output_rate_hdmi_rate_12x4,
+};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
index 30db51fbd8cd..5d27ff0ebb5f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
@@ -35,6 +35,8 @@
#include "dcn30/display_rq_dlg_calc_30.h"
#include "dcn31/display_mode_vba_31.h"
#include "dcn31/display_rq_dlg_calc_31.h"
+#include "dcn32/display_mode_vba_32.h"
+#include "dcn32/display_rq_dlg_calc_32.h"
#include "dml_logger.h"
const struct dml_funcs dml20_funcs = {
@@ -72,6 +74,13 @@ const struct dml_funcs dml31_funcs = {
.rq_dlg_get_rq_reg = dml31_rq_dlg_get_rq_reg
};
+const struct dml_funcs dml32_funcs = {
+ .validate = dml32_ModeSupportAndSystemConfigurationFull,
+ .recalculate = dml32_recalculate,
+ .rq_dlg_get_dlg_reg_v2 = dml32_rq_dlg_get_dlg_reg,
+ .rq_dlg_get_rq_reg_v2 = dml32_rq_dlg_get_rq_reg
+};
+
void dml_init_instance(struct display_mode_lib *lib,
const struct _vcs_dpi_soc_bounding_box_st *soc_bb,
const struct _vcs_dpi_ip_params_st *ip_params,
@@ -98,6 +107,9 @@ void dml_init_instance(struct display_mode_lib *lib,
case DML_PROJECT_DCN31_FPGA:
lib->funcs = dml31_funcs;
break;
+ case DML_PROJECT_DCN32:
+ lib->funcs = dml32_funcs;
+ break;
default:
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
index d76251fd1566..2bdd6ed22611 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
@@ -41,6 +41,7 @@ enum dml_project {
DML_PROJECT_DCN30,
DML_PROJECT_DCN31,
DML_PROJECT_DCN31_FPGA,
+ DML_PROJECT_DCN32,
};
struct display_mode_lib;
@@ -62,6 +63,20 @@ struct dml_funcs {
struct display_mode_lib *mode_lib,
display_rq_regs_st *rq_regs,
const display_pipe_params_st *pipe_param);
+ // DLG interfaces have different function parameters in DCN32.
+ // Create new function pointers to address the changes
+ void (*rq_dlg_get_dlg_reg_v2)(
+ struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+ display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx);
+ void (*rq_dlg_get_rq_reg_v2)(display_rq_regs_st *rq_regs,
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx);
void (*recalculate)(struct display_mode_lib *mode_lib);
void (*validate)(struct display_mode_lib *mode_lib);
};
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index 2df660cd8801..e8b094006d95 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -54,12 +54,102 @@ typedef struct _vcs_dpi_display_rq_regs_st display_rq_regs_st;
typedef struct _vcs_dpi_display_dlg_sys_params_st display_dlg_sys_params_st;
typedef struct _vcs_dpi_display_arb_params_st display_arb_params_st;
+typedef struct {
+ double UrgentWatermark;
+ double WritebackUrgentWatermark;
+ double DRAMClockChangeWatermark;
+ double FCLKChangeWatermark;
+ double WritebackDRAMClockChangeWatermark;
+ double WritebackFCLKChangeWatermark;
+ double StutterExitWatermark;
+ double StutterEnterPlusExitWatermark;
+ double Z8StutterExitWatermark;
+ double Z8StutterEnterPlusExitWatermark;
+ double USRRetrainingWatermark;
+} Watermarks;
+
+typedef struct {
+ double UrgentLatency;
+ double ExtraLatency;
+ double WritebackLatency;
+ double DRAMClockChangeLatency;
+ double FCLKChangeLatency;
+ double SRExitTime;
+ double SREnterPlusExitTime;
+ double SRExitZ8Time;
+ double SREnterPlusExitZ8Time;
+ double USRRetrainingLatencyPlusSMNLatency;
+} Latencies;
+
+typedef struct {
+ double Dppclk;
+ double Dispclk;
+ double PixelClock;
+ double DCFClkDeepSleep;
+ unsigned int DPPPerSurface;
+ bool ScalerEnabled;
+ enum dm_rotation_angle SourceRotation;
+ unsigned int ViewportHeight;
+ unsigned int ViewportHeightChroma;
+ unsigned int BlockWidth256BytesY;
+ unsigned int BlockHeight256BytesY;
+ unsigned int BlockWidth256BytesC;
+ unsigned int BlockHeight256BytesC;
+ unsigned int BlockWidthY;
+ unsigned int BlockHeightY;
+ unsigned int BlockWidthC;
+ unsigned int BlockHeightC;
+ unsigned int InterlaceEnable;
+ unsigned int NumberOfCursors;
+ unsigned int VBlank;
+ unsigned int HTotal;
+ unsigned int HActive;
+ bool DCCEnable;
+ enum odm_combine_mode ODMMode;
+ enum source_format_class SourcePixelFormat;
+ enum dm_swizzle_mode SurfaceTiling;
+ unsigned int BytePerPixelY;
+ unsigned int BytePerPixelC;
+ bool ProgressiveToInterlaceUnitInOPP;
+ double VRatio;
+ double VRatioChroma;
+ unsigned int VTaps;
+ unsigned int VTapsChroma;
+ unsigned int PitchY;
+ unsigned int DCCMetaPitchY;
+ unsigned int PitchC;
+ unsigned int DCCMetaPitchC;
+ bool ViewportStationary;
+ unsigned int ViewportXStart;
+ unsigned int ViewportYStart;
+ unsigned int ViewportXStartC;
+ unsigned int ViewportYStartC;
+ bool FORCE_ONE_ROW_FOR_FRAME;
+ unsigned int SwathHeightY;
+ unsigned int SwathHeightC;
+} DmlPipe;
+
+typedef struct {
+ double UrgentLatency;
+ double ExtraLatency;
+ double WritebackLatency;
+ double DRAMClockChangeLatency;
+ double FCLKChangeLatency;
+ double SRExitTime;
+ double SREnterPlusExitTime;
+ double SRExitZ8Time;
+ double SREnterPlusExitZ8Time;
+ double USRRetrainingLatency;
+ double SMNLatency;
+} SOCParametersList;
+
struct _vcs_dpi_voltage_scaling_st {
int state;
double dscclk_mhz;
double dcfclk_mhz;
double socclk_mhz;
double phyclk_d18_mhz;
+ double phyclk_d32_mhz;
double dram_speed_mts;
double fabricclk_mhz;
double dispclk_mhz;
@@ -71,6 +161,11 @@ struct _vcs_dpi_voltage_scaling_st {
struct _vcs_dpi_soc_bounding_box_st {
struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
+ /*
+ * This is a temporary stash for updating @clock_limits with the PMFW
+ * clock table. Do not use outside of *update_bw_boudning_box functions.
+ */
+ struct _vcs_dpi_voltage_scaling_st _clock_tmp[DC__VOLTAGE_STATES];
unsigned int num_states;
double sr_exit_time_us;
double sr_enter_plus_exit_time_us;
@@ -80,6 +175,16 @@ struct _vcs_dpi_soc_bounding_box_st {
double urgent_latency_pixel_data_only_us;
double urgent_latency_pixel_mixed_with_vm_data_us;
double urgent_latency_vm_data_only_us;
+ double usr_retraining_latency_us;
+ double smn_latency_us;
+ double fclk_change_latency_us;
+ double mall_allocated_for_dcn_mbytes;
+ double pct_ideal_fabric_bw_after_urgent;
+ double pct_ideal_dram_bw_after_urgent_strobe;
+ double max_avg_fabric_bw_use_normal_percent;
+ double max_avg_dram_bw_use_normal_strobe_percent;
+ enum dm_prefetch_modes allow_for_pstate_or_stutter_in_vblank_final;
+ bool dram_clock_change_requirement_final;
double writeback_latency_us;
double ideal_dram_bw_after_urgent_percent;
double pct_ideal_dram_sdp_bw_after_urgent_pixel_only; // PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
@@ -148,6 +253,9 @@ struct _vcs_dpi_ip_params_st {
unsigned int dpp_output_buffer_pixels;
unsigned int opp_output_buffer_lines;
unsigned int pixel_chunk_size_kbytes;
+ unsigned int alpha_pixel_chunk_size_kbytes;
+ unsigned int min_pixel_chunk_size_bytes;
+ unsigned int dcc_meta_buffer_size_bytes;
unsigned char pte_enable;
unsigned int pte_chunk_size_kbytes;
unsigned int meta_chunk_size_kbytes;
@@ -168,6 +276,7 @@ struct _vcs_dpi_ip_params_st {
double writeback_min_hscl_ratio;
double writeback_min_vscl_ratio;
unsigned int maximum_dsc_bits_per_component;
+ unsigned int maximum_pixels_per_line_per_dsc_unit;
unsigned int writeback_max_hscl_taps;
unsigned int writeback_max_vscl_taps;
unsigned int writeback_line_buffer_luma_buffer_size;
@@ -224,6 +333,9 @@ struct _vcs_dpi_ip_params_st {
unsigned int can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one;
unsigned int bug_forcing_LC_req_same_size_fixed;
unsigned int number_of_cursors;
+ unsigned int max_num_dp2p0_outputs;
+ unsigned int max_num_dp2p0_streams;
+ unsigned int VBlankNomDefaultUS;
};
struct _vcs_dpi_display_xfc_params_st {
@@ -250,6 +362,8 @@ struct _vcs_dpi_display_pipe_source_params_st {
bool hostvm_levels_force_en;
unsigned int hostvm_levels_force;
int source_scan;
+ int source_rotation; // new in dml32
+ unsigned int det_size_override; // use to populate DETSizeOverride in vba struct
int sw_mode;
int macro_tile_size;
unsigned int surface_width_y;
@@ -264,6 +378,15 @@ struct _vcs_dpi_display_pipe_source_params_st {
unsigned int viewport_height_c;
unsigned int viewport_width_max;
unsigned int viewport_height_max;
+ unsigned int viewport_x_y;
+ unsigned int viewport_x_c;
+ bool viewport_stationary;
+ unsigned int dcc_rate_luma;
+ unsigned int gpuvm_min_page_size_kbytes;
+ unsigned int use_mall_for_pstate_change;
+ unsigned int use_mall_for_static_screen;
+ bool force_one_row_for_frame;
+ bool pte_buffer_mode;
unsigned int data_pitch;
unsigned int data_pitch_c;
unsigned int meta_pitch;
@@ -296,10 +419,17 @@ struct writeback_st {
int wb_vtaps_luma;
int wb_htaps_chroma;
int wb_vtaps_chroma;
+ unsigned int wb_htaps;
+ unsigned int wb_vtaps;
double wb_hratio;
double wb_vratio;
};
+struct display_audio_params_st {
+ unsigned int audio_sample_rate_khz;
+ int audio_sample_layout;
+};
+
struct _vcs_dpi_display_output_params_st {
int dp_lanes;
double output_bpp;
@@ -313,6 +443,11 @@ struct _vcs_dpi_display_output_params_st {
int dsc_slices;
int max_audio_sample_rate;
struct writeback_st wb;
+ struct display_audio_params_st audio;
+ unsigned int output_bpc;
+ int dp_rate;
+ unsigned int dp_multistream_id;
+ bool dp_multistream_en;
};
struct _vcs_dpi_scaler_ratio_depth_st {
@@ -361,6 +496,10 @@ struct _vcs_dpi_display_pipe_dest_params_st {
unsigned char use_maximum_vstartup;
unsigned int vtotal_max;
unsigned int vtotal_min;
+ unsigned int refresh_rate;
+ bool synchronize_timings;
+ unsigned int odm_combine_policy;
+ bool drr_display;
};
struct _vcs_dpi_display_pipe_params_st {
@@ -558,6 +697,9 @@ struct _vcs_dpi_display_arb_params_st {
int max_req_outstanding;
int min_req_outstanding;
int sat_level_us;
+ int hvm_min_req_outstand_commit_threshold;
+ int hvm_max_qos_commit_threshold;
+ int compbuf_reserved_space_kbytes;
};
#endif /*__DISPLAY_MODE_STRUCTS_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index c0740dbdcc2e..503e7d984ff0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -111,6 +111,22 @@ dml_get_attr_func(tcalc, mode_lib->vba.TCalc);
dml_get_attr_func(fraction_of_urgent_bandwidth, mode_lib->vba.FractionOfUrgentBandwidth);
dml_get_attr_func(fraction_of_urgent_bandwidth_imm_flip, mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip);
+
+dml_get_attr_func(cstate_max_cap_mode, mode_lib->vba.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+dml_get_attr_func(comp_buffer_size_kbytes, mode_lib->vba.CompressedBufferSizeInkByte);
+dml_get_attr_func(pixel_chunk_size_in_kbyte, mode_lib->vba.PixelChunkSizeInKByte);
+dml_get_attr_func(alpha_pixel_chunk_size_in_kbyte, mode_lib->vba.AlphaPixelChunkSizeInKByte);
+dml_get_attr_func(meta_chunk_size_in_kbyte, mode_lib->vba.MetaChunkSize);
+dml_get_attr_func(min_pixel_chunk_size_in_byte, mode_lib->vba.MinPixelChunkSizeBytes);
+dml_get_attr_func(min_meta_chunk_size_in_byte, mode_lib->vba.MinMetaChunkSizeBytes);
+dml_get_attr_func(fclk_watermark, mode_lib->vba.Watermark.FCLKChangeWatermark);
+dml_get_attr_func(usr_retraining_watermark, mode_lib->vba.Watermark.USRRetrainingWatermark);
+
+dml_get_attr_func(comp_buffer_reserved_space_kbytes, mode_lib->vba.CompBufReservedSpaceKBytes);
+dml_get_attr_func(comp_buffer_reserved_space_64bytes, mode_lib->vba.CompBufReservedSpace64B);
+dml_get_attr_func(comp_buffer_reserved_space_zs, mode_lib->vba.CompBufReservedSpaceZs);
+dml_get_attr_func(unbounded_request_enabled, mode_lib->vba.UnboundedRequestEnabled);
+
#define dml_get_pipe_attr_func(attr, var) double get_##attr(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, unsigned int num_pipes, unsigned int which_pipe) \
{\
unsigned int which_plane; \
@@ -165,6 +181,27 @@ dml_get_pipe_attr_func(vupdate_width, mode_lib->vba.VUpdateWidthPix);
dml_get_pipe_attr_func(vready_offset, mode_lib->vba.VReadyOffsetPix);
dml_get_pipe_attr_func(vready_at_or_after_vsync, mode_lib->vba.VREADY_AT_OR_AFTER_VSYNC);
dml_get_pipe_attr_func(min_dst_y_next_start, mode_lib->vba.MIN_DST_Y_NEXT_START);
+dml_get_pipe_attr_func(dst_y_per_pte_row_nom_l, mode_lib->vba.DST_Y_PER_PTE_ROW_NOM_L);
+dml_get_pipe_attr_func(dst_y_per_pte_row_nom_c, mode_lib->vba.DST_Y_PER_PTE_ROW_NOM_C);
+dml_get_pipe_attr_func(dst_y_per_meta_row_nom_l, mode_lib->vba.DST_Y_PER_META_ROW_NOM_L);
+dml_get_pipe_attr_func(dst_y_per_meta_row_nom_c, mode_lib->vba.DST_Y_PER_META_ROW_NOM_C);
+dml_get_pipe_attr_func(refcyc_per_pte_group_nom_l_in_us, mode_lib->vba.time_per_pte_group_nom_luma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_nom_c_in_us, mode_lib->vba.time_per_pte_group_nom_chroma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_vblank_l_in_us, mode_lib->vba.time_per_pte_group_vblank_luma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_vblank_c_in_us, mode_lib->vba.time_per_pte_group_vblank_chroma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_flip_l_in_us, mode_lib->vba.time_per_pte_group_flip_luma);
+dml_get_pipe_attr_func(refcyc_per_pte_group_flip_c_in_us, mode_lib->vba.time_per_pte_group_flip_chroma);
+dml_get_pipe_attr_func(vstartup_calculated, mode_lib->vba.VStartup);
+dml_get_pipe_attr_func(dpte_row_height_linear_c, mode_lib->vba.dpte_row_height_linear_chroma);
+dml_get_pipe_attr_func(swath_height_l, mode_lib->vba.SwathHeightY);
+dml_get_pipe_attr_func(swath_height_c, mode_lib->vba.SwathHeightC);
+dml_get_pipe_attr_func(det_stored_buffer_size_l_bytes, mode_lib->vba.DETBufferSizeY);
+dml_get_pipe_attr_func(det_stored_buffer_size_c_bytes, mode_lib->vba.DETBufferSizeC);
+dml_get_pipe_attr_func(dpte_group_size_in_bytes, mode_lib->vba.dpte_group_bytes);
+dml_get_pipe_attr_func(vm_group_size_in_bytes, mode_lib->vba.vm_group_bytes);
+dml_get_pipe_attr_func(dpte_row_height_linear_l, mode_lib->vba.dpte_row_height_linear);
+dml_get_pipe_attr_func(pte_buffer_mode, mode_lib->vba.PTE_BUFFER_MODE);
+dml_get_pipe_attr_func(subviewport_lines_needed_in_mall, mode_lib->vba.SubViewportLinesNeededInMALL);
double get_total_immediate_flip_bytes(
struct display_mode_lib *mode_lib,
@@ -202,6 +239,67 @@ double get_total_prefetch_bw(
return total_prefetch_bw;
}
+unsigned int get_total_surface_size_in_mall_bytes(
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes)
+{
+ unsigned int k;
+ unsigned int size = 0.0;
+ recalculate_params(mode_lib, pipes, num_pipes);
+ for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
+ size += mode_lib->vba.SurfaceSizeInMALL[k];
+ return size;
+}
+
+static unsigned int get_pipe_idx(struct display_mode_lib *mode_lib, unsigned int plane_idx)
+{
+ int pipe_idx = -1;
+ int i;
+
+ ASSERT(plane_idx < DC__NUM_DPP__MAX);
+
+ for (i = 0; i < DC__NUM_DPP__MAX ; i++) {
+ if (plane_idx == mode_lib->vba.pipe_plane[i]) {
+ pipe_idx = i;
+ break;
+ }
+ }
+ ASSERT(pipe_idx >= 0);
+
+ return pipe_idx;
+}
+
+
+double get_det_buffer_size_kbytes(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes, unsigned int pipe_idx)
+{
+ unsigned int plane_idx;
+ double det_buf_size_kbytes;
+
+ recalculate_params(mode_lib, pipes, num_pipes);
+ plane_idx = mode_lib->vba.pipe_plane[pipe_idx];
+
+ dml_print("DML::%s: num_pipes=%d pipe_idx=%d plane_idx=%0d\n", __func__, num_pipes, pipe_idx, plane_idx);
+ det_buf_size_kbytes = mode_lib->vba.DETBufferSizeInKByte[plane_idx]; // per hubp DET buffer size
+
+ dml_print("DML::%s: det_buf_size_kbytes=%3.2f\n", __func__, det_buf_size_kbytes);
+
+ return det_buf_size_kbytes;
+}
+
+bool get_is_phantom_pipe(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes, unsigned int pipe_idx)
+{
+ unsigned int plane_idx;
+
+ recalculate_params(mode_lib, pipes, num_pipes);
+ plane_idx = mode_lib->vba.pipe_plane[pipe_idx];
+ dml_print("DML::%s: num_pipes=%d pipe_idx=%d UseMALLForPStateChange=%0d\n", __func__, num_pipes, pipe_idx,
+ mode_lib->vba.UsesMALLForPStateChange[plane_idx]);
+ return (mode_lib->vba.UsesMALLForPStateChange[plane_idx] == dm_use_mall_pstate_change_phantom_pipe);
+}
+
static void fetch_socbb_params(struct display_mode_lib *mode_lib)
{
soc_bounding_box_st *soc = &mode_lib->vba.soc;
@@ -241,6 +339,21 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib)
soc->max_avg_sdp_bw_use_normal_percent;
mode_lib->vba.SRExitZ8Time = soc->sr_exit_z8_time_us;
mode_lib->vba.SREnterPlusExitZ8Time = soc->sr_enter_plus_exit_z8_time_us;
+ mode_lib->vba.FCLKChangeLatency = soc->fclk_change_latency_us;
+ mode_lib->vba.USRRetrainingLatency = soc->usr_retraining_latency_us;
+ mode_lib->vba.SMNLatency = soc->smn_latency_us;
+ mode_lib->vba.MALLAllocatedForDCNFinal = soc->mall_allocated_for_dcn_mbytes;
+
+ mode_lib->vba.PercentOfIdealDRAMBWReceivedAfterUrgLatencySTROBE = soc->pct_ideal_dram_bw_after_urgent_strobe;
+ mode_lib->vba.MaxAveragePercentOfIdealFabricBWDisplayCanUseInNormalSystemOperation =
+ soc->max_avg_fabric_bw_use_normal_percent;
+ mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE =
+ soc->max_avg_dram_bw_use_normal_strobe_percent;
+
+ mode_lib->vba.DRAMClockChangeRequirementFinal = soc->dram_clock_change_requirement_final;
+ mode_lib->vba.FCLKChangeRequirementFinal = 1;
+ mode_lib->vba.USRRetrainingRequiredFinal = 1;
+ mode_lib->vba.AllowForPStateChangeOrStutterInVBlankFinal = soc->allow_for_pstate_or_stutter_in_vblank_final;
mode_lib->vba.DRAMClockChangeLatency = soc->dram_clock_change_latency_us;
mode_lib->vba.DummyPStateCheck = soc->dram_clock_change_latency_us == soc->dummy_pstate_latency_us;
mode_lib->vba.DRAMClockChangeSupportsVActive = !soc->disable_dram_clock_change_vactive_support ||
@@ -283,6 +396,7 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib)
mode_lib->vba.SOCCLKPerState[i] = soc->clock_limits[i].socclk_mhz;
mode_lib->vba.PHYCLKPerState[i] = soc->clock_limits[i].phyclk_mhz;
mode_lib->vba.PHYCLKD18PerState[i] = soc->clock_limits[i].phyclk_d18_mhz;
+ mode_lib->vba.PHYCLKD32PerState[i] = soc->clock_limits[i].phyclk_d32_mhz;
mode_lib->vba.MaxDppclk[i] = soc->clock_limits[i].dppclk_mhz;
mode_lib->vba.MaxDSCCLK[i] = soc->clock_limits[i].dscclk_mhz;
mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mts;
@@ -325,6 +439,18 @@ static void fetch_ip_params(struct display_mode_lib *mode_lib)
mode_lib->vba.COMPBUF_RESERVED_SPACE_ZS = ip->compbuf_reserved_space_zs;
mode_lib->vba.MaximumDSCBitsPerComponent = ip->maximum_dsc_bits_per_component;
mode_lib->vba.DSC422NativeSupport = ip->dsc422_native_support;
+ /* In DCN3.2, nomDETInKByte should be initialized correctly. */
+ mode_lib->vba.nomDETInKByte = ip->det_buffer_size_kbytes;
+ mode_lib->vba.CompbufReservedSpace64B = ip->compbuf_reserved_space_64b;
+ mode_lib->vba.CompbufReservedSpaceZs = ip->compbuf_reserved_space_zs;
+ mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal = ip->compressed_buffer_segment_size_in_kbytes;
+ mode_lib->vba.LineBufferSizeFinal = ip->line_buffer_size_bits;
+ mode_lib->vba.AlphaPixelChunkSizeInKByte = ip->alpha_pixel_chunk_size_kbytes; // not ysed
+ mode_lib->vba.MinPixelChunkSizeBytes = ip->min_pixel_chunk_size_bytes; // not used
+ mode_lib->vba.MaximumPixelsPerLinePerDSCUnit = ip->maximum_pixels_per_line_per_dsc_unit;
+ mode_lib->vba.MaxNumDP2p0Outputs = ip->max_num_dp2p0_outputs;
+ mode_lib->vba.MaxNumDP2p0Streams = ip->max_num_dp2p0_streams;
+ mode_lib->vba.DCCMetaBufferSizeBytes = ip->dcc_meta_buffer_size_bytes;
mode_lib->vba.PixelChunkSizeInKByte = ip->pixel_chunk_size_kbytes;
mode_lib->vba.MetaChunkSize = ip->meta_chunk_size_kbytes;
@@ -399,6 +525,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
visited[k] = false;
mode_lib->vba.NumberOfActivePlanes = 0;
+ mode_lib->vba.NumberOfActiveSurfaces = 0;
mode_lib->vba.ImmediateFlipSupport = false;
for (j = 0; j < mode_lib->vba.cache_num_pipes; ++j) {
display_pipe_source_params_st *src = &pipes[j].pipe.src;
@@ -429,6 +556,22 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
src->viewport_y_y;
mode_lib->vba.ViewportYStartC[mode_lib->vba.NumberOfActivePlanes] =
src->viewport_y_c;
+ mode_lib->vba.SourceRotation[mode_lib->vba.NumberOfActiveSurfaces] = src->source_rotation;
+ mode_lib->vba.ViewportXStartY[mode_lib->vba.NumberOfActiveSurfaces] = src->viewport_x_y;
+ mode_lib->vba.ViewportXStartC[mode_lib->vba.NumberOfActiveSurfaces] = src->viewport_x_c;
+ // TODO: Assign correct value to viewport_stationary
+ mode_lib->vba.ViewportStationary[mode_lib->vba.NumberOfActivePlanes] =
+ src->viewport_stationary;
+ mode_lib->vba.UsesMALLForPStateChange[mode_lib->vba.NumberOfActivePlanes] = src->use_mall_for_pstate_change;
+ mode_lib->vba.UseMALLForStaticScreen[mode_lib->vba.NumberOfActivePlanes] = src->use_mall_for_static_screen;
+ mode_lib->vba.GPUVMMinPageSizeKBytes[mode_lib->vba.NumberOfActivePlanes] = src->gpuvm_min_page_size_kbytes;
+ mode_lib->vba.RefreshRate[mode_lib->vba.NumberOfActivePlanes] = dst->refresh_rate; //todo remove this
+ mode_lib->vba.OutputLinkDPRate[mode_lib->vba.NumberOfActivePlanes] = dout->dp_rate;
+ mode_lib->vba.ODMUse[mode_lib->vba.NumberOfActivePlanes] = dst->odm_combine_policy;
+ mode_lib->vba.DETSizeOverride[mode_lib->vba.NumberOfActivePlanes] = src->det_size_override;
+ //TODO: Need to assign correct values to dp_multistream vars
+ mode_lib->vba.OutputMultistreamEn[mode_lib->vba.NumberOfActiveSurfaces] = dout->dp_multistream_en;
+ mode_lib->vba.OutputMultistreamId[mode_lib->vba.NumberOfActiveSurfaces] = dout->dp_multistream_id;
mode_lib->vba.PitchY[mode_lib->vba.NumberOfActivePlanes] = src->data_pitch;
mode_lib->vba.SurfaceWidthY[mode_lib->vba.NumberOfActivePlanes] = src->surface_width_y;
mode_lib->vba.SurfaceHeightY[mode_lib->vba.NumberOfActivePlanes] = src->surface_height_y;
@@ -555,6 +698,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
mode_lib->vba.PixelClock[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz;
mode_lib->vba.PixelClockBackEnd[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz;
mode_lib->vba.DPPCLK[mode_lib->vba.NumberOfActivePlanes] = clks->dppclk_mhz;
+ mode_lib->vba.DRRDisplay[mode_lib->vba.NumberOfActiveSurfaces] = dst->drr_display;
if (ip->is_line_buffer_bpp_fixed)
mode_lib->vba.LBBitPerPixel[mode_lib->vba.NumberOfActivePlanes] =
ip->line_buffer_fixed_bpp;
@@ -677,6 +821,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
}
mode_lib->vba.NumberOfActivePlanes++;
+ mode_lib->vba.NumberOfActiveSurfaces++;
}
// handle overlays through BlendingAndTiming
@@ -702,6 +847,11 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
}
}
+ mode_lib->vba.SynchronizeTimingsFinal = pipes[0].pipe.dest.synchronize_timings;
+ mode_lib->vba.DCCProgrammingAssumesScanDirectionUnknownFinal = false;
+
+ mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = 0;
+
mode_lib->vba.UseUnboundedRequesting = dm_unbounded_requesting;
for (k = 0; k < mode_lib->vba.cache_num_pipes; ++k) {
if (pipes[k].pipe.src.unbounded_req_mode == 0)
@@ -745,6 +895,32 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
mode_lib->vba.GPUVMEnable = mode_lib->vba.GPUVMEnable && !!ip->gpuvm_enable;
mode_lib->vba.HostVMEnable = mode_lib->vba.HostVMEnable && !!ip->hostvm_enable;
+
+ for (k = 0; k < mode_lib->vba.cache_num_pipes; ++k) {
+ mode_lib->vba.ForceOneRowForFrame[k] = pipes[k].pipe.src.force_one_row_for_frame;
+ mode_lib->vba.PteBufferMode[k] = pipes[k].pipe.src.pte_buffer_mode;
+
+ if (mode_lib->vba.PteBufferMode[k] == 0 && mode_lib->vba.GPUVMEnable) {
+ if (mode_lib->vba.ForceOneRowForFrame[k] ||
+ (mode_lib->vba.GPUVMMinPageSizeKBytes[k] > 64*1024) ||
+ (mode_lib->vba.UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_disable) ||
+ (mode_lib->vba.UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable)) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ERROR: Invalid PteBufferMode=%d for plane %0d!\n",
+ __func__, mode_lib->vba.PteBufferMode[k], k);
+ dml_print("DML::%s: - ForceOneRowForFrame = %d\n",
+ __func__, mode_lib->vba.ForceOneRowForFrame[k]);
+ dml_print("DML::%s: - GPUVMMinPageSizeKBytes = %d\n",
+ __func__, mode_lib->vba.GPUVMMinPageSizeKBytes[k]);
+ dml_print("DML::%s: - UseMALLForPStateChange = %d\n",
+ __func__, (int) mode_lib->vba.UsesMALLForPStateChange[k]);
+ dml_print("DML::%s: - UseMALLForStaticScreen = %d\n",
+ __func__, (int) mode_lib->vba.UseMALLForStaticScreen[k]);
+#endif
+ ASSERT(0);
+ }
+ }
+ }
}
/**
@@ -792,7 +968,7 @@ static void recalculate_params(
}
}
-bool Calculate256BBlockSizes(
+void Calculate256BBlockSizes(
enum source_format_class SourcePixelFormat,
enum dm_swizzle_mode SurfaceTiling,
unsigned int BytePerPixelY,
@@ -830,7 +1006,6 @@ bool Calculate256BBlockSizes(
*BlockWidth256BytesY = 256 / BytePerPixelY / *BlockHeight256BytesY;
*BlockWidth256BytesC = 256 / BytePerPixelC / *BlockHeight256BytesC;
}
- return true;
}
bool CalculateMinAndMaxPrefetchMode(
@@ -896,6 +1071,7 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib)
soc_bounding_box_st *soc = &mode_lib->vba.soc;
unsigned int k;
unsigned int total_pipes = 0;
+ unsigned int pipe_idx = 0;
mode_lib->vba.VoltageLevel = mode_lib->vba.cache_pipes[0].clks_cfg.voltage;
mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb];
@@ -917,6 +1093,11 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib)
// Total Available Pipes Support Check
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
total_pipes += mode_lib->vba.DPPPerPlane[k];
+ pipe_idx = get_pipe_idx(mode_lib, k);
+ if (mode_lib->vba.cache_pipes[pipe_idx].clks_cfg.dppclk_mhz > 0.0)
+ mode_lib->vba.DPPCLK[k] = mode_lib->vba.cache_pipes[pipe_idx].clks_cfg.dppclk_mhz;
+ else
+ mode_lib->vba.DPPCLK[k] = soc->clock_limits[mode_lib->vba.VoltageLevel].dppclk_mhz;
}
ASSERT(total_pipes <= DC__NUM_DPP__MAX);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 0603b32971a6..8460aefe7b6d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -58,6 +58,19 @@ dml_get_attr_decl(return_bw);
dml_get_attr_decl(tcalc);
dml_get_attr_decl(fraction_of_urgent_bandwidth);
dml_get_attr_decl(fraction_of_urgent_bandwidth_imm_flip);
+dml_get_attr_decl(cstate_max_cap_mode);
+dml_get_attr_decl(comp_buffer_size_kbytes);
+dml_get_attr_decl(pixel_chunk_size_in_kbyte);
+dml_get_attr_decl(alpha_pixel_chunk_size_in_kbyte);
+dml_get_attr_decl(meta_chunk_size_in_kbyte);
+dml_get_attr_decl(min_pixel_chunk_size_in_byte);
+dml_get_attr_decl(min_meta_chunk_size_in_byte);
+dml_get_attr_decl(fclk_watermark);
+dml_get_attr_decl(usr_retraining_watermark);
+dml_get_attr_decl(comp_buffer_reserved_space_kbytes);
+dml_get_attr_decl(comp_buffer_reserved_space_64bytes);
+dml_get_attr_decl(comp_buffer_reserved_space_zs);
+dml_get_attr_decl(unbounded_request_enabled);
#define dml_get_pipe_attr_decl(attr) double get_##attr(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, unsigned int num_pipes, unsigned int which_pipe)
@@ -75,6 +88,26 @@ dml_get_pipe_attr_decl(dst_y_per_row_vblank);
dml_get_pipe_attr_decl(dst_y_prefetch);
dml_get_pipe_attr_decl(dst_y_per_vm_flip);
dml_get_pipe_attr_decl(dst_y_per_row_flip);
+dml_get_pipe_attr_decl(dst_y_per_pte_row_nom_l);
+dml_get_pipe_attr_decl(dst_y_per_pte_row_nom_c);
+dml_get_pipe_attr_decl(dst_y_per_meta_row_nom_l);
+dml_get_pipe_attr_decl(dst_y_per_meta_row_nom_c);
+dml_get_pipe_attr_decl(dpte_row_height_linear_c);
+dml_get_pipe_attr_decl(swath_height_l);
+dml_get_pipe_attr_decl(swath_height_c);
+dml_get_pipe_attr_decl(det_stored_buffer_size_l_bytes);
+dml_get_pipe_attr_decl(det_stored_buffer_size_c_bytes);
+dml_get_pipe_attr_decl(dpte_group_size_in_bytes);
+dml_get_pipe_attr_decl(vm_group_size_in_bytes);
+dml_get_pipe_attr_decl(det_buffer_size_kbytes);
+dml_get_pipe_attr_decl(dpte_row_height_linear_l);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_nom_l_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_nom_c_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_vblank_l_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_vblank_c_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_flip_l_in_us);
+dml_get_pipe_attr_decl(refcyc_per_pte_group_flip_c_in_us);
+dml_get_pipe_attr_decl(pte_buffer_mode);
dml_get_pipe_attr_decl(refcyc_per_vm_group_vblank);
dml_get_pipe_attr_decl(refcyc_per_vm_group_flip);
dml_get_pipe_attr_decl(refcyc_per_vm_req_vblank);
@@ -108,6 +141,8 @@ dml_get_pipe_attr_decl(vupdate_width);
dml_get_pipe_attr_decl(vready_offset);
dml_get_pipe_attr_decl(vready_at_or_after_vsync);
dml_get_pipe_attr_decl(min_dst_y_next_start);
+dml_get_pipe_attr_decl(vstartup_calculated);
+dml_get_pipe_attr_decl(subviewport_lines_needed_in_mall);
double get_total_immediate_flip_bytes(
struct display_mode_lib *mode_lib,
@@ -126,9 +161,18 @@ unsigned int dml_get_voltage_level(
const display_e2e_pipe_params_st *pipes,
unsigned int num_pipes);
+unsigned int get_total_surface_size_in_mall_bytes(
+ struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes);
+
+bool get_is_phantom_pipe(struct display_mode_lib *mode_lib,
+ const display_e2e_pipe_params_st *pipes,
+ unsigned int num_pipes,
+ unsigned int pipe_idx);
void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct display_mode_lib *mode_lib);
-bool Calculate256BBlockSizes(
+void Calculate256BBlockSizes(
enum source_format_class SourcePixelFormat,
enum dm_swizzle_mode SurfaceTiling,
unsigned int BytePerPixelY,
@@ -138,6 +182,185 @@ bool Calculate256BBlockSizes(
unsigned int *BlockWidth256BytesY,
unsigned int *BlockWidth256BytesC);
+struct dml32_CalculateSwathAndDETConfiguration {
+ unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
+ unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpSwathSizeBytesY;
+ unsigned int RoundedUpSwathSizeBytesC;
+ double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
+ double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
+ unsigned int TotalActiveDPP;
+ bool NoChromaSurfaces;
+ unsigned int DETBufferSizeInKByteForSwathCalculation;
+};
+
+struct dml32_CalculateVMRowAndSwath {
+ unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
+ unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
+ unsigned int PDEAndMetaPTEBytesFrameY;
+ unsigned int PDEAndMetaPTEBytesFrameC;
+ unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
+ unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
+ bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
+};
+
+struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport {
+ unsigned int SurfaceWithMinActiveFCLKChangeMargin;
+ unsigned int DRAMClockChangeSupportNumber;
+ unsigned int LastSurfaceWithoutMargin;
+ unsigned int DRAMClockChangeMethod;
+ bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin;
+ double MinActiveFCLKChangeMargin;
+ double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank;
+ double ActiveClockChangeLatencyHidingY;
+ double ActiveClockChangeLatencyHidingC;
+ double ActiveClockChangeLatencyHiding;
+ double EffectiveDETBufferSizeY;
+ double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
+ double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
+ double TotalPixelBW;
+ bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
+ double EffectiveLBLatencyHidingY;
+ double EffectiveLBLatencyHidingC;
+ double LinesInDETY[DC__NUM_DPP__MAX];
+ double LinesInDETC[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
+ double FullDETBufferingTimeY;
+ double FullDETBufferingTimeC;
+ double WritebackDRAMClockChangeLatencyMargin;
+ double WritebackFCLKChangeLatencyMargin;
+ double WritebackLatencyHiding;
+ bool SameTimingForFCLKChange;
+ unsigned int TotalActiveWriteback;
+ unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
+ unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
+};
+
+struct dml32_CalculatePrefetchSchedule {
+ unsigned int DPPCycles, DISPCLKCycles;
+ double DSTTotalPixelsAfterScaler;
+ double LineTime;
+ double dst_y_prefetch_equ;
+ double prefetch_bw_oto;
+ double Tvm_oto;
+ double Tr0_oto;
+ double Tvm_oto_lines;
+ double Tr0_oto_lines;
+ double dst_y_prefetch_oto;
+ double TimeForFetchingMetaPTE;
+ double TimeForFetchingRowInVBlank;
+ double LinesToRequestPrefetchPixelData;
+ unsigned int HostVMDynamicLevelsTrips;
+ double trip_to_mem;
+ double Tvm_trips;
+ double Tr0_trips;
+ double Tvm_trips_rounded;
+ double Tr0_trips_rounded;
+ double Lsw_oto;
+ double Tpre_rounded;
+ double prefetch_bw_equ;
+ double Tvm_equ;
+ double Tr0_equ;
+ double Tdmbf;
+ double Tdmec;
+ double Tdmsks;
+ double prefetch_sw_bytes;
+ double bytes_pp;
+ double dep_bytes;
+ unsigned int max_vratio_pre;
+ double min_Lsw;
+ double Tsw_est1;
+ double Tsw_est3;
+};
+
+struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation {
+ unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX];
+ double dummy_single_array[2][DC__NUM_DPP__MAX];
+ unsigned int dummy_long_array[2][DC__NUM_DPP__MAX];
+ double dummy_double_array[2][DC__NUM_DPP__MAX];
+ bool dummy_boolean_array[DC__NUM_DPP__MAX];
+ bool dummy_boolean;
+ bool dummy_boolean2;
+ enum output_encoder_class dummy_output_encoder_array[DC__NUM_DPP__MAX];
+ DmlPipe SurfaceParameters[DC__NUM_DPP__MAX];
+ bool dummy_boolean_array2[2][DC__NUM_DPP__MAX];
+ unsigned int ReorderBytes;
+ unsigned int VMDataOnlyReturnBW;
+ double HostVMInefficiencyFactor;
+ DmlPipe myPipe;
+ SOCParametersList mmSOCParameters;
+ double dummy_unit_vector[DC__NUM_DPP__MAX];
+ double dummy_single[2];
+ enum clock_change_support dummy_dramchange_support;
+ enum dm_fclock_change_support dummy_fclkchange_support;
+ bool dummy_USRRetrainingSupport;
+};
+
+struct dml32_ModeSupportAndSystemConfigurationFull {
+ unsigned int dummy_integer_array[22][DC__NUM_DPP__MAX];
+ double dummy_double_array[2][DC__NUM_DPP__MAX];
+ DmlPipe SurfParameters[DC__NUM_DPP__MAX];
+ double dummy_single[5];
+ double dummy_single2[5];
+ SOCParametersList mSOCParameters;
+ unsigned int MaximumSwathWidthSupportLuma;
+ unsigned int MaximumSwathWidthSupportChroma;
+ double DSTYAfterScaler[DC__NUM_DPP__MAX];
+ double DSTXAfterScaler[DC__NUM_DPP__MAX];
+ double MaxTotalVActiveRDBandwidth;
+ bool dummy_boolean_array[2][DC__NUM_DPP__MAX];
+ enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX];
+ DmlPipe myPipe;
+ unsigned int dummy_integer[4];
+ unsigned int TotalNumberOfActiveOTG;
+ unsigned int TotalNumberOfActiveHDMIFRL;
+ unsigned int TotalNumberOfActiveDP2p0;
+ unsigned int TotalNumberOfActiveDP2p0Outputs;
+ unsigned int TotalDSCUnitsRequired;
+ unsigned int ReorderingBytes;
+ unsigned int TotalSlots;
+ unsigned int NumberOfDPPDSC;
+ unsigned int NumberOfDPPNoDSC;
+ unsigned int NextPrefetchModeState;
+ bool MPCCombineMethodAsNeededForPStateChangeAndVoltage;
+ bool MPCCombineMethodAsPossible;
+ bool FullFrameMALLPStateMethod;
+ bool SubViewportMALLPStateMethod;
+ bool PhantomPipeMALLPStateMethod;
+ bool NoChroma;
+ bool TotalAvailablePipesSupportNoDSC;
+ bool TotalAvailablePipesSupportDSC;
+ enum odm_combine_mode ODMModeNoDSC;
+ enum odm_combine_mode ODMModeDSC;
+ double RequiredDISPCLKPerSurfaceNoDSC;
+ double RequiredDISPCLKPerSurfaceDSC;
+ double BWOfNonCombinedSurfaceOfMaximumBandwidth;
+ double VMDataOnlyReturnBWPerState;
+ double HostVMInefficiencyFactor;
+ bool dummy_boolean[2];
+};
+
+struct dummy_vars {
+ struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+ DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation;
+ struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull;
+ struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration;
+ struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath;
+ struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport;
+ struct dml32_CalculatePrefetchSchedule dml32_CalculatePrefetchSchedule;
+};
+
struct vba_vars_st {
ip_params_st ip;
soc_bounding_box_st soc;
@@ -169,6 +392,7 @@ struct vba_vars_st {
double NextMaxVStartup;
double VBlankTime;
double SmallestVBlank;
+ enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal; // Mode Support only
double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
double EffectiveDETPlusLBLinesLuma;
double EffectiveDETPlusLBLinesChroma;
@@ -212,6 +436,13 @@ struct vba_vars_st {
double UrgentLatencyPixelMixedWithVMData;
double UrgentLatencyVMDataOnly;
double UrgentLatency; // max of the above three
+ double USRRetrainingLatency;
+ double SMNLatency;
+ double FCLKChangeLatency;
+ unsigned int MALLAllocatedForDCNFinal;
+ double MaxAveragePercentOfIdealFabricBWDisplayCanUseInNormalSystemOperation;
+ double MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperationSTROBE;
+ double PercentOfIdealDRAMBWReceivedAfterUrgLatencySTROBE;
double WritebackLatency;
double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly; // Mode Support
double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData; // Mode Support
@@ -284,6 +515,14 @@ struct vba_vars_st {
double DPPCLKDelayCNVCCursor;
double DISPCLKDelaySubtotal;
bool ProgressiveToInterlaceUnitInOPP;
+ unsigned int CompressedBufferSegmentSizeInkByteFinal;
+ unsigned int CompbufReservedSpace64B;
+ unsigned int CompbufReservedSpaceZs;
+ unsigned int LineBufferSizeFinal;
+ unsigned int MaximumPixelsPerLinePerDSCUnit;
+ unsigned int AlphaPixelChunkSizeInKByte;
+ double MinPixelChunkSizeBytes;
+ unsigned int DCCMetaBufferSizeBytes;
// Pipe/Plane Parameters
int VoltageLevel;
double FabricClock;
@@ -291,6 +530,23 @@ struct vba_vars_st {
double DISPCLK;
double SOCCLK;
double DCFCLK;
+ unsigned int MaxTotalDETInKByte;
+ unsigned int MinCompressedBufferSizeInKByte;
+ unsigned int NumberOfActiveSurfaces;
+ bool ViewportStationary[DC__NUM_DPP__MAX];
+ unsigned int RefreshRate[DC__NUM_DPP__MAX];
+ double OutputBPP[DC__NUM_DPP__MAX];
+ unsigned int GPUVMMinPageSizeKBytes[DC__NUM_DPP__MAX];
+ bool SynchronizeTimingsFinal;
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ bool ForceOneRowForFrame[DC__NUM_DPP__MAX];
+ unsigned int ViewportXStartY[DC__NUM_DPP__MAX];
+ unsigned int ViewportXStartC[DC__NUM_DPP__MAX];
+ enum dm_rotation_angle SourceRotation[DC__NUM_DPP__MAX];
+ bool DRRDisplay[DC__NUM_DPP__MAX];
+ bool PteBufferMode[DC__NUM_DPP__MAX];
+ enum dm_output_type OutputType[DC__NUM_DPP__MAX];
+ enum dm_output_rate OutputRate[DC__NUM_DPP__MAX];
unsigned int NumberOfActivePlanes;
unsigned int NumberOfDSCSlices[DC__NUM_DPP__MAX];
@@ -355,6 +611,8 @@ struct vba_vars_st {
unsigned int CursorBPP[DC__NUM_DPP__MAX][DC__NUM_CURSOR__MAX];
bool XFCEnabled[DC__NUM_DPP__MAX];
bool ScalerEnabled[DC__NUM_DPP__MAX];
+ unsigned int VBlankNom[DC__NUM_DPP__MAX];
+ bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment;
// Intermediates/Informational
bool ImmediateFlipSupport;
@@ -392,6 +650,16 @@ struct vba_vars_st {
double StutterEfficiencyNotIncludingVBlank;
double NonUrgentLatencyTolerance;
double MinActiveDRAMClockChangeLatencySupported;
+ double Z8StutterEfficiencyBestCase;
+ unsigned int Z8NumberOfStutterBurstsPerFrameBestCase;
+ double Z8StutterEfficiencyNotIncludingVBlankBestCase;
+ double StutterPeriodBestCase;
+ Watermarks Watermark;
+ bool DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
+ unsigned int CompBufReservedSpaceKBytes;
+ unsigned int CompBufReservedSpace64B;
+ unsigned int CompBufReservedSpaceZs;
+ bool CompBufReservedSpaceNeedAdjustment;
// These are the clocks calcuated by the library but they are not actually
// used explicitly. They are fetched by tests and then possibly used. The
@@ -399,6 +667,10 @@ struct vba_vars_st {
double DISPCLK_calculated;
double DPPCLK_calculated[DC__NUM_DPP__MAX];
+ bool ImmediateFlipSupportedSurface[DC__NUM_DPP__MAX];
+
+ bool Use_One_Row_For_Frame[DC__NUM_DPP__MAX];
+ bool Use_One_Row_For_Frame_Flip[DC__NUM_DPP__MAX];
unsigned int VUpdateOffsetPix[DC__NUM_DPP__MAX];
double VUpdateWidthPix[DC__NUM_DPP__MAX];
double VReadyOffsetPix[DC__NUM_DPP__MAX];
@@ -429,6 +701,7 @@ struct vba_vars_st {
double DRAMSpeedPerState[DC__VOLTAGE_STATES];
double MaxDispclk[DC__VOLTAGE_STATES];
int VoltageOverrideLevel;
+ double PHYCLKD32PerState[DC__VOLTAGE_STATES];
/*outputs*/
bool ScaleRatioAndTapsSupport;
@@ -452,6 +725,51 @@ struct vba_vars_st {
bool PitchSupport;
enum dm_validation_status ValidationStatus[DC__VOLTAGE_STATES];
+ /* Mode Support Reason */
+ bool P2IWith420;
+ bool DSCOnlyIfNecessaryWithBPP;
+ bool DSC422NativeNotSupported;
+ bool LinkRateDoesNotMatchDPVersion;
+ bool LinkRateForMultistreamNotIndicated;
+ bool BPPForMultistreamNotIndicated;
+ bool MultistreamWithHDMIOreDP;
+ bool MSOOrODMSplitWithNonDPLink;
+ bool NotEnoughLanesForMSO;
+ bool ViewportExceedsSurface;
+
+ bool ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified;
+ bool ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
+ bool InvalidCombinationOfMALLUseForPStateAndStaticScreen;
+ bool InvalidCombinationOfMALLUseForPState;
+
+ enum dm_output_link_dp_rate OutputLinkDPRate[DC__NUM_DPP__MAX];
+ double PrefetchLinesYThisState[DC__NUM_DPP__MAX];
+ double PrefetchLinesCThisState[DC__NUM_DPP__MAX];
+ double meta_row_bandwidth_this_state[DC__NUM_DPP__MAX];
+ double dpte_row_bandwidth_this_state[DC__NUM_DPP__MAX];
+ double DPTEBytesPerRowThisState[DC__NUM_DPP__MAX];
+ double PDEAndMetaPTEBytesPerFrameThisState[DC__NUM_DPP__MAX];
+ double MetaRowBytesThisState[DC__NUM_DPP__MAX];
+ bool use_one_row_for_frame[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ bool use_one_row_for_frame_flip[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ bool use_one_row_for_frame_this_state[DC__NUM_DPP__MAX];
+ bool use_one_row_for_frame_flip_this_state[DC__NUM_DPP__MAX];
+
+ unsigned int OutputTypeAndRatePerState[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
+ double RequiredDISPCLKPerSurface[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ unsigned int MicroTileHeightY[DC__NUM_DPP__MAX];
+ unsigned int MicroTileHeightC[DC__NUM_DPP__MAX];
+ unsigned int MicroTileWidthY[DC__NUM_DPP__MAX];
+ unsigned int MicroTileWidthC[DC__NUM_DPP__MAX];
+ bool ImmediateFlipRequiredFinal;
+ bool DCCProgrammingAssumesScanDirectionUnknownFinal;
+ bool EnoughWritebackUnits;
+ bool ODMCombine2To1SupportCheckOK[DC__VOLTAGE_STATES];
+ bool NumberOfDP2p0Support;
+ unsigned int MaxNumDP2p0Streams;
+ unsigned int MaxNumDP2p0Outputs;
+ enum dm_output_type OutputTypePerState[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
+ enum dm_output_rate OutputRatePerState[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
double WritebackLineBufferLumaBufferSize;
double WritebackLineBufferChromaBufferSize;
double WritebackMinHSCLRatio;
@@ -647,8 +965,7 @@ struct vba_vars_st {
double dummy7[DC__NUM_DPP__MAX];
double dummy8[DC__NUM_DPP__MAX];
double dummy13[DC__NUM_DPP__MAX];
- unsigned int dummyinteger1ms[DC__NUM_DPP__MAX];
- double dummyinteger2ms[DC__NUM_DPP__MAX];
+ double dummy_double_array[2][DC__NUM_DPP__MAX];
unsigned int dummyinteger3[DC__NUM_DPP__MAX];
unsigned int dummyinteger4[DC__NUM_DPP__MAX];
unsigned int dummyinteger5;
@@ -658,14 +975,8 @@ struct vba_vars_st {
unsigned int dummyinteger9;
unsigned int dummyinteger10;
unsigned int dummyinteger11;
- unsigned int dummyinteger12;
- unsigned int dummyinteger30;
- unsigned int dummyinteger31;
- unsigned int dummyinteger32;
- unsigned int dummyintegerarr1[DC__NUM_DPP__MAX];
- unsigned int dummyintegerarr2[DC__NUM_DPP__MAX];
- unsigned int dummyintegerarr3[DC__NUM_DPP__MAX];
- unsigned int dummyintegerarr4[DC__NUM_DPP__MAX];
+ unsigned int dummy_integer_array[8][DC__NUM_DPP__MAX];
+
bool dummysinglestring;
bool SingleDPPViewportSizeSupportPerPlane[DC__NUM_DPP__MAX];
double PlaneRequiredDISPCLKWithODMCombine2To1;
@@ -805,7 +1116,6 @@ struct vba_vars_st {
double TimePerChromaMetaChunkFlip[DC__NUM_DPP__MAX];
unsigned int DCCCMaxUncompressedBlock[DC__NUM_DPP__MAX];
unsigned int DCCCMaxCompressedBlock[DC__NUM_DPP__MAX];
- unsigned int DCCCIndependent64ByteBlock[DC__NUM_DPP__MAX];
double VStartupMargin;
bool NotEnoughTimeForDynamicMetadata[DC__NUM_DPP__MAX];
@@ -896,8 +1206,8 @@ struct vba_vars_st {
double meta_row_bandwidth[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
double DETBufferSizeYAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
double DETBufferSizeCAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
- int swath_width_luma_ub_all_states[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
- int swath_width_chroma_ub_all_states[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ unsigned int swath_width_luma_ub_all_states[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ unsigned int swath_width_chroma_ub_all_states[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
bool NotUrgentLatencyHiding[DC__VOLTAGE_STATES][2];
unsigned int SwathHeightYAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
unsigned int SwathHeightCAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
@@ -910,8 +1220,6 @@ struct vba_vars_st {
double WritebackDelayTime[DC__NUM_DPP__MAX];
unsigned int DCCYIndependentBlock[DC__NUM_DPP__MAX];
unsigned int DCCCIndependentBlock[DC__NUM_DPP__MAX];
- unsigned int dummyinteger15;
- unsigned int dummyinteger16;
unsigned int dummyinteger17;
unsigned int dummyinteger18;
unsigned int dummyinteger19;
@@ -972,6 +1280,58 @@ struct vba_vars_st {
int Z8NumberOfStutterBurstsPerFrame;
unsigned int MaximumDSCBitsPerComponent;
unsigned int NotEnoughUrgentLatencyHidingA[DC__VOLTAGE_STATES][2];
+ double ReadBandwidthSurfaceLuma[DC__NUM_DPP__MAX];
+ double ReadBandwidthSurfaceChroma[DC__NUM_DPP__MAX];
+ double SurfaceRequiredDISPCLKWithoutODMCombine;
+ double SurfaceRequiredDISPCLK;
+ double MinActiveFCLKChangeLatencySupported;
+ int MinVoltageLevel;
+ int MaxVoltageLevel;
+ unsigned int TotalNumberOfSingleDPPSurfaces[DC__VOLTAGE_STATES][2];
+ unsigned int CompressedBufferSizeInkByteAllStates[DC__VOLTAGE_STATES][2];
+ unsigned int DETBufferSizeInKByteAllStates[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
+ unsigned int DETBufferSizeInKByteThisState[DC__NUM_DPP__MAX];
+ unsigned int SurfaceSizeInMALL[DC__NUM_DPP__MAX];
+ bool ExceededMALLSize;
+ bool PTE_BUFFER_MODE[DC__NUM_DPP__MAX];
+ unsigned int BIGK_FRAGMENT_SIZE[DC__NUM_DPP__MAX];
+ unsigned int CompressedBufferSizeInkByteThisState;
+ enum dm_fclock_change_support FCLKChangeSupport[DC__VOLTAGE_STATES][2];
+ bool USRRetrainingSupport[DC__VOLTAGE_STATES][2];
+ enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX];
+ bool UnboundedRequestEnabledAllStates[DC__VOLTAGE_STATES][2];
+ bool SingleDPPViewportSizeSupportPerSurface[DC__NUM_DPP__MAX];
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[DC__NUM_DPP__MAX];
+ bool UnboundedRequestEnabledThisState;
+ bool DRAMClockChangeRequirementFinal;
+ bool FCLKChangeRequirementFinal;
+ bool USRRetrainingRequiredFinal;
+ unsigned int DETSizeOverride[DC__NUM_DPP__MAX];
+ unsigned int nomDETInKByte;
+ enum mpc_combine_affinity MPCCombineUse[DC__NUM_DPP__MAX];
+ bool MPCCombineMethodIncompatible;
+ unsigned int RequiredSlots[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
+ bool ExceededMultistreamSlots[DC__VOLTAGE_STATES];
+ enum odm_combine_policy ODMUse[DC__NUM_DPP__MAX];
+ unsigned int OutputMultistreamId[DC__NUM_DPP__MAX];
+ bool OutputMultistreamEn[DC__NUM_DPP__MAX];
+ bool UsesMALLForStaticScreen[DC__NUM_DPP__MAX];
+ double MaxActiveDRAMClockChangeLatencySupported[DC__NUM_DPP__MAX];
+ double WritebackAllowFCLKChangeEndPosition[DC__NUM_DPP__MAX];
+ bool PTEBufferSizeNotExceededPerState[DC__NUM_DPP__MAX]; // new in DML32
+ bool DCCMetaBufferSizeNotExceededPerState[DC__NUM_DPP__MAX]; // new in DML32
+ bool NotEnoughDSCSlices[DC__VOLTAGE_STATES];
+ bool PixelsPerLinePerDSCUnitSupport[DC__VOLTAGE_STATES];
+ bool DCCMetaBufferSizeNotExceeded[DC__VOLTAGE_STATES][2];
+ unsigned int dpte_row_height_linear[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_linear_chroma[DC__NUM_DPP__MAX];
+ unsigned int BlockHeightY[DC__NUM_DPP__MAX];
+ unsigned int BlockHeightC[DC__NUM_DPP__MAX];
+ unsigned int BlockWidthY[DC__NUM_DPP__MAX];
+ unsigned int BlockWidthC[DC__NUM_DPP__MAX];
+ unsigned int SubViewportLinesNeededInMALL[DC__NUM_DPP__MAX];
+ bool VActiveBandwithSupport[DC__VOLTAGE_STATES][2];
+ struct dummy_vars dummy_vars;
};
bool CalculateMinAndMaxPrefetchMode(
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c
index d2273674e872..b4b51e51fc25 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c
@@ -23,7 +23,6 @@
*
*/
-#include "dml_wrapper.h"
#include "resource.h"
#include "core_types.h"
#include "dsc.h"
@@ -86,25 +85,6 @@ static void get_pixel_clock_parameters(
}
-static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx)
-{
- get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params);
-
- if (pipe_ctx->clock_source)
- pipe_ctx->clock_source->funcs->get_pix_clk_dividers(
- pipe_ctx->clock_source,
- &pipe_ctx->stream_res.pix_clk_params,
- &pipe_ctx->pll_settings);
-
- pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding;
-
- resource_build_bit_depth_reduction_params(pipe_ctx->stream,
- &pipe_ctx->stream->bit_depth_params);
- build_clamping_params(pipe_ctx->stream);
-
- return DC_OK;
-}
-
static void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
struct bit_depth_reduction_params *fmt_bit_depth)
{
@@ -231,6 +211,30 @@ static void resource_build_bit_depth_reduction_params(struct dc_stream_state *st
fmt_bit_depth->pixel_encoding = pixel_encoding;
}
+/* Move this after the above function as VS complains about
+ * declaration issues for resource_build_bit_depth_reduction_params.
+ */
+
+static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx)
+{
+
+ get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params);
+
+ if (pipe_ctx->clock_source)
+ pipe_ctx->clock_source->funcs->get_pix_clk_dividers(
+ pipe_ctx->clock_source,
+ &pipe_ctx->stream_res.pix_clk_params,
+ &pipe_ctx->pll_settings);
+
+ pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding;
+
+ resource_build_bit_depth_reduction_params(pipe_ctx->stream,
+ &pipe_ctx->stream->bit_depth_params);
+ build_clamping_params(pipe_ctx->stream);
+
+ return DC_OK;
+}
+
bool dml_validate_dsc(struct dc *dc, struct dc_state *new_ctx)
{
int i;
@@ -1130,7 +1134,7 @@ static int dml_populate_dml_pipes_from_context(
{
int i, pipe_cnt;
struct resource_context *res_ctx = &context->res_ctx;
- struct pipe_ctx *pipe;
+ struct pipe_ctx *pipe = NULL; // Fix potentially uninitialized error from VS
populate_dml_pipes_from_context_base(dc, context, pipes, fast_validate);
@@ -1296,6 +1300,7 @@ static void dml_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
+ context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us;
}
}
@@ -1593,11 +1598,8 @@ static void dml_calculate_dlg_params(
context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW;
*/
context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
- /* TODO : Uncomment the below line and make changes
- * as per DML nomenclature once it is available.
- * context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = context->bw_ctx.dml.vba.fclk_pstate_support;
- */
-
+ context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support =
+ context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz;
@@ -1699,12 +1701,11 @@ static void dml_calculate_wm_and_dlg(
context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- //context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_wm_fclk_pstate(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
//context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
/* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */
- context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns / 4;
- context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns / 8;
+ //context->bw_ctx.bw.dcn.watermarks.b.usr_retraining = context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns / 8;
/* Set D:
* All clocks min.
@@ -1736,13 +1737,11 @@ static void dml_calculate_wm_and_dlg(
context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- //context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_wm_fclk_pstate(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
//context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
/* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */
- context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns / 4;
- context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns / 8;
-
+ //context->bw_ctx.bw.dcn.watermarks.d.usr_retraining = context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns / 8;
/* Set C, for Dummy P-State:
* All clocks min.
* DCFCLK: Min, as reported by PM FW, when available
@@ -1773,13 +1772,11 @@ static void dml_calculate_wm_and_dlg(
context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- //context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_wm_fclk_pstate(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
//context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
/* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */
- context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns / 4;
- context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns / 8;
-
+ //context->bw_ctx.bw.dcn.watermarks.c.usr_retraining = context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns / 8;
if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) {
/* The only difference between A and C is p-state latency, if p-state is not supported
* with full p-state latency we want to calculate DLG based on dummy p-state latency,