From e40dd9c6b75d667daea3b320617d4b80495e8afe Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 12 Oct 2023 15:10:53 -0400 Subject: [PATCH 01/54] drm/amdgpu/pm: update SMU 13.0.0 PMFW version check Update the PMFW version check the the ROCm optimizations. Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 9de8284ab9a9..60e81f015407 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2562,7 +2562,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE && (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0)) && ((smu->adev->pm.fw_version == 0x004e6601) || - (smu->adev->pm.fw_version >= 0x004e7300))) { + (smu->adev->pm.fw_version >= 0x004e7400))) { workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, PP_SMC_POWER_PROFILE_POWERSAVING); From ab29ac57ad0b3ab0be7c7635e585651da9f2cd2c Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 12 Oct 2023 17:48:40 +0800 Subject: [PATCH 02/54] drm/amdgpu/umsch: add suspend and resume callback Add missing IP callbacks. Signed-off-by: Lang Yu Acked-by: Alex Deucher Reviewed-by: Veerabadhran Gopalakrishnan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index 1c523eb5f342..ca45ba8ac171 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -843,6 +843,20 @@ static int umsch_mm_hw_fini(void *handle) return 0; } +static int umsch_mm_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return umsch_mm_hw_fini(adev); +} + +static int umsch_mm_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return umsch_mm_hw_init(adev); +} + static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = { .name = "umsch_mm_v4_0", .early_init = umsch_mm_early_init, @@ -851,6 +865,8 @@ static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = { .sw_fini = umsch_mm_sw_fini, .hw_init = umsch_mm_hw_init, .hw_fini = umsch_mm_hw_fini, + .suspend = umsch_mm_suspend, + .resume = umsch_mm_resume, }; const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = { From 2d955a06a5db7388d177fe0d3ce638e7d7b90a16 Mon Sep 17 00:00:00 2001 From: Mangesh Gadre Date: Wed, 11 Oct 2023 16:33:17 +0800 Subject: [PATCH 03/54] Revert "drm/amdgpu: Program xcp_ctl registers as needed" This reverts commit 0bdebfef3fb2b6291000765eaa9c6c8030293fce. XCP_CTL register is programmed by firmware and register access is protected. Signed-off-by: Mangesh Gadre Reviewed-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index db179d085efa..a1c2c952d882 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -623,7 +623,7 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, int num_xccs_per_xcp) { int ret, i, num_xcc; - u32 tmp = 0, regval; + u32 tmp = 0; if (adev->psp.funcs) { ret = psp_spatial_partition(&adev->psp, @@ -631,24 +631,23 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, num_xccs_per_xcp); if (ret) return ret; - } + } else { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); - num_xcc = NUM_XCC(adev->gfx.xcc_mask); - - for (i = 0; i < num_xcc; i++) { - tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, - num_xccs_per_xcp); - tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, - i % num_xccs_per_xcp); - regval = RREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL); - if (regval != tmp) + for (i = 0; i < num_xcc; i++) { + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, + num_xccs_per_xcp); + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, + i % num_xccs_per_xcp); WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL, tmp); + } + ret = 0; } adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp; - return 0; + return ret; } static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) From 16fb2a41e64e3133e9457c85490f6ee36c2ffaaf Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Thu, 12 Oct 2023 17:07:10 +0800 Subject: [PATCH 04/54] drm/amd/display: Add missing lines of code in dc.c [Why & How] A critial part of "drm/amd/display: Fix windowed MPO video with ODM combine for DCN32" is lost during promotion to upstream. This patch addes the code back to dc.c. Signed-off-by: Stylon Wang Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f602ff0d4146..e13d8bab0b33 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3096,6 +3096,9 @@ static bool update_planes_and_stream_state(struct dc *dc, if (update_type >= update_surface_trace_level) update_surface_trace(dc, srf_updates, surface_count); + for (i = 0; i < surface_count; i++) + copy_surface_update_to_plane(srf_updates[i].surface, &srf_updates[i]); + if (update_type >= UPDATE_TYPE_FULL) { struct dc_plane_state *new_planes[MAX_SURFACES] = {0}; @@ -3137,8 +3140,6 @@ static bool update_planes_and_stream_state(struct dc *dc, for (i = 0; i < surface_count; i++) { struct dc_plane_state *surface = srf_updates[i].surface; - copy_surface_update_to_plane(surface, &srf_updates[i]); - if (update_type >= UPDATE_TYPE_MED) { for (j = 0; j < dc->res_pool->pipe_count; j++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; From 53dd920c1f471a5763c660a7b94fe0aaf746d357 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Thu, 5 Oct 2023 15:40:42 +0800 Subject: [PATCH 05/54] drm/amdgpu : Add hive ras recovery check If one of the devices in the hive detects a fatal error, need to send ras recovery reset message to PMFW of all devices in the hive. For that add a flag in hive to indicate that it's undergoing ras recovery Signed-off-by: Asad Kamal Reviewed-by: Hawking Zhang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 10 +++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 973073e07b2a..c7f8dcb3b4d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2140,9 +2140,11 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_device *remote_adev = NULL; struct amdgpu_device *adev = ras->adev; struct list_head device_list, *device_list_handle = NULL; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + if (hive) + atomic_set(&hive->ras_recovery, 1); if (!ras->disable_ras_err_cnt_harvest) { - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); /* Build list of devices to query RAS related errors */ if (hive && adev->gmc.xgmi.num_physical_nodes > 1) { @@ -2159,7 +2161,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) amdgpu_ras_log_on_err_counter(remote_adev); } - amdgpu_put_xgmi_hive(hive); } if (amdgpu_device_should_recover_gpu(ras->adev)) { @@ -2194,6 +2195,10 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); } atomic_set(&ras->in_recovery, 0); + if (hive) { + atomic_set(&hive->ras_recovery, 0); + amdgpu_put_xgmi_hive(hive); + } } /* alloc/realloc bps array */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 86fbf56938f4..6cab882e8061 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -44,6 +44,7 @@ struct amdgpu_hive_info { struct amdgpu_reset_domain *reset_domain; uint32_t device_remove_count; + atomic_t ras_recovery; }; struct amdgpu_pcs_ras_field { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 74fc945a8f9b..c2efb0d24bf8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2238,16 +2238,24 @@ failed: static int smu_v13_0_6_mode1_reset(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; + struct amdgpu_hive_info *hive = NULL; + u32 hive_ras_recovery = 0; struct amdgpu_ras *ras; u32 fatal_err, param; int ret = 0; + hive = amdgpu_get_xgmi_hive(adev); ras = amdgpu_ras_get_context(adev); fatal_err = 0; param = SMU_RESET_MODE_1; + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + /* fatal error triggered by ras, PMFW supports the flag */ - if (ras && atomic_read(&ras->in_recovery)) + if (ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) fatal_err = 1; param |= (fatal_err << 16); From 28ab9a02b6cf3323c677e75045141d1d24631385 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 11 Oct 2023 09:49:14 -0400 Subject: [PATCH 06/54] drm/amdgpu/mes11: remove aggregated doorbell code It's not enabled in hardware so the code is dead. Remove it. Reviewed-by: Jack Xiao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 86 +++++--------------------- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 56 ----------------- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 83 ++++++++----------------- 3 files changed, 40 insertions(+), 185 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 27b224b0688a..91c07ab4f14e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5170,45 +5170,17 @@ static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -5233,42 +5205,14 @@ static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); /* only DOORBELL method supported on gfx11 now */ - } + BUG(); /* only DOORBELL method supported on gfx11 now */ } } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 31b26e6f0b30..4dfec56e1b7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -414,60 +414,6 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } -static void mes_v11_0_init_aggregated_doorbell(struct amdgpu_mes *mes) -{ - struct amdgpu_device *adev = mes->adev; - uint32_t data; - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1); - data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << - CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2); - data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << - CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3); - data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << - CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4); - data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << - CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5); - data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << - CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data); - - data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data); -} - static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, @@ -1243,8 +1189,6 @@ static int mes_v11_0_hw_init(void *handle) if (r) goto failure; - mes_v11_0_init_aggregated_doorbell(&adev->mes); - r = mes_v11_0_query_sched_status(&adev->mes); if (r) { DRM_ERROR("MES is busy\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 1f8122fd1ad7..7e4d5188cbfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -156,68 +156,35 @@ static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; - - DRM_DEBUG("Setting write pointer\n"); - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, - ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "regSDMA%i_GFX_RB_WPTR == 0x%08x " - "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, - ring->me, regSDMA0_QUEUE0_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, - ring->me, regSDMA0_QUEUE0_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, + ring->me, regSDMA0_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, + ring->me, regSDMA0_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } From 828f8e31379b28fe7f07fb5865b8ed099d223fca Mon Sep 17 00:00:00 2001 From: "Kunwu.Chan" Date: Tue, 10 Oct 2023 14:10:39 +0800 Subject: [PATCH 07/54] drm/amd/pm: Fix a memory leak on an error path Add missing free on an error path. Fixes: 511a95552ec8 ("drm/amd/pm: Add SMU 13.0.6 support") Reviewed-by: Yang Wang Signed-off-by: Kunwu.Chan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index c2efb0d24bf8..15daaf7fa2f5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2035,8 +2035,10 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); ret = smu_v13_0_6_get_metrics_table(smu, metrics, true); - if (ret) + if (ret) { + kfree(metrics); return ret; + } smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4); From 20ace55bc0c222eff83fc4ff5d990c110817b49d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 21 Sep 2023 15:43:30 -0400 Subject: [PATCH 08/54] drm/amdgpu: update to the latest GC 11.5 headers Add some additional bitfields. Signed-off-by: Alex Deucher --- .../include/asic_reg/gc/gc_11_5_0_sh_mask.h | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_sh_mask.h index 3404bf10428d..f10e5d1f592b 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_sh_mask.h @@ -19227,6 +19227,9 @@ #define CB_COLOR0_FDCC_CONTROL__FDCC_ENABLE__SHIFT 0x16 #define CB_COLOR0_FDCC_CONTROL__DCC_COMPRESS_DISABLE__SHIFT 0x17 #define CB_COLOR0_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE__SHIFT 0x18 +#define CB_COLOR0_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS__SHIFT 0x19 +#define CB_COLOR0_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE__SHIFT 0x1a +#define CB_COLOR0_FDCC_CONTROL__MAX_COMP_FRAGS__SHIFT 0x1b #define CB_COLOR0_FDCC_CONTROL__SAMPLE_MASK_TRACKER_DISABLE_MASK 0x00000001L #define CB_COLOR0_FDCC_CONTROL__SAMPLE_MASK_TRACKER_FEA_FORCE_MASK 0x00000002L #define CB_COLOR0_FDCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE_MASK 0x0000000CL @@ -19241,6 +19244,9 @@ #define CB_COLOR0_FDCC_CONTROL__FDCC_ENABLE_MASK 0x00400000L #define CB_COLOR0_FDCC_CONTROL__DCC_COMPRESS_DISABLE_MASK 0x00800000L #define CB_COLOR0_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE_MASK 0x01000000L +#define CB_COLOR0_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS_MASK 0x02000000L +#define CB_COLOR0_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE_MASK 0x04000000L +#define CB_COLOR0_FDCC_CONTROL__MAX_COMP_FRAGS_MASK 0x38000000L //CB_COLOR0_DCC_BASE #define CB_COLOR0_DCC_BASE__BASE_256B__SHIFT 0x0 #define CB_COLOR0_DCC_BASE__BASE_256B_MASK 0xFFFFFFFFL @@ -19301,6 +19307,9 @@ #define CB_COLOR1_FDCC_CONTROL__FDCC_ENABLE__SHIFT 0x16 #define CB_COLOR1_FDCC_CONTROL__DCC_COMPRESS_DISABLE__SHIFT 0x17 #define CB_COLOR1_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE__SHIFT 0x18 +#define CB_COLOR1_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS__SHIFT 0x19 +#define CB_COLOR1_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE__SHIFT 0x1a +#define CB_COLOR1_FDCC_CONTROL__MAX_COMP_FRAGS__SHIFT 0x1b #define CB_COLOR1_FDCC_CONTROL__SAMPLE_MASK_TRACKER_DISABLE_MASK 0x00000001L #define CB_COLOR1_FDCC_CONTROL__SAMPLE_MASK_TRACKER_FEA_FORCE_MASK 0x00000002L #define CB_COLOR1_FDCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE_MASK 0x0000000CL @@ -19315,6 +19324,9 @@ #define CB_COLOR1_FDCC_CONTROL__FDCC_ENABLE_MASK 0x00400000L #define CB_COLOR1_FDCC_CONTROL__DCC_COMPRESS_DISABLE_MASK 0x00800000L #define CB_COLOR1_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE_MASK 0x01000000L +#define CB_COLOR1_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS_MASK 0x02000000L +#define CB_COLOR1_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE_MASK 0x04000000L +#define CB_COLOR1_FDCC_CONTROL__MAX_COMP_FRAGS_MASK 0x38000000L //CB_COLOR1_DCC_BASE #define CB_COLOR1_DCC_BASE__BASE_256B__SHIFT 0x0 #define CB_COLOR1_DCC_BASE__BASE_256B_MASK 0xFFFFFFFFL @@ -19375,6 +19387,9 @@ #define CB_COLOR2_FDCC_CONTROL__FDCC_ENABLE__SHIFT 0x16 #define CB_COLOR2_FDCC_CONTROL__DCC_COMPRESS_DISABLE__SHIFT 0x17 #define CB_COLOR2_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE__SHIFT 0x18 +#define CB_COLOR2_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS__SHIFT 0x19 +#define CB_COLOR2_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE__SHIFT 0x1a +#define CB_COLOR2_FDCC_CONTROL__MAX_COMP_FRAGS__SHIFT 0x1b #define CB_COLOR2_FDCC_CONTROL__SAMPLE_MASK_TRACKER_DISABLE_MASK 0x00000001L #define CB_COLOR2_FDCC_CONTROL__SAMPLE_MASK_TRACKER_FEA_FORCE_MASK 0x00000002L #define CB_COLOR2_FDCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE_MASK 0x0000000CL @@ -19389,6 +19404,9 @@ #define CB_COLOR2_FDCC_CONTROL__FDCC_ENABLE_MASK 0x00400000L #define CB_COLOR2_FDCC_CONTROL__DCC_COMPRESS_DISABLE_MASK 0x00800000L #define CB_COLOR2_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE_MASK 0x01000000L +#define CB_COLOR2_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS_MASK 0x02000000L +#define CB_COLOR2_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE_MASK 0x04000000L +#define CB_COLOR2_FDCC_CONTROL__MAX_COMP_FRAGS_MASK 0x38000000L //CB_COLOR2_DCC_BASE #define CB_COLOR2_DCC_BASE__BASE_256B__SHIFT 0x0 #define CB_COLOR2_DCC_BASE__BASE_256B_MASK 0xFFFFFFFFL @@ -19449,6 +19467,9 @@ #define CB_COLOR3_FDCC_CONTROL__FDCC_ENABLE__SHIFT 0x16 #define CB_COLOR3_FDCC_CONTROL__DCC_COMPRESS_DISABLE__SHIFT 0x17 #define CB_COLOR3_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE__SHIFT 0x18 +#define CB_COLOR3_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS__SHIFT 0x19 +#define CB_COLOR3_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE__SHIFT 0x1a +#define CB_COLOR3_FDCC_CONTROL__MAX_COMP_FRAGS__SHIFT 0x1b #define CB_COLOR3_FDCC_CONTROL__SAMPLE_MASK_TRACKER_DISABLE_MASK 0x00000001L #define CB_COLOR3_FDCC_CONTROL__SAMPLE_MASK_TRACKER_FEA_FORCE_MASK 0x00000002L #define CB_COLOR3_FDCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE_MASK 0x0000000CL @@ -19463,6 +19484,9 @@ #define CB_COLOR3_FDCC_CONTROL__FDCC_ENABLE_MASK 0x00400000L #define CB_COLOR3_FDCC_CONTROL__DCC_COMPRESS_DISABLE_MASK 0x00800000L #define CB_COLOR3_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE_MASK 0x01000000L +#define CB_COLOR3_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS_MASK 0x02000000L +#define CB_COLOR3_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE_MASK 0x04000000L +#define CB_COLOR3_FDCC_CONTROL__MAX_COMP_FRAGS_MASK 0x38000000L //CB_COLOR3_DCC_BASE #define CB_COLOR3_DCC_BASE__BASE_256B__SHIFT 0x0 #define CB_COLOR3_DCC_BASE__BASE_256B_MASK 0xFFFFFFFFL @@ -19523,6 +19547,9 @@ #define CB_COLOR4_FDCC_CONTROL__FDCC_ENABLE__SHIFT 0x16 #define CB_COLOR4_FDCC_CONTROL__DCC_COMPRESS_DISABLE__SHIFT 0x17 #define CB_COLOR4_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE__SHIFT 0x18 +#define CB_COLOR4_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS__SHIFT 0x19 +#define CB_COLOR4_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE__SHIFT 0x1a +#define CB_COLOR4_FDCC_CONTROL__MAX_COMP_FRAGS__SHIFT 0x1b #define CB_COLOR4_FDCC_CONTROL__SAMPLE_MASK_TRACKER_DISABLE_MASK 0x00000001L #define CB_COLOR4_FDCC_CONTROL__SAMPLE_MASK_TRACKER_FEA_FORCE_MASK 0x00000002L #define CB_COLOR4_FDCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE_MASK 0x0000000CL @@ -19537,6 +19564,9 @@ #define CB_COLOR4_FDCC_CONTROL__FDCC_ENABLE_MASK 0x00400000L #define CB_COLOR4_FDCC_CONTROL__DCC_COMPRESS_DISABLE_MASK 0x00800000L #define CB_COLOR4_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE_MASK 0x01000000L +#define CB_COLOR4_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS_MASK 0x02000000L +#define CB_COLOR4_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE_MASK 0x04000000L +#define CB_COLOR4_FDCC_CONTROL__MAX_COMP_FRAGS_MASK 0x38000000L //CB_COLOR4_DCC_BASE #define CB_COLOR4_DCC_BASE__BASE_256B__SHIFT 0x0 #define CB_COLOR4_DCC_BASE__BASE_256B_MASK 0xFFFFFFFFL @@ -19597,6 +19627,9 @@ #define CB_COLOR5_FDCC_CONTROL__FDCC_ENABLE__SHIFT 0x16 #define CB_COLOR5_FDCC_CONTROL__DCC_COMPRESS_DISABLE__SHIFT 0x17 #define CB_COLOR5_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE__SHIFT 0x18 +#define CB_COLOR5_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS__SHIFT 0x19 +#define CB_COLOR5_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE__SHIFT 0x1a +#define CB_COLOR5_FDCC_CONTROL__MAX_COMP_FRAGS__SHIFT 0x1b #define CB_COLOR5_FDCC_CONTROL__SAMPLE_MASK_TRACKER_DISABLE_MASK 0x00000001L #define CB_COLOR5_FDCC_CONTROL__SAMPLE_MASK_TRACKER_FEA_FORCE_MASK 0x00000002L #define CB_COLOR5_FDCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE_MASK 0x0000000CL @@ -19611,6 +19644,9 @@ #define CB_COLOR5_FDCC_CONTROL__FDCC_ENABLE_MASK 0x00400000L #define CB_COLOR5_FDCC_CONTROL__DCC_COMPRESS_DISABLE_MASK 0x00800000L #define CB_COLOR5_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE_MASK 0x01000000L +#define CB_COLOR5_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS_MASK 0x02000000L +#define CB_COLOR5_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE_MASK 0x04000000L +#define CB_COLOR5_FDCC_CONTROL__MAX_COMP_FRAGS_MASK 0x38000000L //CB_COLOR5_DCC_BASE #define CB_COLOR5_DCC_BASE__BASE_256B__SHIFT 0x0 #define CB_COLOR5_DCC_BASE__BASE_256B_MASK 0xFFFFFFFFL @@ -19671,6 +19707,9 @@ #define CB_COLOR6_FDCC_CONTROL__FDCC_ENABLE__SHIFT 0x16 #define CB_COLOR6_FDCC_CONTROL__DCC_COMPRESS_DISABLE__SHIFT 0x17 #define CB_COLOR6_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE__SHIFT 0x18 +#define CB_COLOR6_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS__SHIFT 0x19 +#define CB_COLOR6_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE__SHIFT 0x1a +#define CB_COLOR6_FDCC_CONTROL__MAX_COMP_FRAGS__SHIFT 0x1b #define CB_COLOR6_FDCC_CONTROL__SAMPLE_MASK_TRACKER_DISABLE_MASK 0x00000001L #define CB_COLOR6_FDCC_CONTROL__SAMPLE_MASK_TRACKER_FEA_FORCE_MASK 0x00000002L #define CB_COLOR6_FDCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE_MASK 0x0000000CL @@ -19685,6 +19724,9 @@ #define CB_COLOR6_FDCC_CONTROL__FDCC_ENABLE_MASK 0x00400000L #define CB_COLOR6_FDCC_CONTROL__DCC_COMPRESS_DISABLE_MASK 0x00800000L #define CB_COLOR6_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE_MASK 0x01000000L +#define CB_COLOR6_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS_MASK 0x02000000L +#define CB_COLOR6_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE_MASK 0x04000000L +#define CB_COLOR6_FDCC_CONTROL__MAX_COMP_FRAGS_MASK 0x38000000L //CB_COLOR6_DCC_BASE #define CB_COLOR6_DCC_BASE__BASE_256B__SHIFT 0x0 #define CB_COLOR6_DCC_BASE__BASE_256B_MASK 0xFFFFFFFFL @@ -19745,6 +19787,9 @@ #define CB_COLOR7_FDCC_CONTROL__FDCC_ENABLE__SHIFT 0x16 #define CB_COLOR7_FDCC_CONTROL__DCC_COMPRESS_DISABLE__SHIFT 0x17 #define CB_COLOR7_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE__SHIFT 0x18 +#define CB_COLOR7_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS__SHIFT 0x19 +#define CB_COLOR7_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE__SHIFT 0x1a +#define CB_COLOR7_FDCC_CONTROL__MAX_COMP_FRAGS__SHIFT 0x1b #define CB_COLOR7_FDCC_CONTROL__SAMPLE_MASK_TRACKER_DISABLE_MASK 0x00000001L #define CB_COLOR7_FDCC_CONTROL__SAMPLE_MASK_TRACKER_FEA_FORCE_MASK 0x00000002L #define CB_COLOR7_FDCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE_MASK 0x0000000CL @@ -19759,6 +19804,9 @@ #define CB_COLOR7_FDCC_CONTROL__FDCC_ENABLE_MASK 0x00400000L #define CB_COLOR7_FDCC_CONTROL__DCC_COMPRESS_DISABLE_MASK 0x00800000L #define CB_COLOR7_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE_MASK 0x01000000L +#define CB_COLOR7_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS_MASK 0x02000000L +#define CB_COLOR7_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE_MASK 0x04000000L +#define CB_COLOR7_FDCC_CONTROL__MAX_COMP_FRAGS_MASK 0x38000000L //CB_COLOR7_DCC_BASE #define CB_COLOR7_DCC_BASE__BASE_256B__SHIFT 0x0 #define CB_COLOR7_DCC_BASE__BASE_256B_MASK 0xFFFFFFFFL From 2ceec37b0e3d470c4ef0ca0b7b71df52b99e040b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 17 Oct 2023 14:37:43 -0500 Subject: [PATCH 09/54] drm/amd: Add missing kernel doc for prepare_suspend() prepare_suspend() is intended to be used for any IP blocks that must allocate memory during the suspend sequence. Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20231017143555.6a6450fc@canb.auug.org.au/ Fixes: cb11ca3233aa ("drm/amd: Add concept of running prepare_suspend() sequence for IP blocks") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/amd_shared.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 614aba75606f..7f98394338c2 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -273,6 +273,8 @@ enum amd_dpm_forced_level; * @hw_init: sets up the hw state * @hw_fini: tears down the hw state * @late_fini: final cleanup + * @prepare_suspend: handle IP specific changes to prepare for suspend + * (such as allocating any required memory) * @suspend: handles IP specific hw/sw changes for suspend * @resume: handles IP specific hw/sw changes for resume * @is_idle: returns current IP block idle status From d8c1925ba8cde2863297728a4c8fbf8fe766757a Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Mon, 16 Oct 2023 22:10:34 +0800 Subject: [PATCH 10/54] drm/amdgpu: update retry times for psp BL wait Increase retry time for PSP BL wait, to compensate for longer time to set c2pmsg 35 ready bit during mode1 with RAS Signed-off-by: Asad Kamal Reviewed-by: Hawking Zhang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 573046702861..4142e2fcd866 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -168,7 +168,7 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) * If there is an error in processing command, bits[7:0] will be set. * This is applicable for PSP v13.0.6 and newer. */ - for (retry_loop = 0; retry_loop < 10; retry_loop++) { + for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), 0x80000000, 0xffffffff, false); From d757dfd667aad54c6ed0b6f22a11ad5a317663de Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 13 Oct 2023 14:26:02 -0500 Subject: [PATCH 11/54] drm/amd: Move microcode init step to early_init() The intention for early init is to find any missing microcode early and fail the driver load if it's missing. Move this step to earlier in driver init to match other IP blocks. Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 91c07ab4f14e..f994508a36ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -589,6 +589,14 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) adev->gfx.mec2_fw = NULL; gfx_v11_0_check_fw_cp_gfx_shadow(adev); + + if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) { + err = adev->gfx.imu.funcs->init_microcode(adev); + if (err) + DRM_ERROR("Failed to init imu firmware!\n"); + return err; + } + out: if (err) { amdgpu_ucode_release(&adev->gfx.pfp_fw); @@ -1395,14 +1403,6 @@ static int gfx_v11_0_sw_init(void *handle) adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; - if (adev->gfx.imu.funcs) { - if (adev->gfx.imu.funcs->init_microcode) { - r = adev->gfx.imu.funcs->init_microcode(adev); - if (r) - DRM_ERROR("Failed to load imu firmware!\n"); - } - } - gfx_v11_0_me_init(adev); r = gfx_v11_0_rlc_init(adev); From 4916615fe96fb530517b0d46702c750c20a5601c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 13 Oct 2023 14:26:03 -0500 Subject: [PATCH 12/54] drm/amd: Don't parse IMU ucode version if it won't be loaded When the IMU ucode is loaded by the PSP parsing the version that comes from Linux will vary. Rather than showing the wrong data to kernel interface consumers, avoid populating it in this case. Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/imu_v11_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index 875fb5ac70b5..c0bdab3bf0e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -55,7 +55,6 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data; - adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version); //adev->gfx.imu_feature_version = le32_to_cpu(imu_hdr->ucode_feature_version); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { @@ -69,7 +68,8 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) info->fw = adev->gfx.imu_fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE); - } + } else + adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version); out: if (err) { From e56690bb37eb202cfc31deb6b794dc8fca9b9a89 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 13 Oct 2023 14:26:04 -0500 Subject: [PATCH 13/54] drm/amd: Read IMU FW version from scratch register during hw_init If the IMU version wasn't discovered from the header, such as when the firmware was directly loaded by PSP then there is no firmware version to show to userspace from sysfs or IOCTL. The IMU F/W stores the version in the first scratch register though, so fetch it in these cases to let the driver export. Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index f994508a36ea..fd22943685f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4373,6 +4373,10 @@ static int gfx_v11_0_hw_init(void *handle) if (r) return r; + /* get IMU version from HW if it's not set */ + if (!adev->gfx.imu_fw_version) + adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); + return r; } From bf2cc5e959951e81bc25beb0b8feb7ec8ab6e5cc Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Fri, 13 Oct 2023 16:10:53 +0800 Subject: [PATCH 14/54] drm/amd/display: Remove brackets in macro to conform to coding style [Why] Many of the register macros defined ind dcn32_resource.h have extra brackets. This is not conforming to the style of those defined in other DC header files. [How] Remove these brackets in dcn32_resource.h Reviewed-by: Aurabindo Pillai Signed-off-by: Stylon Wang Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 154 ++++-------------- 1 file changed, 36 insertions(+), 118 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index f075982363be..b931008114c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -187,7 +187,6 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int /* CLK SRC */ #define CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid) \ - ( \ SRI_ARR_ALPHABET(PIXCLK_RESYNC_CNTL, PHYPLL, index, pllid), \ SRII_ARR_2(PHASE, DP_DTO, 0, index), \ SRII_ARR_2(PHASE, DP_DTO, 1, index), \ @@ -200,12 +199,10 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 0, index), \ SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 1, index), \ SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 2, index), \ - SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 3, index) \ - ) + SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 3, index) /* ABM */ #define ABM_DCN32_REG_LIST_RI(id) \ - ( \ SRI_ARR(DC_ABM1_HG_SAMPLE_RATE, ABM, id), \ SRI_ARR(DC_ABM1_LS_SAMPLE_RATE, ABM, id), \ SRI_ARR(BL1_PWM_BL_UPDATE_SAMPLE_RATE, ABM, id), \ @@ -217,12 +214,10 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRI_ARR(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM, id), \ SRI_ARR(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \ SRI_ARR(DC_ABM1_ACE_OFFSET_SLOPE_0, ABM, id), \ - SRI_ARR(DC_ABM1_ACE_THRES_12, ABM, id), NBIO_SR_ARR(BIOS_SCRATCH_2, id) \ - ) + SRI_ARR(DC_ABM1_ACE_THRES_12, ABM, id), NBIO_SR_ARR(BIOS_SCRATCH_2, id) /* Audio */ #define AUD_COMMON_REG_LIST_RI(id) \ - ( \ SRI_ARR(AZALIA_F0_CODEC_ENDPOINT_INDEX, AZF0ENDPOINT, id), \ SRI_ARR(AZALIA_F0_CODEC_ENDPOINT_DATA, AZF0ENDPOINT, id), \ SR_ARR(AZALIA_F0_CODEC_FUNCTION_PARAMETER_STREAM_FORMATS, id), \ @@ -231,41 +226,33 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SR_ARR(DCCG_AUDIO_DTO_SOURCE, id), SR_ARR(DCCG_AUDIO_DTO0_MODULE, id), \ SR_ARR(DCCG_AUDIO_DTO0_PHASE, id), SR_ARR(DCCG_AUDIO_DTO1_MODULE, id), \ SR_ARR(DCCG_AUDIO_DTO1_PHASE, id) \ - ) /* VPG */ #define VPG_DCN3_REG_LIST_RI(id) \ - ( \ SRI_ARR(VPG_GENERIC_STATUS, VPG, id), \ SRI_ARR(VPG_GENERIC_PACKET_ACCESS_CTRL, VPG, id), \ SRI_ARR(VPG_GENERIC_PACKET_DATA, VPG, id), \ SRI_ARR(VPG_GSP_FRAME_UPDATE_CTRL, VPG, id), \ - SRI_ARR(VPG_GSP_IMMEDIATE_UPDATE_CTRL, VPG, id) \ - ) + SRI_ARR(VPG_GSP_IMMEDIATE_UPDATE_CTRL, VPG, id) /* AFMT */ #define AFMT_DCN3_REG_LIST_RI(id) \ - ( \ SRI_ARR(AFMT_INFOFRAME_CONTROL0, AFMT, id), \ SRI_ARR(AFMT_VBI_PACKET_CONTROL, AFMT, id), \ SRI_ARR(AFMT_AUDIO_PACKET_CONTROL, AFMT, id), \ SRI_ARR(AFMT_AUDIO_PACKET_CONTROL2, AFMT, id), \ SRI_ARR(AFMT_AUDIO_SRC_CONTROL, AFMT, id), \ SRI_ARR(AFMT_60958_0, AFMT, id), SRI_ARR(AFMT_60958_1, AFMT, id), \ - SRI_ARR(AFMT_60958_2, AFMT, id), SRI_ARR(AFMT_MEM_PWR, AFMT, id) \ - ) + SRI_ARR(AFMT_60958_2, AFMT, id), SRI_ARR(AFMT_MEM_PWR, AFMT, id) /* APG */ #define APG_DCN31_REG_LIST_RI(id) \ - (\ SRI_ARR(APG_CONTROL, APG, id), SRI_ARR(APG_CONTROL2, APG, id), \ - SRI_ARR(APG_MEM_PWR, APG, id), SRI_ARR(APG_DBG_GEN_CONTROL, APG, id) \ - ) + SRI_ARR(APG_MEM_PWR, APG, id), SRI_ARR(APG_DBG_GEN_CONTROL, APG, id) /* Stream encoder */ #define SE_DCN32_REG_LIST_RI(id) \ - ( \ SRI_ARR(AFMT_CNTL, DIG, id), SRI_ARR(DIG_FE_CNTL, DIG, id), \ SRI_ARR(HDMI_CONTROL, DIG, id), SRI_ARR(HDMI_DB_CONTROL, DIG, id), \ SRI_ARR(HDMI_GC, DIG, id), \ @@ -309,28 +296,22 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \ SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \ SRI_ARR(DIG_FE_CNTL, DIG, id), SRI_ARR(DIG_CLOCK_PATTERN, DIG, id), \ - SRI_ARR(DIG_FIFO_CTRL0, DIG, id) \ - ) + SRI_ARR(DIG_FIFO_CTRL0, DIG, id) /* Aux regs */ #define AUX_REG_LIST_RI(id) \ - ( \ SRI_ARR(AUX_CONTROL, DP_AUX, id), SRI_ARR(AUX_DPHY_RX_CONTROL0, DP_AUX, id), \ - SRI_ARR(AUX_DPHY_RX_CONTROL1, DP_AUX, id) \ - ) + SRI_ARR(AUX_DPHY_RX_CONTROL1, DP_AUX, id) #define DCN2_AUX_REG_LIST_RI(id) \ - ( \ - AUX_REG_LIST_RI(id), SRI_ARR(AUX_DPHY_TX_CONTROL, DP_AUX, id) \ - ) + AUX_REG_LIST_RI(id), SRI_ARR(AUX_DPHY_TX_CONTROL, DP_AUX, id) /* HDP */ #define HPD_REG_LIST_RI(id) SRI_ARR(DC_HPD_CONTROL, HPD, id) /* Link encoder */ #define LE_DCN3_REG_LIST_RI(id) \ - ( \ SRI_ARR(DIG_BE_CNTL, DIG, id), SRI_ARR(DIG_BE_EN_CNTL, DIG, id), \ SRI_ARR(TMDS_CTL_BITS, DIG, id), \ SRI_ARR(TMDS_DCBALANCER_CONTROL, DIG, id), SRI_ARR(DP_CONFIG, DP, id), \ @@ -344,26 +325,20 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRI_ARR(DP_SEC_CNTL, DP, id), SRI_ARR(DP_VID_STREAM_CNTL, DP, id), \ SRI_ARR(DP_DPHY_FAST_TRAINING, DP, id), SRI_ARR(DP_SEC_CNTL1, DP, id), \ SRI_ARR(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \ - SRI_ARR(DP_DPHY_HBR2_PATTERN_CONTROL, DP, id) \ - ) + SRI_ARR(DP_DPHY_HBR2_PATTERN_CONTROL, DP, id) #define LE_DCN31_REG_LIST_RI(id) \ - ( \ LE_DCN3_REG_LIST_RI(id), SRI_ARR(DP_DPHY_INTERNAL_CTRL, DP, id), \ SR_ARR(DIO_LINKA_CNTL, id), SR_ARR(DIO_LINKB_CNTL, id), \ SR_ARR(DIO_LINKC_CNTL, id), SR_ARR(DIO_LINKD_CNTL, id), \ - SR_ARR(DIO_LINKE_CNTL, id), SR_ARR(DIO_LINKF_CNTL, id) \ - ) + SR_ARR(DIO_LINKE_CNTL, id), SR_ARR(DIO_LINKF_CNTL, id) #define UNIPHY_DCN2_REG_LIST_RI(id, phyid) \ - ( \ SRI_ARR_ALPHABET(CLOCK_ENABLE, SYMCLK, id, phyid), \ - SRI_ARR_ALPHABET(CHANNEL_XBAR_CNTL, UNIPHY, id, phyid) \ - ) + SRI_ARR_ALPHABET(CHANNEL_XBAR_CNTL, UNIPHY, id, phyid) /* HPO DP stream encoder */ #define DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id) \ - ( \ SR_ARR(DP_STREAM_MAPPER_CONTROL0, id), \ SR_ARR(DP_STREAM_MAPPER_CONTROL1, id), \ SR_ARR(DP_STREAM_MAPPER_CONTROL2, id), \ @@ -398,12 +373,10 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRI_ARR(DP_SYM32_ENC_SDP_METADATA_PACKET_CONTROL, DP_SYM32_ENC, id), \ SRI_ARR(DP_SYM32_ENC_SDP_AUDIO_CONTROL0, DP_SYM32_ENC, id), \ SRI_ARR(DP_SYM32_ENC_VID_CRC_CONTROL, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_HBLANK_CONTROL, DP_SYM32_ENC, id) \ - ) + SRI_ARR(DP_SYM32_ENC_HBLANK_CONTROL, DP_SYM32_ENC, id) /* HPO DP link encoder regs */ #define DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id) \ - ( \ SRI_ARR(DP_LINK_ENC_CLOCK_CONTROL, DP_LINK_ENC, id), \ SRI_ARR(DP_DPHY_SYM32_CONTROL, DP_DPHY_SYM32, id), \ SRI_ARR(DP_DPHY_SYM32_STATUS, DP_DPHY_SYM32, id), \ @@ -432,12 +405,10 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL1, DP_DPHY_SYM32, id), \ SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL2, DP_DPHY_SYM32, id), \ SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL3, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_SAT_UPDATE, DP_DPHY_SYM32, id) \ - ) + SRI_ARR(DP_DPHY_SYM32_SAT_UPDATE, DP_DPHY_SYM32, id) /* DPP */ #define DPP_REG_LIST_DCN30_COMMON_RI(id) \ - ( \ SRI_ARR(CM_DEALPHA, CM, id), SRI_ARR(CM_MEM_PWR_STATUS, CM, id), \ SRI_ARR(CM_BIAS_CR_R, CM, id), SRI_ARR(CM_BIAS_Y_G_CB_B, CM, id), \ SRI_ARR(PRE_DEGAM, CNVC_CFG, id), SRI_ARR(CM_GAMCOR_CONTROL, CM, id), \ @@ -552,12 +523,10 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRI_ARR(CURSOR_CONTROL, CURSOR0_, id), \ SRI_ARR(OBUF_MEM_PWR_CTRL, DSCL, id), \ SRI_ARR(DSCL_MEM_PWR_STATUS, DSCL, id), \ - SRI_ARR(DSCL_MEM_PWR_CTRL, DSCL, id) \ - ) + SRI_ARR(DSCL_MEM_PWR_CTRL, DSCL, id) /* OPP */ #define OPP_REG_LIST_DCN_RI(id) \ - ( \ SRI_ARR(FMT_BIT_DEPTH_CONTROL, FMT, id), SRI_ARR(FMT_CONTROL, FMT, id), \ SRI_ARR(FMT_DITHER_RAND_R_SEED, FMT, id), \ SRI_ARR(FMT_DITHER_RAND_G_SEED, FMT, id), \ @@ -569,37 +538,29 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRI_ARR(OPPBUF_3D_PARAMETERS_0, OPPBUF, id), \ SRI_ARR(OPPBUF_3D_PARAMETERS_1, OPPBUF, id), \ SRI_ARR(OPP_PIPE_CONTROL, OPP_PIPE, id) \ - ) #define OPP_REG_LIST_DCN10_RI(id) OPP_REG_LIST_DCN_RI(id) #define OPP_DPG_REG_LIST_RI(id) \ - ( \ SRI_ARR(DPG_CONTROL, DPG, id), SRI_ARR(DPG_DIMENSIONS, DPG, id), \ SRI_ARR(DPG_OFFSET_SEGMENT, DPG, id), SRI_ARR(DPG_COLOUR_B_CB, DPG, id), \ SRI_ARR(DPG_COLOUR_G_Y, DPG, id), SRI_ARR(DPG_COLOUR_R_CR, DPG, id), \ - SRI_ARR(DPG_RAMP_CONTROL, DPG, id), SRI_ARR(DPG_STATUS, DPG, id) \ - ) + SRI_ARR(DPG_RAMP_CONTROL, DPG, id), SRI_ARR(DPG_STATUS, DPG, id) #define OPP_REG_LIST_DCN30_RI(id) \ - ( \ OPP_REG_LIST_DCN10_RI(id), OPP_DPG_REG_LIST_RI(id), \ - SRI_ARR(FMT_422_CONTROL, FMT, id) \ - ) + SRI_ARR(FMT_422_CONTROL, FMT, id) /* Aux engine regs */ #define AUX_COMMON_REG_LIST0_RI(id) \ - ( \ SRI_ARR(AUX_CONTROL, DP_AUX, id), SRI_ARR(AUX_ARB_CONTROL, DP_AUX, id), \ SRI_ARR(AUX_SW_DATA, DP_AUX, id), SRI_ARR(AUX_SW_CONTROL, DP_AUX, id), \ SRI_ARR(AUX_INTERRUPT_CONTROL, DP_AUX, id), \ SRI_ARR(AUX_DPHY_RX_CONTROL1, DP_AUX, id), \ - SRI_ARR(AUX_SW_STATUS, DP_AUX, id) \ - ) + SRI_ARR(AUX_SW_STATUS, DP_AUX, id) /* DWBC */ #define DWBC_COMMON_REG_LIST_DCN30_RI(id) \ - ( \ SR_ARR(DWB_ENABLE_CLK_CTRL, id), SR_ARR(DWB_MEM_PWR_CTRL, id), \ SR_ARR(FC_MODE_CTRL, id), SR_ARR(FC_FLOW_CTRL, id), \ SR_ARR(FC_WINDOW_START, id), SR_ARR(FC_WINDOW_SIZE, id), \ @@ -693,13 +654,11 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SR_ARR(DWB_OGAM_RAMB_REGION_26_27, id), \ SR_ARR(DWB_OGAM_RAMB_REGION_28_29, id), \ SR_ARR(DWB_OGAM_RAMB_REGION_30_31, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_32_33, id) \ - ) + SR_ARR(DWB_OGAM_RAMB_REGION_32_33, id) /* MCIF */ #define MCIF_WB_COMMON_REG_LIST_DCN32_RI(inst) \ - ( \ SRI2_ARR(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB, inst), \ SRI2_ARR(MCIF_WB_BUFMGR_STATUS, MCIF_WB, inst), \ SRI2_ARR(MCIF_WB_BUF_PITCH, MCIF_WB, inst), \ @@ -747,13 +706,11 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRI2_ARR(MMHUBBUB_WARMUP_ADDR_REGION, MMHUBBUB, inst), \ SRI2_ARR(MMHUBBUB_WARMUP_BASE_ADDR_HIGH, MMHUBBUB, inst), \ SRI2_ARR(MMHUBBUB_WARMUP_BASE_ADDR_LOW, MMHUBBUB, inst), \ - SRI2_ARR(MMHUBBUB_WARMUP_CONTROL_STATUS, MMHUBBUB, inst) \ - ) + SRI2_ARR(MMHUBBUB_WARMUP_CONTROL_STATUS, MMHUBBUB, inst) /* DSC */ #define DSC_REG_LIST_DCN20_RI(id) \ - ( \ SRI_ARR(DSC_TOP_CONTROL, DSC_TOP, id), \ SRI_ARR(DSC_DEBUG_CONTROL, DSC_TOP, id), \ SRI_ARR(DSCC_CONFIG0, DSCC, id), SRI_ARR(DSCC_CONFIG1, DSCC, id), \ @@ -801,8 +758,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRI_ARR(DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id), \ SRI_ARR(DSCCIF_CONFIG0, DSCCIF, id), \ SRI_ARR(DSCCIF_CONFIG1, DSCCIF, id), \ - SRI_ARR(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id) \ - ) + SRI_ARR(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id) /* MPC */ @@ -810,32 +766,25 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRII_DWB(DWB_MUX, MUX, MPC_DWB, inst) #define MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0_RI(inst) \ - ( \ - SRII(MUX, MPC_OUT, inst), VUPDATE_SRII(CUR, VUPDATE_LOCK_SET, inst) \ - ) + SRII(MUX, MPC_OUT, inst), VUPDATE_SRII(CUR, VUPDATE_LOCK_SET, inst) #define MPC_OUT_MUX_REG_LIST_DCN3_0_RI(inst) \ - ( \ MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0_RI(inst), SRII(CSC_MODE, MPC_OUT, inst), \ SRII(CSC_C11_C12_A, MPC_OUT, inst), SRII(CSC_C33_C34_A, MPC_OUT, inst), \ SRII(CSC_C11_C12_B, MPC_OUT, inst), SRII(CSC_C33_C34_B, MPC_OUT, inst), \ SRII(DENORM_CONTROL, MPC_OUT, inst), \ SRII(DENORM_CLAMP_G_Y, MPC_OUT, inst), \ - SRII(DENORM_CLAMP_B_CB, MPC_OUT, inst), SR(MPC_OUT_CSC_COEF_FORMAT) \ - ) + SRII(DENORM_CLAMP_B_CB, MPC_OUT, inst), SR(MPC_OUT_CSC_COEF_FORMAT) #define MPC_COMMON_REG_LIST_DCN1_0_RI(inst) \ - ( \ SRII(MPCC_TOP_SEL, MPCC, inst), SRII(MPCC_BOT_SEL, MPCC, inst), \ SRII(MPCC_CONTROL, MPCC, inst), SRII(MPCC_STATUS, MPCC, inst), \ SRII(MPCC_OPP_ID, MPCC, inst), SRII(MPCC_BG_G_Y, MPCC, inst), \ SRII(MPCC_BG_R_CR, MPCC, inst), SRII(MPCC_BG_B_CB, MPCC, inst), \ SRII(MPCC_SM_CONTROL, MPCC, inst), \ - SRII(MPCC_UPDATE_LOCK_SEL, MPCC, inst) \ - ) + SRII(MPCC_UPDATE_LOCK_SEL, MPCC, inst) #define MPC_REG_LIST_DCN3_0_RI(inst) \ - ( \ MPC_COMMON_REG_LIST_DCN1_0_RI(inst), SRII(MPCC_TOP_GAIN, MPCC, inst), \ SRII(MPCC_BOT_GAIN_INSIDE, MPCC, inst), \ SRII(MPCC_BOT_GAIN_OUTSIDE, MPCC, inst), \ @@ -889,8 +838,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRII(MPCC_OGAM_RAMB_START_BASE_CNTL_G, MPCC_OGAM, inst), \ SRII(MPCC_OGAM_RAMB_START_BASE_CNTL_R, MPCC_OGAM, inst), \ SRII(MPCC_OGAM_CONTROL, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_LUT_CONTROL, MPCC_OGAM, inst) \ - ) + SRII(MPCC_OGAM_LUT_CONTROL, MPCC_OGAM, inst) #define MPC_REG_LIST_DCN3_2_RI(inst) \ MPC_REG_LIST_DCN3_0_RI(inst),\ @@ -1034,11 +982,9 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRII(MPCC_MCM_1DLUT_RAMB_REGION_30_31, MPCC_MCM, inst),\ SRII(MPCC_MCM_1DLUT_RAMB_REGION_32_33, MPCC_MCM, inst),\ SRII(MPCC_MCM_MEM_PWR_CTRL, MPCC_MCM, inst) - /* OPTC */ #define OPTC_COMMON_REG_LIST_DCN3_2_RI(inst) \ - ( \ SRI_ARR(OTG_VSTARTUP_PARAM, OTG, inst), \ SRI_ARR(OTG_VUPDATE_PARAM, OTG, inst), \ SRI_ARR(OTG_VREADY_PARAM, OTG, inst), \ @@ -1100,22 +1046,17 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRI_ARR(OPTC_BYTES_PER_PIXEL, ODM, inst), \ SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst), \ SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst), \ - SRI_ARR(OTG_DRR_CONTROL, OTG, inst) \ - ) + SRI_ARR(OTG_DRR_CONTROL, OTG, inst) /* HUBP */ #define HUBP_REG_LIST_DCN_VM_RI(id) \ - ( \ SRI_ARR(NOM_PARAMETERS_0, HUBPREQ, id), \ SRI_ARR(NOM_PARAMETERS_1, HUBPREQ, id), \ SRI_ARR(NOM_PARAMETERS_2, HUBPREQ, id), \ SRI_ARR(NOM_PARAMETERS_3, HUBPREQ, id), \ - SRI_ARR(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id) \ - ) - + SRI_ARR(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id) #define HUBP_REG_LIST_DCN_RI(id) \ - ( \ SRI_ARR(DCHUBP_CNTL, HUBP, id), SRI_ARR(HUBPREQ_DEBUG_DB, HUBP, id), \ SRI_ARR(HUBPREQ_DEBUG, HUBP, id), SRI_ARR(DCSURF_ADDR_CONFIG, HUBP, id), \ SRI_ARR(DCSURF_TILING_CONFIG, HUBP, id), \ @@ -1186,11 +1127,8 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRI_ARR(DCN_SURF1_TTU_CNTL1, HUBPREQ, id), \ SRI_ARR(DCN_CUR0_TTU_CNTL0, HUBPREQ, id), \ SRI_ARR(DCN_CUR0_TTU_CNTL1, HUBPREQ, id), \ - SRI_ARR(HUBP_CLK_CNTL, HUBP, id) \ - ) - + SRI_ARR(HUBP_CLK_CNTL, HUBP, id) #define HUBP_REG_LIST_DCN2_COMMON_RI(id) \ - ( \ HUBP_REG_LIST_DCN_RI(id), HUBP_REG_LIST_DCN_VM_RI(id), \ SRI_ARR(PREFETCH_SETTINGS, HUBPREQ, id), \ SRI_ARR(PREFETCH_SETTINGS_C, HUBPREQ, id), \ @@ -1217,35 +1155,24 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SRI_ARR(DCN_CUR1_TTU_CNTL0, HUBPREQ, id), \ SRI_ARR(DCN_CUR1_TTU_CNTL1, HUBPREQ, id), \ SRI_ARR(DCSURF_FLIP_CONTROL2, HUBPREQ, id), \ - SRI_ARR(VMID_SETTINGS_0, HUBPREQ, id) \ - ) - + SRI_ARR(VMID_SETTINGS_0, HUBPREQ, id) #define HUBP_REG_LIST_DCN21_RI(id) \ - ( \ HUBP_REG_LIST_DCN2_COMMON_RI(id), SRI_ARR(FLIP_PARAMETERS_3, HUBPREQ, id), \ SRI_ARR(FLIP_PARAMETERS_4, HUBPREQ, id), \ SRI_ARR(FLIP_PARAMETERS_5, HUBPREQ, id), \ SRI_ARR(FLIP_PARAMETERS_6, HUBPREQ, id), \ SRI_ARR(VBLANK_PARAMETERS_5, HUBPREQ, id), \ - SRI_ARR(VBLANK_PARAMETERS_6, HUBPREQ, id) \ - ) - + SRI_ARR(VBLANK_PARAMETERS_6, HUBPREQ, id) #define HUBP_REG_LIST_DCN30_RI(id) \ - ( \ - HUBP_REG_LIST_DCN21_RI(id), SRI_ARR(DCN_DMDATA_VM_CNTL, HUBPREQ, id) \ - ) - + HUBP_REG_LIST_DCN21_RI(id), SRI_ARR(DCN_DMDATA_VM_CNTL, HUBPREQ, id) #define HUBP_REG_LIST_DCN32_RI(id) \ - ( \ HUBP_REG_LIST_DCN30_RI(id), SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \ SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \ - SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id) \ - ) + SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id) /* HUBBUB */ #define HUBBUB_REG_LIST_DCN32_RI(id) \ - ( \ SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A), \ SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B), \ SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C), \ @@ -1286,13 +1213,11 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int SR(DCHUBBUB_ARB_MALL_CNTL), \ SR(DCN_VM_FAULT_ADDR_MSB), SR(DCN_VM_FAULT_ADDR_LSB), \ SR(DCN_VM_FAULT_CNTL), SR(DCN_VM_FAULT_STATUS), \ - SR(SDPIF_REQUEST_RATE_LIMIT) \ - ) + SR(SDPIF_REQUEST_RATE_LIMIT) /* DCCG */ #define DCCG_REG_LIST_DCN32_RI() \ - ( \ SR(DPPCLK_DTO_CTRL), DCCG_SRII(DTO_PARAM, DPPCLK, 0), \ DCCG_SRII(DTO_PARAM, DPPCLK, 1), DCCG_SRII(DTO_PARAM, DPPCLK, 2), \ DCCG_SRII(DTO_PARAM, DPPCLK, 3), DCCG_SRII(CLOCK_CNTL, HDMICHARCLK, 0), \ @@ -1308,38 +1233,31 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int DCCG_SRII(PHASE, DTBCLK_DTO, 2), DCCG_SRII(PHASE, DTBCLK_DTO, 3), \ SR(DCCG_AUDIO_DTBCLK_DTO_MODULO), SR(DCCG_AUDIO_DTBCLK_DTO_PHASE), \ SR(OTG_PIXEL_RATE_DIV), SR(DTBCLK_P_CNTL), \ - SR(DCCG_AUDIO_DTO_SOURCE), SR(DENTIST_DISPCLK_CNTL) \ - ) + SR(DCCG_AUDIO_DTO_SOURCE), SR(DENTIST_DISPCLK_CNTL) /* VMID */ #define DCN20_VMID_REG_LIST_RI(id) \ - ( \ SRI_ARR(CNTL, DCN_VM_CONTEXT, id), \ SRI_ARR(PAGE_TABLE_BASE_ADDR_HI32, DCN_VM_CONTEXT, id), \ SRI_ARR(PAGE_TABLE_BASE_ADDR_LO32, DCN_VM_CONTEXT, id), \ SRI_ARR(PAGE_TABLE_START_ADDR_HI32, DCN_VM_CONTEXT, id), \ SRI_ARR(PAGE_TABLE_START_ADDR_LO32, DCN_VM_CONTEXT, id), \ SRI_ARR(PAGE_TABLE_END_ADDR_HI32, DCN_VM_CONTEXT, id), \ - SRI_ARR(PAGE_TABLE_END_ADDR_LO32, DCN_VM_CONTEXT, id) \ - ) + SRI_ARR(PAGE_TABLE_END_ADDR_LO32, DCN_VM_CONTEXT, id) /* I2C HW */ #define I2C_HW_ENGINE_COMMON_REG_LIST_RI(id) \ - ( \ SRI_ARR_I2C(SETUP, DC_I2C_DDC, id), SRI_ARR_I2C(SPEED, DC_I2C_DDC, id), \ SRI_ARR_I2C(HW_STATUS, DC_I2C_DDC, id), \ SR_ARR_I2C(DC_I2C_ARBITRATION, id), \ SR_ARR_I2C(DC_I2C_CONTROL, id), SR_ARR_I2C(DC_I2C_SW_STATUS, id), \ SR_ARR_I2C(DC_I2C_TRANSACTION0, id), SR_ARR_I2C(DC_I2C_TRANSACTION1, id),\ SR_ARR_I2C(DC_I2C_TRANSACTION2, id), SR_ARR_I2C(DC_I2C_TRANSACTION3, id),\ - SR_ARR_I2C(DC_I2C_DATA, id), SR_ARR_I2C(MICROSECOND_TIME_BASE_DIV, id) \ - ) + SR_ARR_I2C(DC_I2C_DATA, id), SR_ARR_I2C(MICROSECOND_TIME_BASE_DIV, id) #define I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) \ - ( \ I2C_HW_ENGINE_COMMON_REG_LIST_RI(id), SR_ARR_I2C(DIO_MEM_PWR_CTRL, id), \ - SR_ARR_I2C(DIO_MEM_PWR_STATUS, id) \ - ) + SR_ARR_I2C(DIO_MEM_PWR_STATUS, id) #endif /* _DCN32_RESOURCE_H_ */ From 8e9a110cb22bbf8be33ad0113d5e2191ca446e30 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 13 Oct 2023 11:21:29 +0800 Subject: [PATCH 15/54] drm/amdkfd: clean up some inconsistent indenting No functional modification involved. drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:305 svm_range_free() warn: inconsistent indenting. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6804 Signed-off-by: Jiapeng Chong Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index f4038b33c404..eef76190800c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -302,7 +302,7 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap) for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) { if (prange->dma_addr[gpuidx]) { kvfree(prange->dma_addr[gpuidx]); - prange->dma_addr[gpuidx] = NULL; + prange->dma_addr[gpuidx] = NULL; } } From 5509e59611368da61280941e6a24cf2c9fc750e3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 17 Oct 2023 09:06:16 +0100 Subject: [PATCH 16/54] drm/amd/display: Fix a handful of spelling mistakes in dml_print output There are a few spelling mistakes and an minor grammatical issue in some dml_print messages. Fix these. Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index fddd52f3f601..b2e187098fbc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -1507,7 +1507,7 @@ static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st * dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", s->TimeForFetchingMetaPTE); dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", s->TimeForFetchingRowInVBlank); dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime); - dml_print("DML: To: %fus - time for propogation from scaler to optc\n", (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime); + dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime); dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingMetaPTE - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); @@ -9306,7 +9306,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required && locals->ImmediateFlipSupportedForPipe[k] == false) { locals->ImmediateFlipSupported = false; #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Pipe %0d not supporing iflip\n", __func__, k); + dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k); #endif } } @@ -9359,7 +9359,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc if (locals->PrefetchAndImmediateFlipSupported) { dml_print("DML::%s: Good, Prefetch and flip scheduling solution found at VStartupLines=%u (MaxVStartupAllPlanes=%u)\n", __func__, s->VStartupLines-1, s->MaxVStartupAllPlanes); } else { - dml_print("DML::%s: Bad, Prefetch and flip scheduling soluation NOT found solution! (MaxVStartupAllPlanes=%u)\n", __func__, s->MaxVStartupAllPlanes); + dml_print("DML::%s: Bad, Prefetch and flip scheduling solution did NOT find solution! (MaxVStartupAllPlanes=%u)\n", __func__, s->MaxVStartupAllPlanes); } //Watermarks and NB P-State/DRAM Clock Change Support From b1338a8e71acaf68892b390dee0271fe7323b64d Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Tue, 17 Oct 2023 21:49:09 +0800 Subject: [PATCH 17/54] drm/amdgpu: Workaround to skip kiq ring test during ras gpu recovery This is workaround, kiq ring test failed in suspend stage when do ras recovery. Signed-off-by: Stanley.Yang Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 9a158018ae16..c92e0aba69e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -29,6 +29,7 @@ #include "amdgpu_rlc.h" #include "amdgpu_ras.h" #include "amdgpu_xcp.h" +#include "amdgpu_xgmi.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -501,6 +502,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *kiq_ring = &kiq->ring; + struct amdgpu_hive_info *hive; + struct amdgpu_ras *ras; + int hive_ras_recovery = 0; int i, r = 0; int j; @@ -521,6 +525,23 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) RESET_QUEUES, 0, 0); } + /** + * This is workaround: only skip kiq_ring test + * during ras recovery in suspend stage for gfx9.4.3 + */ + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + + ras = amdgpu_ras_get_context(adev); + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) && + ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) { + spin_unlock(&kiq->ring_lock); + return 0; + } + if (kiq_ring->sched.ready && !adev->job_hang) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&kiq->ring_lock); From e6f8588733342c61948fde673a862b53c0d972bc Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 17 Oct 2023 16:51:03 -0400 Subject: [PATCH 18/54] drm/amdgpu: Fix possible null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit abo->tbo.resource may be NULL in amdgpu_vm_bo_update. Fixes: 180253782038 ("drm/ttm: stop allocating dummy resources during BO creation") Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index afc19341334f..f3c9f93d8899 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1090,7 +1090,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, struct drm_gem_object *gobj = dma_buf->priv; struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); - if (abo->tbo.resource->mem_type == TTM_PL_VRAM) + if (abo->tbo.resource && + abo->tbo.resource->mem_type == TTM_PL_VRAM) bo = gem_to_amdgpu_bo(gobj); } mem = bo->tbo.resource; From 207430b76a48b0b245bab08efe346148a5558df7 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 17 Jul 2023 15:28:52 -0400 Subject: [PATCH 19/54] drm/amdgpu: Reserve fences for VM update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In amdgpu_dma_buf_move_notify reserve fences for the page table updates in amdgpu_vm_clear_freed and amdgpu_vm_handle_moved. This fixes a BUG_ON in dma_resv_add_fence when using SDMA for page table updates. Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 76b618735dc0..b5e28fa3f414 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -404,7 +404,10 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) continue; } - r = amdgpu_vm_clear_freed(adev, vm, NULL); + /* Reserve fences for two SDMA page table updates */ + r = dma_resv_reserve_fences(resv, 2); + if (!r) + r = amdgpu_vm_clear_freed(adev, vm, NULL); if (!r) r = amdgpu_vm_handle_moved(adev, vm); From afcf949cf331de791e3fbfc65c0bb82dd9df6d57 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Wed, 18 Oct 2023 15:16:39 +0800 Subject: [PATCH 20/54] drm/amdgpu: Log UE corrected by replay as correctable error Support replay mode where UE could be converted to CE. Signed-off-by: Candice Li Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index b664ee3ee92d..025e6aeb058d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -105,7 +105,9 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 || + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0))) *error_count += 1; } @@ -125,7 +127,6 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) @@ -293,7 +294,7 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1) { mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); From 1958946858a62b6b5392ed075aa219d199bcae39 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Thu, 12 Oct 2023 09:33:45 +0800 Subject: [PATCH 21/54] drm/amd/pm: Support for getting power1_cap_min value Support for getting power1_cap_min value on smu13 and smu11. For other Asics, we still use 0 as the default value. Signed-off-by: Ma Jun Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 14 +++---- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 ++++++--- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 8 ++-- .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 35 ++++++++++------- .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 33 ++++++++++------ .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 38 +++++++++++-------- .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 5 ++- .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 13 +++++-- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 34 +++++++++++------ .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 9 +++-- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 34 +++++++++++------ drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 2 +- 12 files changed, 151 insertions(+), 91 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index c7c29aa24c36..d76c3abf406f 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2921,14 +2921,6 @@ static ssize_t amdgpu_hwmon_show_power_input(struct device *dev, return sysfs_emit(buf, "%zd\n", val); } -static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%i\n", 0); -} - - static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev, struct device_attribute *attr, char *buf, @@ -2965,6 +2957,12 @@ static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev, return size; } +static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MIN); +} static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, struct device_attribute *attr, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 7c3356d6da5e..7087f9840ab7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -849,7 +849,8 @@ static int smu_late_init(void *handle) ret = smu_get_asic_power_limits(smu, &smu->current_power_limit, &smu->default_power_limit, - &smu->max_power_limit); + &smu->max_power_limit, + &smu->min_power_limit); if (ret) { dev_err(adev->dev, "Failed to get asic power limits!\n"); return ret; @@ -2447,6 +2448,8 @@ int smu_get_power_limit(void *handle, limit_level = SMU_PPT_LIMIT_MAX; break; case PP_PWR_LIMIT_MIN: + limit_level = SMU_PPT_LIMIT_MIN; + break; default: return -EOPNOTSUPP; break; @@ -2466,8 +2469,7 @@ int smu_get_power_limit(void *handle, case IP_VERSION(11, 0, 13): ret = smu_get_asic_power_limits(smu, &smu->current_power_limit, - NULL, - NULL); + NULL, NULL, NULL); break; default: break; @@ -2480,6 +2482,9 @@ int smu_get_power_limit(void *handle, case SMU_PPT_LIMIT_MAX: *limit = smu->max_power_limit; break; + case SMU_PPT_LIMIT_MIN: + *limit = smu->min_power_limit; + break; default: break; } @@ -2502,10 +2507,10 @@ static int smu_set_power_limit(void *handle, uint32_t limit) if (smu->ppt_funcs->set_power_limit) return smu->ppt_funcs->set_power_limit(smu, limit_type, limit); - if (limit > smu->max_power_limit) { + if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { dev_err(smu->adev->dev, - "New power limit (%d) is over the max allowed %d\n", - limit, smu->max_power_limit); + "New power limit (%d) is out of range [%d,%d]\n", + limit, smu->min_power_limit, smu->max_power_limit); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 1454eed76604..839553a86aa2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -500,6 +500,7 @@ struct smu_context { uint32_t current_power_limit; uint32_t default_power_limit; uint32_t max_power_limit; + uint32_t min_power_limit; /* soft pptable */ uint32_t ppt_offset_bytes; @@ -821,9 +822,10 @@ struct pptable_funcs { * @get_power_limit: Get the device's power limits. */ int (*get_power_limit)(struct smu_context *smu, - uint32_t *current_power_limit, - uint32_t *default_power_limit, - uint32_t *max_power_limit); + uint32_t *current_power_limit, + uint32_t *default_power_limit, + uint32_t *max_power_limit, + uint32_t *min_power_limit); /** * @get_ppt_limit: Get the device's ppt limits. diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 6109784a3ed5..2cb6b68222ba 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1278,14 +1278,15 @@ static int arcturus_get_fan_parameters(struct smu_context *smu) } static int arcturus_get_power_limit(struct smu_context *smu, - uint32_t *current_power_limit, - uint32_t *default_power_limit, - uint32_t *max_power_limit) + uint32_t *current_power_limit, + uint32_t *default_power_limit, + uint32_t *max_power_limit, + uint32_t *min_power_limit) { struct smu_11_0_powerplay_table *powerplay_table = (struct smu_11_0_powerplay_table *)smu->smu_table.power_play_table; PPTable_t *pptable = smu->smu_table.driver_pptable; - uint32_t power_limit, od_percent; + uint32_t power_limit, od_percent_upper, od_percent_lower; if (smu_v11_0_get_current_power_limit(smu, &power_limit)) { /* the last hope to figure out the ppt limit */ @@ -1302,17 +1303,25 @@ static int arcturus_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; + if (smu->od_enabled) { + od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); + } else { + od_percent_upper = 0; + od_percent_lower = 100; + } + + dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", + od_percent_upper, od_percent_lower, power_limit); + if (max_power_limit) { - if (smu->od_enabled) { - od_percent = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); + *max_power_limit = power_limit * (100 + od_percent_upper); + *max_power_limit /= 100; + } - dev_dbg(smu->adev->dev, "ODSETTING_POWERPERCENTAGE: %d (default: %d)\n", od_percent, power_limit); - - power_limit *= (100 + od_percent); - power_limit /= 100; - } - - *max_power_limit = power_limit; + if (min_power_limit) { + *min_power_limit = power_limit * (100 - od_percent_lower); + *min_power_limit /= 100; } return 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index bbe0cb68ca30..a38233cc5b7f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2330,15 +2330,16 @@ static int navi10_display_disable_memory_clock_switch(struct smu_context *smu, } static int navi10_get_power_limit(struct smu_context *smu, - uint32_t *current_power_limit, - uint32_t *default_power_limit, - uint32_t *max_power_limit) + uint32_t *current_power_limit, + uint32_t *default_power_limit, + uint32_t *max_power_limit, + uint32_t *min_power_limit) { struct smu_11_0_powerplay_table *powerplay_table = (struct smu_11_0_powerplay_table *)smu->smu_table.power_play_table; struct smu_11_0_overdrive_table *od_settings = smu->od_settings; PPTable_t *pptable = smu->smu_table.driver_pptable; - uint32_t power_limit, od_percent; + uint32_t power_limit, od_percent_upper, od_percent_lower; if (smu_v11_0_get_current_power_limit(smu, &power_limit)) { /* the last hope to figure out the ppt limit */ @@ -2355,18 +2356,26 @@ static int navi10_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (max_power_limit) { - if (smu->od_enabled && + if (smu->od_enabled && navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) { - od_percent = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); + od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); + } else { + od_percent_upper = 0; + od_percent_lower = 100; + } - dev_dbg(smu->adev->dev, "ODSETTING_POWERPERCENTAGE: %d (default: %d)\n", od_percent, power_limit); + dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", + od_percent_upper, od_percent_lower, power_limit); - power_limit *= (100 + od_percent); - power_limit /= 100; - } + if (max_power_limit) { + *max_power_limit = power_limit * (100 + od_percent_upper); + *max_power_limit /= 100; + } - *max_power_limit = power_limit; + if (min_power_limit) { + *min_power_limit = power_limit * (100 - od_percent_lower); + *min_power_limit /= 100; } return 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 1e710c0c9042..090249b6422a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -620,11 +620,12 @@ static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *s static int sienna_cichlid_get_power_limit(struct smu_context *smu, uint32_t *current_power_limit, uint32_t *default_power_limit, - uint32_t *max_power_limit) + uint32_t *max_power_limit, + uint32_t *min_power_limit) { struct smu_11_0_7_powerplay_table *powerplay_table = (struct smu_11_0_7_powerplay_table *)smu->smu_table.power_play_table; - uint32_t power_limit, od_percent; + uint32_t power_limit, od_percent_upper, od_percent_lower; uint16_t *table_member; GET_PPTABLE_MEMBER(SocketPowerLimitAc, &table_member); @@ -639,21 +640,26 @@ static int sienna_cichlid_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (max_power_limit) { - if (smu->od_enabled) { - od_percent = - le32_to_cpu(powerplay_table->overdrive_table.max[ - SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); - - dev_dbg(smu->adev->dev, "ODSETTING_POWERPERCENTAGE: %d (default: %d)\n", - od_percent, power_limit); - - power_limit *= (100 + od_percent); - power_limit /= 100; - } - *max_power_limit = power_limit; + if (smu->od_enabled) { + od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); + } else { + od_percent_upper = 0; + od_percent_lower = 100; } + dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", + od_percent_upper, od_percent_lower, power_limit); + + if (max_power_limit) { + *max_power_limit = power_limit * (100 + od_percent_upper); + *max_power_limit /= 100; + } + + if (min_power_limit) { + *min_power_limit = power_limit * (100 - od_percent_lower); + *min_power_limit /= 100; + } return 0; } @@ -672,7 +678,7 @@ static void sienna_cichlid_get_smartshift_power_percentage(struct smu_context *s uint32_t cur_power_limit; if (metrics_v4->ApuSTAPMSmartShiftLimit != 0) { - sienna_cichlid_get_power_limit(smu, &cur_power_limit, NULL, NULL); + sienna_cichlid_get_power_limit(smu, &cur_power_limit, NULL, NULL, NULL); apu_power_limit = metrics_v4->ApuSTAPMLimit; dgpu_power_limit = cur_power_limit; powerRatio = (((apu_power_limit + diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 8c13aa469f60..3efc6aed28f1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -2314,7 +2314,8 @@ static u32 vangogh_get_gfxoff_status(struct smu_context *smu) static int vangogh_get_power_limit(struct smu_context *smu, uint32_t *current_power_limit, uint32_t *default_power_limit, - uint32_t *max_power_limit) + uint32_t *max_power_limit, + uint32_t *min_power_limit) { struct smu_11_5_power_context *power_context = smu->smu_power.power_context; @@ -2336,6 +2337,8 @@ static int vangogh_get_power_limit(struct smu_context *smu, *default_power_limit = ppt_limit / 1000; if (max_power_limit) *max_power_limit = 29; + if (min_power_limit) + *min_power_limit = 0; ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetFastPPTLimit, &ppt_limit); if (ret) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index bdb278f5c654..f082cd4b40c1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1139,9 +1139,10 @@ static int aldebaran_read_sensor(struct smu_context *smu, } static int aldebaran_get_power_limit(struct smu_context *smu, - uint32_t *current_power_limit, - uint32_t *default_power_limit, - uint32_t *max_power_limit) + uint32_t *current_power_limit, + uint32_t *default_power_limit, + uint32_t *max_power_limit, + uint32_t *min_power_limit) { PPTable_t *pptable = smu->smu_table.driver_pptable; uint32_t power_limit = 0; @@ -1154,7 +1155,8 @@ static int aldebaran_get_power_limit(struct smu_context *smu, *default_power_limit = 0; if (max_power_limit) *max_power_limit = 0; - + if (min_power_limit) + *min_power_limit = 0; dev_warn(smu->adev->dev, "PPT feature is not enabled, power values can't be fetched."); @@ -1189,6 +1191,9 @@ static int aldebaran_get_power_limit(struct smu_context *smu, *max_power_limit = pptable->PptLimit; } + if (min_power_limit) + *min_power_limit = 0; + return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 60e81f015407..bcd7b39a3a1b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2341,16 +2341,17 @@ static int smu_v13_0_0_enable_mgpu_fan_boost(struct smu_context *smu) } static int smu_v13_0_0_get_power_limit(struct smu_context *smu, - uint32_t *current_power_limit, - uint32_t *default_power_limit, - uint32_t *max_power_limit) + uint32_t *current_power_limit, + uint32_t *default_power_limit, + uint32_t *max_power_limit, + uint32_t *min_power_limit) { struct smu_table_context *table_context = &smu->smu_table; struct smu_13_0_0_powerplay_table *powerplay_table = (struct smu_13_0_0_powerplay_table *)table_context->power_play_table; PPTable_t *pptable = table_context->driver_pptable; SkuTable_t *skutable = &pptable->SkuTable; - uint32_t power_limit, od_percent; + uint32_t power_limit, od_percent_upper, od_percent_lower; if (smu_v13_0_get_current_power_limit(smu, &power_limit)) power_limit = smu->adev->pm.ac_power ? @@ -2362,16 +2363,25 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; + if (smu->od_enabled) { + od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); + } else { + od_percent_upper = 0; + od_percent_lower = 100; + } + + dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", + od_percent_upper, od_percent_lower, power_limit); + if (max_power_limit) { - if (smu->od_enabled) { - od_percent = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); + *max_power_limit = power_limit * (100 + od_percent_upper); + *max_power_limit /= 100; + } - dev_dbg(smu->adev->dev, "ODSETTING_POWERPERCENTAGE: %d (default: %d)\n", od_percent, power_limit); - - power_limit *= (100 + od_percent); - power_limit /= 100; - } - *max_power_limit = power_limit; + if (min_power_limit) { + *min_power_limit = power_limit * (100 - od_percent_lower); + *min_power_limit /= 100; } return 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 15daaf7fa2f5..f9f40d8edee4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -1309,9 +1309,10 @@ static int smu_v13_0_6_read_sensor(struct smu_context *smu, } static int smu_v13_0_6_get_power_limit(struct smu_context *smu, - uint32_t *current_power_limit, - uint32_t *default_power_limit, - uint32_t *max_power_limit) + uint32_t *current_power_limit, + uint32_t *default_power_limit, + uint32_t *max_power_limit, + uint32_t *min_power_limit) { struct smu_table_context *smu_table = &smu->smu_table; struct PPTable_t *pptable = @@ -1335,6 +1336,8 @@ static int smu_v13_0_6_get_power_limit(struct smu_context *smu, *max_power_limit = pptable->MaxSocketPowerLimit; } + if (min_power_limit) + *min_power_limit = 0; return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 3c5526babb14..ac0e1cc812bd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2304,16 +2304,17 @@ static int smu_v13_0_7_enable_mgpu_fan_boost(struct smu_context *smu) } static int smu_v13_0_7_get_power_limit(struct smu_context *smu, - uint32_t *current_power_limit, - uint32_t *default_power_limit, - uint32_t *max_power_limit) + uint32_t *current_power_limit, + uint32_t *default_power_limit, + uint32_t *max_power_limit, + uint32_t *min_power_limit) { struct smu_table_context *table_context = &smu->smu_table; struct smu_13_0_7_powerplay_table *powerplay_table = (struct smu_13_0_7_powerplay_table *)table_context->power_play_table; PPTable_t *pptable = table_context->driver_pptable; SkuTable_t *skutable = &pptable->SkuTable; - uint32_t power_limit, od_percent; + uint32_t power_limit, od_percent_upper, od_percent_lower; if (smu_v13_0_get_current_power_limit(smu, &power_limit)) power_limit = smu->adev->pm.ac_power ? @@ -2325,16 +2326,25 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; + if (smu->od_enabled) { + od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); + } else { + od_percent_upper = 0; + od_percent_lower = 100; + } + + dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", + od_percent_upper, od_percent_lower, power_limit); + if (max_power_limit) { - if (smu->od_enabled) { - od_percent = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); + *max_power_limit = power_limit * (100 + od_percent_upper); + *max_power_limit /= 100; + } - dev_dbg(smu->adev->dev, "ODSETTING_POWERPERCENTAGE: %d (default: %d)\n", od_percent, power_limit); - - power_limit *= (100 + od_percent); - power_limit /= 100; - } - *max_power_limit = power_limit; + if (min_power_limit) { + *min_power_limit = power_limit * (100 - od_percent_lower); + *min_power_limit /= 100; } return 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h index bcc42abfc768..80b3c3efc006 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h @@ -85,7 +85,7 @@ #define smu_i2c_fini(smu) smu_ppt_funcs(i2c_fini, 0, smu) #define smu_get_unique_id(smu) smu_ppt_funcs(get_unique_id, 0, smu) #define smu_log_thermal_throttling(smu) smu_ppt_funcs(log_thermal_throttling_event, 0, smu) -#define smu_get_asic_power_limits(smu, current, default, max) smu_ppt_funcs(get_power_limit, 0, smu, current, default, max) +#define smu_get_asic_power_limits(smu, current, default, max, min) smu_ppt_funcs(get_power_limit, 0, smu, current, default, max, min) #define smu_get_pp_feature_mask(smu, buf) smu_ppt_funcs(get_pp_feature_mask, 0, smu, buf) #define smu_set_pp_feature_mask(smu, new_mask) smu_ppt_funcs(set_pp_feature_mask, 0, smu, new_mask) #define smu_gfx_ulv_control(smu, enablement) smu_ppt_funcs(gfx_ulv_control, 0, smu, enablement) From f9caf6cdd5cc1f4006fd7b6b113658c0b0159f23 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Thu, 19 Oct 2023 14:39:48 +0800 Subject: [PATCH 22/54] drm/amdkfd:remove unused code Function svm_range_split_by_grinity is not used, so it is removed. Signed-off-by: Jesse Zhang Suggested-by: Philip Yang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 60 ---------------------------- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 3 -- 2 files changed, 63 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index eef76190800c..ad4e1387e0b0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1141,66 +1141,6 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, list_add_tail(&pchild->child_list, &prange->child_list); } -/** - * svm_range_split_by_granularity - collect ranges within granularity boundary - * - * @p: the process with svms list - * @mm: mm structure - * @addr: the vm fault address in pages, to split the prange - * @parent: parent range if prange is from child list - * @prange: prange to split - * - * Trims @prange to be a single aligned block of prange->granularity if - * possible. The head and tail are added to the child_list in @parent. - * - * Context: caller must hold mmap_read_lock and prange->lock - * - * Return: - * 0 - OK, otherwise error code - */ -int -svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, - unsigned long addr, struct svm_range *parent, - struct svm_range *prange) -{ - struct svm_range *head, *tail; - unsigned long start, last, size; - int r; - - /* Align splited range start and size to granularity size, then a single - * PTE will be used for whole range, this reduces the number of PTE - * updated and the L1 TLB space used for translation. - */ - size = 1UL << prange->granularity; - start = ALIGN_DOWN(addr, size); - last = ALIGN(addr + 1, size) - 1; - - pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n", - prange->svms, prange->start, prange->last, start, last, size); - - if (start > prange->start) { - r = svm_range_split(prange, start, prange->last, &head); - if (r) - return r; - svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE); - } - - if (last < prange->last) { - r = svm_range_split(prange, prange->start, last, &tail); - if (r) - return r; - svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); - } - - /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ - if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) { - prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP; - pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n", - prange, prange->start, prange->last, - SVM_OP_ADD_RANGE_AND_MAP); - } - return 0; -} static bool svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index be11ba0c4289..026863a0abcd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -172,9 +172,6 @@ struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange, int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bool clear); void svm_range_vram_node_free(struct svm_range *prange); -int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, - unsigned long addr, struct svm_range *parent, - struct svm_range *prange); int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t vmid, uint32_t node_id, uint64_t addr, bool write_fault); From 472c5fb29798695b589fb844f84c6bf4ff07c592 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 11 Oct 2023 18:36:15 +0800 Subject: [PATCH 23/54] drm/amdgpu: define ras_reset_error_count function Make the code architecture more simple. v2: reuse ras_reset_error_count in ras_reset_error_status. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 19 +++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 ++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index c7f8dcb3b4d2..a4384f0eb8bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1170,23 +1170,34 @@ out_fini_err_data: return ret; } -int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, +int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block block) { struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); if (!block_obj || !block_obj->hw_ops) { dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", - ras_block_str(block)); - return 0; + ras_block_str(block)); + return -EOPNOTSUPP; } if (!amdgpu_ras_is_supported(adev, block)) - return 0; + return -EOPNOTSUPP; if (block_obj->hw_ops->reset_ras_error_count) block_obj->hw_ops->reset_ras_error_count(adev); + return 0; +} + +int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, + enum amdgpu_ras_block block) +{ + struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); + + if (amdgpu_ras_reset_error_count(adev, block) == -EOPNOTSUPP) + return 0; + if ((block == AMDGPU_RAS_BLOCK__GFX) || (block == AMDGPU_RAS_BLOCK__MMHUB)) { if (block_obj->hw_ops->reset_ras_error_status) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 0a5c8a107fb2..3f9ac0ab67e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -714,6 +714,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev); int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info); +int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, + enum amdgpu_ras_block block); int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, enum amdgpu_ras_block block); From 9248462d7e0862883df6741ec0e1bb41c3698b22 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 28 Sep 2023 17:00:31 +0800 Subject: [PATCH 24/54] drm/amdgpu: Enable software RAS in vcn v4_0_3 Set VCN/JPEG RAS masks to enable software RAS for VCN and JPEG. Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a4384f0eb8bf..0df15d856085 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2622,7 +2622,9 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) || amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(4, 0, 0)) + IP_VERSION(4, 0, 0) || + amdgpu_ip_version(adev, VCN_HWIP, 0) == + IP_VERSION(4, 0, 3)) adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | 1 << AMDGPU_RAS_BLOCK__JPEG); else From 08e9ebc75b5bcfec9d226f9e16bab2ab7b25a39a Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Tue, 17 Oct 2023 16:01:35 +0200 Subject: [PATCH 25/54] drm/amd/pm: Handle non-terminated overdrive commands. The incoming strings might not be terminated by a newline or a 0. (found while testing a program that just wrote the string itself, causing a crash) Cc: stable@vger.kernel.org Fixes: e3933f26b657 ("drm/amd/pp: Add edit/commit/show OD clock/voltage support in sysfs") Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index d76c3abf406f..358bb5e485f2 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -761,7 +761,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, if (adev->in_suspend && !adev->in_runpm) return -EPERM; - if (count > 127) + if (count > 127 || count == 0) return -EINVAL; if (*buf == 's') @@ -781,7 +781,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, else return -EINVAL; - memcpy(buf_cpy, buf, count+1); + memcpy(buf_cpy, buf, count); + buf_cpy[count] = 0; tmp_str = buf_cpy; @@ -798,6 +799,9 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, return -EINVAL; parameter_size++; + if (!tmp_str) + break; + while (isspace(*tmp_str)) tmp_str++; } From 803c2707c21df5d82473831aac74969924e028c5 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 28 Sep 2023 09:13:51 +0800 Subject: [PATCH 26/54] drm/amd/display: clean up some inconsistent indentings drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn35/dcn35_fpu.c:261 dcn35_update_bw_bounding_box_fpu() warn: inconsistent indenting Signed-off-by: Yang Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 142 +++++++++--------- 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index be345f470b25..a5fe523668e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -236,85 +236,85 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, dc_assert_fp_enabled(); - dcn3_5_ip.max_num_otg = - dc->res_pool->res_cap->num_timing_generator; - dcn3_5_ip.max_num_dpp = dc->res_pool->pipe_count; - dcn3_5_soc.num_chans = bw_params->num_channels; + dcn3_5_ip.max_num_otg = + dc->res_pool->res_cap->num_timing_generator; + dcn3_5_ip.max_num_dpp = dc->res_pool->pipe_count; + dcn3_5_soc.num_chans = bw_params->num_channels; - ASSERT(clk_table->num_entries); + ASSERT(clk_table->num_entries); - /* Prepass to find max clocks independent of voltage level. */ - for (i = 0; i < clk_table->num_entries; ++i) { - if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; - if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + /* Prepass to find max clocks independent of voltage level. */ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn3_5_soc.num_states - 1; + j >= 0; j--) { + if (dcn3_5_soc.clock_limits[j].dcfclk_mhz <= + clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + if (clk_table->num_entries == 1) { + /*smu gives one DPM level, let's take the highest one*/ + closest_clk_lvl = dcn3_5_soc.num_states - 1; } - for (i = 0; i < clk_table->num_entries; i++) { - /* loop backwards*/ - for (closest_clk_lvl = 0, j = dcn3_5_soc.num_states - 1; - j >= 0; j--) { - if (dcn3_5_soc.clock_limits[j].dcfclk_mhz <= - clk_table->entries[i].dcfclk_mhz) { - closest_clk_lvl = j; - break; - } - } - if (clk_table->num_entries == 1) { - /*smu gives one DPM level, let's take the highest one*/ - closest_clk_lvl = dcn3_5_soc.num_states - 1; - } + clock_limits[i].state = i; - clock_limits[i].state = i; - - /* Clocks dependent on voltage level. */ - clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - if (clk_table->num_entries == 1 && - clock_limits[i].dcfclk_mhz < - dcn3_5_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { - /*SMU fix not released yet*/ - clock_limits[i].dcfclk_mhz = - dcn3_5_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; - } - - clock_limits[i].fabricclk_mhz = - clk_table->entries[i].fclk_mhz; - clock_limits[i].socclk_mhz = - clk_table->entries[i].socclk_mhz; - - if (clk_table->entries[i].memclk_mhz && - clk_table->entries[i].wck_ratio) - clock_limits[i].dram_speed_mts = - clk_table->entries[i].memclk_mhz * 2 * - clk_table->entries[i].wck_ratio; - - /* Clocks independent of voltage level. */ - clock_limits[i].dispclk_mhz = max_dispclk_mhz ? - max_dispclk_mhz : - dcn3_5_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - - clock_limits[i].dppclk_mhz = max_dppclk_mhz ? - max_dppclk_mhz : - dcn3_5_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - - clock_limits[i].dram_bw_per_chan_gbps = - dcn3_5_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - clock_limits[i].dscclk_mhz = - dcn3_5_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - clock_limits[i].dtbclk_mhz = - dcn3_5_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - clock_limits[i].phyclk_d18_mhz = - dcn3_5_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - clock_limits[i].phyclk_mhz = - dcn3_5_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + /* Clocks dependent on voltage level. */ + clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + if (clk_table->num_entries == 1 && + clock_limits[i].dcfclk_mhz < + dcn3_5_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { + /*SMU fix not released yet*/ + clock_limits[i].dcfclk_mhz = + dcn3_5_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; } - memcpy(dcn3_5_soc.clock_limits, clock_limits, - sizeof(dcn3_5_soc.clock_limits)); + clock_limits[i].fabricclk_mhz = + clk_table->entries[i].fclk_mhz; + clock_limits[i].socclk_mhz = + clk_table->entries[i].socclk_mhz; - if (clk_table->num_entries) - dcn3_5_soc.num_states = clk_table->num_entries; + if (clk_table->entries[i].memclk_mhz && + clk_table->entries[i].wck_ratio) + clock_limits[i].dram_speed_mts = + clk_table->entries[i].memclk_mhz * 2 * + clk_table->entries[i].wck_ratio; + + /* Clocks independent of voltage level. */ + clock_limits[i].dispclk_mhz = max_dispclk_mhz ? + max_dispclk_mhz : + dcn3_5_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + clock_limits[i].dppclk_mhz = max_dppclk_mhz ? + max_dppclk_mhz : + dcn3_5_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + + clock_limits[i].dram_bw_per_chan_gbps = + dcn3_5_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + clock_limits[i].dscclk_mhz = + dcn3_5_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + clock_limits[i].dtbclk_mhz = + dcn3_5_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + clock_limits[i].phyclk_d18_mhz = + dcn3_5_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + clock_limits[i].phyclk_mhz = + dcn3_5_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + + memcpy(dcn3_5_soc.clock_limits, clock_limits, + sizeof(dcn3_5_soc.clock_limits)); + + if (clk_table->num_entries) + dcn3_5_soc.num_states = clk_table->num_entries; if (max_dispclk_mhz) { dcn3_5_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; From 00e6bcbd11570683bd0dd9bbce7446db436b71ab Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 18 Oct 2023 09:02:03 +0800 Subject: [PATCH 27/54] drm/amd/display: Remove duplicated include in dce110_hwseq.c ./drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c: dce110_hwseq.h is included more than once. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6897 Signed-off-by: Yang Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 74602a5fd6dd..51e42cbb3cdb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -65,8 +65,6 @@ #include "dcn10/dcn10_hwseq.h" -#include "dce110_hwseq.h" - #define GAMMA_HW_POINTS_NUM 256 /* From 7e653e01a08c884c2a11cbedd3ab8bc939abb56d Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 18 Oct 2023 09:16:14 +0800 Subject: [PATCH 28/54] drm/amd/display: Remove unneeded semicolon ./drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c:464:3-4: Unneeded semicolon Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6900 Signed-off-by: Yang Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index 36baf35bb170..f45fbe820445 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -461,7 +461,7 @@ static void sort_pipes_for_splitting(struct dc_plane_pipe_pool *pipes) swapped = false; } - }; + } } } From 30440201edb0eb20352f5dfb509d407f4015259d Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 18 Oct 2023 09:22:32 +0800 Subject: [PATCH 29/54] drm/amd/display: Simplify bool conversion ./drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c:4802:84-89: WARNING: conversion to bool not needed here Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6901 Signed-off-by: Yang Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index b2e187098fbc..cff53d28d3c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -4799,7 +4799,7 @@ static void CalculateSurfaceSizeInMall( if (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable) TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; } - *ExceededMALLSize = (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true); + *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024); } // CalculateSurfaceSizeInMall static void CalculateDETBufferSize( From b63eae94d28ccdb46c49803c6552876e5c907f4c Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 19 Oct 2023 11:38:26 +0800 Subject: [PATCH 30/54] drm/amd/display: clean up some inconsistent indenting No functional modification involved. drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:2902 dm_resume() warn: inconsistent indenting. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6940 Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2ac940a38703..82965723f0b6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2900,7 +2900,7 @@ static int dm_resume(void *handle) } /* power on hardware */ - dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); /* program HPD filter */ dc_resume(dm->dc); From 089dbf6a06f1dcaeed4f8b86d619e8d28b235207 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 18 Oct 2023 11:45:52 -0700 Subject: [PATCH 31/54] drm/amd/display: Respect CONFIG_FRAME_WARN=0 in DML2 display_mode_code.c is unconditionally built with -Wframe-larger-than=2048, which causes warnings even when CONFIG_FRAME_WARN has been set to 0, which should show no warnings. Use the existing $(frame_warn_flag) variable, which handles this situation. This is basically commit 25f178bbd078 ("drm/amd/display: Respect CONFIG_FRAME_WARN=0 in dml Makefile") but for DML2. Fixes: 7966f319c66d ("drm/amd/display: Introduce DML2") Signed-off-by: Nathan Chancellor Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index f35ed8de260d..66431525f2a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -61,7 +61,7 @@ ifneq ($(CONFIG_FRAME_WARN),0) frame_warn_flag := -Wframe-larger-than=2048 endif -CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) -Wframe-larger-than=2048 +CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_wrapper.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_utils.o := $(dml2_ccflags) From f2176d70638aaa1fa2a1c3068f0acedcb271a8aa Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 28 Sep 2023 17:05:59 +0800 Subject: [PATCH 32/54] drm/amdgpu: Add UVD_VCPU_INT_EN2 to dpg sram Add RAS sepcifc programming to dpg sram. Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index f85d18cd74ec..810bbfccd6f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1760,6 +1760,11 @@ static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev, SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL), tmp, 0, indirect); + tmp = UVD_VCPU_INT_EN2__RASCNTL_VCPU_VCODEC_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_VCPU_INT_EN2), + tmp, 0, indirect); + tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN), From ce43a5fa2eca33e602dc3c276ee7acfb458e74d3 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Wed, 18 Oct 2023 20:10:34 +0800 Subject: [PATCH 33/54] drm/amdgpu: Enable mca debug mode mode when ras enabled Enable smu_v13_0_6 mca debug mode if ras is enabled. Changed from V1: enable mca debug mode if ras enabled. Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index f9f40d8edee4..60eb6f8af187 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2303,7 +2303,7 @@ static int smu_v13_0_6_post_init(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - if (!amdgpu_sriov_vf(adev) && amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + if (!amdgpu_sriov_vf(adev) && adev->ras_enabled) return smu_v13_0_6_mca_set_debug_mode(smu, true); return 0; From 8a65661114941788a2093193c251e44cf1d6439c Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Thu, 19 Oct 2023 14:55:57 +0800 Subject: [PATCH 34/54] drm/amdgpu: Fix delete nodes that have been relesed Fix delete nodes that it has been freed. Fixes: 5b1270beb380 ("drm/amdgpu: add ras_err_info to identify RAS error source") Signed-off-by: Stanley.Yang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 0df15d856085..b3c32c667e09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3503,10 +3503,8 @@ void amdgpu_ras_error_data_fini(struct ras_err_data *err_data) { struct ras_err_node *err_node, *tmp; - list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node) { + list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node) amdgpu_ras_error_node_release(err_node); - list_del(&err_node->node); - } } static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data *err_data, From 97b2821643f776c94ebcea79052f77e732d56f6d Mon Sep 17 00:00:00 2001 From: Bokun Zhang Date: Mon, 16 Oct 2023 12:48:02 -0400 Subject: [PATCH 35/54] drm/amd/amdgpu/vcn: Add RB decouple feature under SRIOV - P1 - Update SRIOV header with RB decouple flag Signed-off-by: Bokun Zhang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 104a5ad8397d..51a14f6d93bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -90,10 +90,11 @@ union amd_sriov_msg_feature_flags { uint32_t host_load_ucodes : 1; uint32_t host_flr_vramlost : 1; uint32_t mm_bw_management : 1; - uint32_t pp_one_vf_mode : 1; + uint32_t pp_one_vf_mode : 1; uint32_t reg_indirect_acc : 1; uint32_t av1_support : 1; - uint32_t reserved : 25; + uint32_t vcn_rb_decouple : 1; + uint32_t reserved : 24; } flags; uint32_t all; }; From fc3136730ba3e606b1c892e041f0b8356bda5457 Mon Sep 17 00:00:00 2001 From: Bokun Zhang Date: Mon, 16 Oct 2023 12:49:11 -0400 Subject: [PATCH 36/54] drm/amd/amdgpu/vcn: Add RB decouple feature under SRIOV - P2 - Add function to check if RB decouple is enabled under SRIOV Signed-off-by: Bokun Zhang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index fabb83e9d9ae..858ef21ae515 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -126,6 +126,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5), /* AV1 Support MODE*/ AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6), + /* VCN RB decouple */ + AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7), }; enum AMDGIM_REG_ACCESS_FLAG { @@ -326,6 +328,8 @@ static inline bool is_virtual_machine(void) ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug)) #define amdgpu_sriov_is_av1_support(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT) +#define amdgpu_sriov_is_vcn_rb_decouple(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, From eb9d6256b9b072b29193a3a051b2f7e76e0fd0de Mon Sep 17 00:00:00 2001 From: Bokun Zhang Date: Mon, 16 Oct 2023 12:49:58 -0400 Subject: [PATCH 37/54] drm/amd/amdgpu/vcn: Add RB decouple feature under SRIOV - P3 - Update VCN header for RB decouple feature - Add metadata struct, metadata will be placed after each RB Signed-off-by: Bokun Zhang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 48 +++++++++++++++++++++---- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 0815c5a97564..514c98ea144f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -169,6 +169,9 @@ #define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11) #define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11) #define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14) +#define AMDGPU_VCN_VF_RB_DECOUPLE_FLAG (1 << 15) + +#define MAX_NUM_VCN_RB_SETUP 4 #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 @@ -335,15 +338,30 @@ struct amdgpu_fw_shared { struct amdgpu_fw_shared_smu_interface_info smu_interface_info; }; +struct amdgpu_vcn_rb_setup_info { + uint32_t rb_addr_lo; + uint32_t rb_addr_hi; + uint32_t rb_size; +}; + struct amdgpu_fw_shared_rb_setup { uint32_t is_rb_enabled_flags; - uint32_t rb_addr_lo; - uint32_t rb_addr_hi; - uint32_t rb_size; - uint32_t rb4_addr_lo; - uint32_t rb4_addr_hi; - uint32_t rb4_size; - uint32_t reserved[6]; + + union { + struct { + uint32_t rb_addr_lo; + uint32_t rb_addr_hi; + uint32_t rb_size; + uint32_t rb4_addr_lo; + uint32_t rb4_addr_hi; + uint32_t rb4_size; + uint32_t reserved[6]; + }; + + struct { + struct amdgpu_vcn_rb_setup_info rb_info[MAX_NUM_VCN_RB_SETUP]; + }; + }; }; struct amdgpu_fw_shared_drm_key_wa { @@ -351,6 +369,11 @@ struct amdgpu_fw_shared_drm_key_wa { uint8_t reserved[3]; }; +struct amdgpu_fw_shared_queue_decouple { + uint8_t is_enabled; + uint8_t reserved[7]; +}; + struct amdgpu_vcn4_fw_shared { uint32_t present_flag_0; uint8_t pad[12]; @@ -361,6 +384,8 @@ struct amdgpu_vcn4_fw_shared { struct amdgpu_fw_shared_rb_setup rb_setup; struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; + struct amdgpu_fw_shared_queue_decouple decouple; }; struct amdgpu_vcn_fwlog { @@ -378,6 +403,15 @@ struct amdgpu_vcn_decode_buffer { uint32_t pad[30]; }; +struct amdgpu_vcn_rb_metadata { + uint32_t size; + uint32_t present_flag_0; + + uint8_t version; + uint8_t ring_id; + uint8_t pad[26]; +}; + #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 #define VCN_BLOCK_DECODE_DISABLE_MASK 0x40 #define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0 From 017634a68dab9c2ebdcd51b495ef6e53b95280cd Mon Sep 17 00:00:00 2001 From: Bokun Zhang Date: Mon, 16 Oct 2023 12:51:00 -0400 Subject: [PATCH 38/54] drm/amd/amdgpu/vcn: Add RB decouple feature under SRIOV - P4 - In VCN 4 SRIOV code path, add code to enable RB decouple feature Signed-off-by: Bokun Zhang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 71 +++++++++++++++++++++------ 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 7ab9263d3e19..48bfcd0d558b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -176,9 +176,6 @@ static int vcn_v4_0_sw_init(void *handle) AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING; } - if (amdgpu_sriov_vf(adev)) - fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); - if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } @@ -1209,6 +1206,24 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) return 0; } +static int vcn_v4_0_init_ring_metadata(struct amdgpu_device *adev, uint32_t vcn_inst, struct amdgpu_ring *ring_enc) +{ + struct amdgpu_vcn_rb_metadata *rb_metadata = NULL; + uint8_t *rb_ptr = (uint8_t *)ring_enc->ring; + + rb_ptr += ring_enc->ring_size; + rb_metadata = (struct amdgpu_vcn_rb_metadata *)rb_ptr; + + memset(rb_metadata, 0, sizeof(struct amdgpu_vcn_rb_metadata)); + rb_metadata->size = sizeof(struct amdgpu_vcn_rb_metadata); + rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG); + rb_metadata->version = 1; + rb_metadata->ring_id = vcn_inst & 0xFF; + + return 0; +} + static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) { int i; @@ -1331,11 +1346,30 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) rb_enc_addr = ring_enc->gpu_addr; rb_setup->is_rb_enabled_flags |= RB_ENABLED; - rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); - rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); - rb_setup->rb_size = ring_enc->ring_size / 4; fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + if (amdgpu_sriov_is_vcn_rb_decouple(adev)) { + vcn_v4_0_init_ring_metadata(adev, i, ring_enc); + + memset((void *)&rb_setup->rb_info, 0, sizeof(struct amdgpu_vcn_rb_setup_info) * MAX_NUM_VCN_RB_SETUP); + if (!(adev->vcn.harvest_config & (1 << 0))) { + rb_setup->rb_info[0].rb_addr_lo = lower_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr); + rb_setup->rb_info[0].rb_addr_hi = upper_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr); + rb_setup->rb_info[0].rb_size = adev->vcn.inst[0].ring_enc[0].ring_size / 4; + } + if (!(adev->vcn.harvest_config & (1 << 1))) { + rb_setup->rb_info[2].rb_addr_lo = lower_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr); + rb_setup->rb_info[2].rb_addr_hi = upper_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr); + rb_setup->rb_info[2].rb_size = adev->vcn.inst[1].ring_enc[0].ring_size / 4; + } + fw_shared->decouple.is_enabled = 1; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG); + } else { + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); + rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); + rb_setup->rb_size = ring_enc->ring_size / 4; + } + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); @@ -1807,6 +1841,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .extra_dw = sizeof(struct amdgpu_vcn_rb_metadata), .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = vcn_v4_0_unified_ring_set_wptr, @@ -2020,16 +2055,20 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ { uint32_t ip_instance; - switch (entry->client_id) { - case SOC15_IH_CLIENTID_VCN: - ip_instance = 0; - break; - case SOC15_IH_CLIENTID_VCN1: - ip_instance = 1; - break; - default: - DRM_ERROR("Unhandled client id: %d\n", entry->client_id); - return 0; + if (amdgpu_sriov_is_vcn_rb_decouple(adev)) { + ip_instance = entry->ring_id; + } else { + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } } DRM_DEBUG("IH: VCN TRAP\n"); From 49c260bef3ac9fc1bb73acf98036dac64712536d Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 19 Oct 2023 14:59:19 +0800 Subject: [PATCH 39/54] drm/amdgpu: fix typo for amdgpu ras error data print typo fix. Fixes: 5b1270beb380 ("drm/amdgpu: add ras_err_info to identify RAS error source") Signed-off-by: Yang Wang Reviewed-by: Candice Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index b3c32c667e09..70dd249f2ba7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1038,7 +1038,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, ras_mgr->err_data.ue_count, blk_name); else dev_info(adev->dev, "%ld correctable hardware errors detected in %s block\n", - ras_mgr->err_data.ue_count, blk_name); + ras_mgr->err_data.ce_count, blk_name); for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; @@ -1055,7 +1055,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, "%lld correctable hardware errors detected in %s block\n", mcm_info->socket_id, mcm_info->die_id, - err_info->ue_count, + err_info->ce_count, blk_name); } } From 66d64e4e03ef5ecf330075a5f1fc449549ce374a Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Thu, 19 Oct 2023 22:49:32 +0800 Subject: [PATCH 40/54] drm/amdgpu: Enable RAS feature by default for APU Enable RAS feature by default for aqua vanjaram on apu platform. Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 70dd249f2ba7..eb81efacefe2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2653,18 +2653,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) /* hw_supported needs to be aligned with RAS block mask. */ adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK; - - /* - * Disable ras feature for aqua vanjaram - * by default on apu platform. - */ - if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) && - adev->gmc.is_app_apu) - adev->ras_enabled = amdgpu_ras_enable != 1 ? 0 : - adev->ras_hw_enabled & amdgpu_ras_mask; - else - adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : - adev->ras_hw_enabled & amdgpu_ras_mask; + adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : + adev->ras_hw_enabled & amdgpu_ras_mask; } static void amdgpu_ras_counte_dw(struct work_struct *work) From fa9dd7a285efbcf81dc0fc5a75bd9341e017c80f Mon Sep 17 00:00:00 2001 From: Li Ma Date: Mon, 16 Oct 2023 14:36:26 +0800 Subject: [PATCH 41/54] drm/amdgpu: fix missing stuff in NBIO v7.11 add get_clockgating_state, update_medium_grain_light_sleep and update_medium_grain_clock_gating in nbio_v7_11_funcs v1: add missing funcs in nbio_v7_11.c v2: modify the if condition and add spport for nbio v7.11 clockgating. Signed-off-by: Li Ma Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c | 78 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + .../asic_reg/nbio/nbio_7_11_0_offset.h | 6 ++ .../asic_reg/nbio/nbio_7_11_0_sh_mask.h | 13 +++- 4 files changed, 97 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 3a94f249929e..676ab1d20d2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -272,6 +272,81 @@ static void nbio_v7_11_init_registers(struct amdgpu_device *adev) */ } +static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL); + if (enable) { + data |= (BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } else { + data &= ~(BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL, data); +} + +static void nbio_v7_11_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2); + if (enable) + data |= BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + else + data &= ~BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1); + if (enable) { + data |= (BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1, data); +} + +static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL); + if (data & BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2); + if (data & BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + const struct amdgpu_nbio_funcs nbio_v7_11_funcs = { .get_hdp_flush_req_offset = nbio_v7_11_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_11_get_hdp_flush_done_offset, @@ -288,6 +363,9 @@ const struct amdgpu_nbio_funcs nbio_v7_11_funcs = { .enable_doorbell_aperture = nbio_v7_11_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_11_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_11_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v7_11_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v7_11_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v7_11_get_clockgating_state, .ih_control = nbio_v7_11_ih_control, .init_registers = nbio_v7_11_init_registers, .remap_hdp_registers = nbio_v7_11_remap_hdp_registers, diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index df7462cec6ab..7fe199560264 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -863,6 +863,7 @@ static int soc21_common_set_clockgating_state(void *handle, case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): case IP_VERSION(7, 7, 0): + case IP_VERSION(7, 11, 0): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h index 846a8cf3926a..ff30f04be591 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h @@ -775,6 +775,12 @@ #define regPCIE_USB4_ERR_CNTL5_BASE_IDX 5 #define regPCIE_USB4_LC_CNTL1 0x420179 #define regPCIE_USB4_LC_CNTL1_BASE_IDX 5 +#define regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL 0x420118 +#define regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL_BASE_IDX 5 +#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2 0x42001c +#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2_BASE_IDX 5 +#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1 0x420187 +#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1_BASE_IDX 5 // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h index 84242240f611..7f131999a263 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h @@ -24634,7 +24634,18 @@ //PCIE_USB4_LC_CNTL1 #define PCIE_USB4_LC_CNTL1__PCIE_USB_ROUTER_CLEAR_PATH_MODE__SHIFT 0x0 #define PCIE_USB4_LC_CNTL1__PCIE_USB_ROUTER_CLEAR_PATH_MODE_MASK 0x00000001L - +//BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL +#define BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK 0x00000001L +#define BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK 0x00000002L +#define BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK 0x00000020L +#define BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK 0x00000040L +#define BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK 0x00000080L +#define BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK 0x00000100L +//BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK 0x00010000L +//BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK 0x00000001L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK 0x00000008L // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp //BIF_CFG_DEV0_RC0_VENDOR_ID From 9d7a965e22e5c0abd1aa6aaa389a81de58ca5182 Mon Sep 17 00:00:00 2001 From: Li Ma Date: Wed, 18 Oct 2023 13:32:39 +0800 Subject: [PATCH 42/54] drm/amdgpu: add clockgating support for NBIO v7.7.1 add clockgating support for NBIO ip 7.7.1 Signed-off-by: Li Ma Reviewed-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 7fe199560264..8c6cab641a1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -863,6 +863,7 @@ static int soc21_common_set_clockgating_state(void *handle, case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): case IP_VERSION(7, 7, 0): + case IP_VERSION(7, 7, 1): case IP_VERSION(7, 11, 0): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); From 21226f02d77b6a1efcf987df8d97b2a4f40087bd Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 18 Oct 2023 17:57:25 +0800 Subject: [PATCH 43/54] drm/amdgpu: replace reset_error_count with amdgpu_ras_reset_error_count Simplify the code. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++------ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 9 ++------- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 7 ++----- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 7 ++----- 5 files changed, 10 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0f98f720d9ca..2d385476939b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3578,9 +3578,7 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) if (adev->asic_reset_res) goto fail; - if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB); } else { task_barrier_full(&hive->tb); @@ -5201,9 +5199,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (!r && amdgpu_ras_intr_triggered()) { list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops && - tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev); + amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB); } amdgpu_ras_intr_cleared(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 19aa6b7b16fb..9d5d742ee9d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -908,7 +908,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_comm adev->gmc.xgmi.num_physical_nodes == 0) return 0; - adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL); return amdgpu_ras_block_late_init(adev, ras_block); } @@ -1075,7 +1075,7 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, break; } - adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL); err_data->ue_count += ue_cnt; err_data->ce_count += ce_cnt; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3a1050344b59..5fed01e34928 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1587,13 +1587,8 @@ static int gmc_v9_0_late_init(void *handle) } if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); - - if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops && - adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count) - adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__HDP); } r = amdgpu_gmc_ras_late_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index dff66e1ae7ea..683d51ae4bf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1749,11 +1749,8 @@ static int sdma_v4_0_late_init(void *handle) sdma_v4_0_setup_ulv(adev); - if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count) - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev); - } + if (!amdgpu_persistent_edc_harvesting_supported(adev)) + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 31aa245552d6..c46bc6aa4f48 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1276,11 +1276,8 @@ static int sdma_v4_4_2_late_init(void *handle) .cb = sdma_v4_4_2_process_ras_data_cb, }; #endif - if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count) - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev); - } + if (!amdgpu_persistent_edc_harvesting_supported(adev)) + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA); return 0; } From 8096df766474b54758b268afe900ba9d7ab0cc37 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 12 Oct 2023 11:22:07 +0800 Subject: [PATCH 44/54] drm/amdgpu: add set/get mca debug mode operations Record the debug mode status in RAS. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 21 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 5 +++++ 2 files changed, 26 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index eb81efacefe2..3c83a2b8fb2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3311,6 +3311,27 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) return 0; } +void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (con) + con->is_mca_debug_mode = enable; +} + +bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!con) + return false; + + if (mca_funcs && mca_funcs->mca_set_debug_mode) + return con->is_mca_debug_mode; + else + return true; +} /* Register each ip ras block into amdgpu ras */ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 3f9ac0ab67e6..2fdfef62ee27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -434,6 +434,8 @@ struct amdgpu_ras { /* Indicates smu whether need update bad channel info */ bool update_channel_flag; + /* Record status of smu mca debug mode */ + bool is_mca_debug_mode; /* Record special requirements of gpu reset caller */ uint32_t gpu_reset_flags; @@ -768,6 +770,9 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev); int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con); +void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); +bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev); + int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj); void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev); From 626121fce415960522ed608a4e4949a347c9a8a3 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang Date: Wed, 20 Sep 2023 17:05:41 +0800 Subject: [PATCH 45/54] drm/amdgpu: update the xgmi ta interface header Update the header file to the v20.00.00.13 v1: rename TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO to TA_COMMAND_XGMI__GET_TOPOLOGY_INFO And also rename struct ta_xgmi_cmd_get_peer_link_info_output to ta_xgmi_cmd_get_peer_link_info accordingly v2: add structs to support xgmi GET_EXTEND_PEER_LINK command Signed-off-by: Shiwu Zhang Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 +-- drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h | 64 +++++++++++++++++++------ 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index b153acf9ed05..bd398e10fa0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1388,7 +1388,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, /* Fill in the shared memory with topology information as input */ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_TOPOLOGY_INFO; topology_info_input->num_nodes = number_devices; for (i = 0; i < topology_info_input->num_nodes; i++) { @@ -1399,7 +1399,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, } /* Invoke xgmi ta to get the topology information */ - ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO); + ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_TOPOLOGY_INFO); if (ret) return ret; @@ -1424,7 +1424,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, /* Invoke xgmi ta again to get the link information */ if (psp_xgmi_peer_link_info_supported(psp)) { - struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output; + struct ta_xgmi_cmd_get_peer_link_info *link_info_output; bool requires_reflection = (psp->xgmi_context.supports_extended_data && get_extended_data) || diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h index da815a93d46e..d5748032674e 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h @@ -1,5 +1,5 @@ /* - * Copyright 2018 Advanced Micro Devices, Inc. + * Copyright 2018-2022 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,7 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ - #ifndef _TA_XGMI_IF_H #define _TA_XGMI_IF_H @@ -28,20 +27,31 @@ #define RSP_ID_MASK (1U << 31) #define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) +#define EXTEND_PEER_LINK_INFO_CMD_FLAG 1 + enum ta_command_xgmi { + /* Initialize the Context and Session Topology */ TA_COMMAND_XGMI__INITIALIZE = 0x00, + /* Gets the current GPU's node ID */ TA_COMMAND_XGMI__GET_NODE_ID = 0x01, + /* Gets the current GPU's hive ID */ TA_COMMAND_XGMI__GET_HIVE_ID = 0x02, - TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03, + /* Gets the Peer's topology Information */ + TA_COMMAND_XGMI__GET_TOPOLOGY_INFO = 0x03, + /* Sets the Peer's topology Information */ TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04, - TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B + /* Gets the total links between adjacent peer dies in hive */ + TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B, + /* Gets the total links and connected port numbers between adjacent peer dies in hive */ + TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS = 0x0C }; /* XGMI related enumerations */ /**********************************************************/; -enum ta_xgmi_connected_nodes { - TA_XGMI__MAX_CONNECTED_NODES = 64 -}; +enum { TA_XGMI__MAX_CONNECTED_NODES = 64 }; +enum { TA_XGMI__MAX_INTERNAL_STATE = 32 }; +enum { TA_XGMI__MAX_INTERNAL_STATE_BUFFER = 128 }; +enum { TA_XGMI__MAX_PORT_NUM = 8 }; enum ta_xgmi_status { TA_XGMI_STATUS__SUCCESS = 0x00, @@ -81,6 +91,18 @@ struct ta_xgmi_peer_link_info { uint8_t num_links; }; +struct xgmi_connected_port_num { + uint8_t dst_xgmi_port_num; + uint8_t src_xgmi_port_num; +}; + +/* support both the port num and num_links */ +struct ta_xgmi_extend_peer_link_info { + uint64_t node_id; + uint8_t num_links; + struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM]; +}; + struct ta_xgmi_cmd_initialize_output { uint32_t status; }; @@ -103,16 +125,21 @@ struct ta_xgmi_cmd_get_topology_info_output { struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; }; -struct ta_xgmi_cmd_get_peer_link_info_output { - uint32_t num_nodes; - struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; -}; - struct ta_xgmi_cmd_set_topology_info_input { uint32_t num_nodes; struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; }; +/* support XGMI TA w/ and w/o port_num both so two similar structs defined */ +struct ta_xgmi_cmd_get_peer_link_info { + uint32_t num_nodes; + struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; + +struct ta_xgmi_cmd_get_extend_peer_link_info { + uint32_t num_nodes; + struct ta_xgmi_extend_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; /**********************************************************/ /* Common input structure for XGMI callbacks */ union ta_xgmi_cmd_input { @@ -126,16 +153,23 @@ union ta_xgmi_cmd_output { struct ta_xgmi_cmd_get_node_id_output get_node_id; struct ta_xgmi_cmd_get_hive_id_output get_hive_id; struct ta_xgmi_cmd_get_topology_info_output get_topology_info; - struct ta_xgmi_cmd_get_peer_link_info_output get_link_info; + struct ta_xgmi_cmd_get_peer_link_info get_link_info; + struct ta_xgmi_cmd_get_extend_peer_link_info get_extend_link_info; }; -/**********************************************************/ struct ta_xgmi_shared_memory { uint32_t cmd_id; uint32_t resp_id; enum ta_xgmi_status xgmi_status; + + /* if the number of xgmi link record is more than 128, driver will set the + * flag 0 to get the first 128 of the link records and will set to 1, to get + * the second set + */ uint8_t flag_extend_link_record; - uint8_t reserved0[3]; + /* bit0: port_num info support flag for GET_EXTEND_PEER_LINKS commmand */ + uint8_t caps_flag; + uint8_t reserved[2]; union ta_xgmi_cmd_input xgmi_in_message; union ta_xgmi_cmd_output xgmi_out_message; }; From d9443ac4f9ea97f9eaebf2569d3fd044da4c9c98 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 17 Oct 2023 19:06:24 +0800 Subject: [PATCH 46/54] drm/amdgpu: drop status query/reset for GCEA 9.4.3 and MMEA 1.8 PMFW will be responsible for them. v2: remove query interfaces. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 60 ---------- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 143 ------------------------ 2 files changed, 203 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index a1c2c952d882..362bf51ab1d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3754,10 +3754,6 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = { AMDGPU_GFX_LDS_MEM, 4}, }; -static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = { - SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 -}; - static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { @@ -3846,39 +3842,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); } -static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t i, j; - uint32_t reg_value; - - mutex_lock(&adev->grbm_idx_mutex); - - for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { - gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); - reg_value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), - regGCEA_ERR_STATUS); - if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) || - REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) || - REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { - dev_warn(adev->dev, - "GCEA err detected at instance: %d, status: 0x%x!\n", - j, reg_value); - } - /* clear after read */ - reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS, - CLEAR_ERROR_STATUS, 0x1); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, - reg_value); - } - } - - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev, int xcc_id) { @@ -3983,7 +3946,6 @@ static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev, static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { - gfx_v9_4_3_inst_query_ea_err_status(adev, xcc_id); gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id); gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id); } @@ -3996,27 +3958,6 @@ static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev, WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3); } -static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t i, j; - uint32_t value; - - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { - gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); - value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS); - value = REG_SET_FIELD(value, GCEA_ERR_STATUS, - CLEAR_ERROR_STATUS, 0x1); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, value); - } - } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev, int xcc_id) { @@ -4042,7 +3983,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id); - gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id); gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id); } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index aa00483e7b37..ea142611be1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -700,152 +700,9 @@ static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev) mmhub_v1_8_inst_reset_ras_error_count(adev, i); } -static const u32 mmhub_v1_8_mmea_err_status_reg[] __maybe_unused = { - regMMEA0_ERR_STATUS, - regMMEA1_ERR_STATUS, - regMMEA2_ERR_STATUS, - regMMEA3_ERR_STATUS, - regMMEA4_ERR_STATUS, -}; - -static void mmhub_v1_8_inst_query_ras_err_status(struct amdgpu_device *adev, - uint32_t mmhub_inst) -{ - uint32_t reg_value; - uint32_t mmea_err_status_addr_dist; - uint32_t i; - - /* query mmea ras err status */ - mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS; - for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) { - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_ERR_STATUS, - i * mmea_err_status_addr_dist); - if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) || - REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) || - REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { - dev_warn(adev->dev, - "Detected MMEA%d err in MMHUB%d, status: 0x%x\n", - i, mmhub_inst, reg_value); - } - } - - /* query mm_cane ras err status */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS); - if (REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_STATUS) || - REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_WRRSP_STATUS) || - REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_DATAPARITY_ERROR)) { - dev_warn(adev->dev, - "Detected MM CANE err in MMHUB%d, status: 0x%x\n", - mmhub_inst, reg_value); - } -} - -static void mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev) -{ - uint32_t inst_mask; - uint32_t i; - - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { - dev_warn(adev->dev, "MMHUB RAS is not supported\n"); - return; - } - - inst_mask = adev->aid_mask; - for_each_inst(i, inst_mask) - mmhub_v1_8_inst_query_ras_err_status(adev, i); -} - -static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device *adev, - uint32_t mmhub_inst) -{ - uint32_t mmea_cgtt_clk_cntl_addr_dist; - uint32_t mmea_err_status_addr_dist; - uint32_t reg_value; - uint32_t i; - - /* reset mmea ras err status */ - mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - regMMEA0_CGTT_CLK_CTRL; - mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS; - for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) { - /* force clk branch on for response path - * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1 - */ - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist); - reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL, - SOFT_OVERRIDE_RETURN, 1); - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist, - reg_value); - - /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */ - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_ERR_STATUS, - i * mmea_err_status_addr_dist); - reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS, - CLEAR_ERROR_STATUS, 1); - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_ERR_STATUS, - i * mmea_err_status_addr_dist, - reg_value); - - /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0 */ - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist); - reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL, - SOFT_OVERRIDE_RETURN, 0); - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist, - reg_value); - } - - /* reset mm_cane ras err status - * force clk branch on for response path - * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1 - */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL); - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL, - SOFT_OVERRIDE_ATRET, 1); - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value); - - /* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS); - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS, - CLEAR_ERROR_STATUS, 1); - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, reg_value); - - /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL); - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL, - SOFT_OVERRIDE_ATRET, 0); - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value); -} - -static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev) -{ - uint32_t inst_mask; - uint32_t i; - - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { - dev_warn(adev->dev, "MMHUB RAS is not supported\n"); - return; - } - - inst_mask = adev->aid_mask; - for_each_inst(i, inst_mask) - mmhub_v1_8_inst_reset_ras_err_status(adev, i); -} - static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { .query_ras_error_count = mmhub_v1_8_query_ras_error_count, .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, - .query_ras_error_status = mmhub_v1_8_query_ras_error_status, - .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status, }; struct amdgpu_mmhub_ras mmhub_v1_8_ras = { From e8a5ded36b4c68db4e0d4066ae2d420116715105 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang Date: Wed, 20 Sep 2023 18:36:55 +0800 Subject: [PATCH 47/54] drm/amdgpu: prepare the output buffer for GET_PEER_LINKS command Per the xgmi ta implementation, KGD needs to fill in node_ids in concern into the shared command output buffer rather than the command input buffer. Input buffer is not used for GET_PEER_LINKS command execution. In this way, xgmi ta can reuse the node info in the output buffer just filled in and populate the same buffer with link info directly. Signed-off-by: Shiwu Zhang Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index bd398e10fa0a..37891042ce9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1431,14 +1431,22 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6); - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; + link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; + /* popluate the shared output buffer rather than the cmd input buffer + * with node_ids as the input for GET_PEER_LINKS command execution. + * This is required for GET_PEER_LINKS only per xgmi ta implementation + */ + for (i = 0; i < topology->num_nodes; i++) { + link_info_output->nodes[i].node_id = topology->nodes[i].node_id; + } + link_info_output->num_nodes = topology->num_nodes; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_PEER_LINKS); if (ret) return ret; - link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; for (i = 0; i < topology->num_nodes; i++) { /* accumulate num_links on extended data */ topology->nodes[i].num_links = get_extended_data ? From 723fac64d05d7005929babbeb41dd09fb45f6f35 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang Date: Thu, 21 Sep 2023 20:05:12 +0800 Subject: [PATCH 48/54] drm/amdgpu: support the port num info based on the capability flag XGMI TA will set the capability flag to indicate whether the port_num info is supported or not. KGD checks the flag and accordingly picks up the right buffer format and send the right command to TA to retrieve the info. v2: simplify the code by reusing the same statement (lijo) Signed-off-by: Shiwu Zhang Acked-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 45 ++++++++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 + 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 37891042ce9f..648bd5e12830 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1267,6 +1267,8 @@ invoke: xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE; ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + /* note down the capbility flag for XGMI TA */ + psp->xgmi_context.xgmi_ta_caps = xgmi_cmd->caps_flag; return ret; } @@ -1425,35 +1427,52 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, /* Invoke xgmi ta again to get the link information */ if (psp_xgmi_peer_link_info_supported(psp)) { struct ta_xgmi_cmd_get_peer_link_info *link_info_output; + struct ta_xgmi_cmd_get_extend_peer_link_info *link_extend_info_output; bool requires_reflection = (psp->xgmi_context.supports_extended_data && get_extended_data) || amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6); + bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps & + EXTEND_PEER_LINK_INFO_CMD_FLAG; - link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; /* popluate the shared output buffer rather than the cmd input buffer * with node_ids as the input for GET_PEER_LINKS command execution. - * This is required for GET_PEER_LINKS only per xgmi ta implementation + * This is required for GET_PEER_LINKS per xgmi ta implementation. + * The same requirement for GET_EXTEND_PEER_LINKS command. */ - for (i = 0; i < topology->num_nodes; i++) { - link_info_output->nodes[i].node_id = topology->nodes[i].node_id; + if (ta_port_num_support) { + link_extend_info_output = &xgmi_cmd->xgmi_out_message.get_extend_link_info; + + for (i = 0; i < topology->num_nodes; i++) + link_extend_info_output->nodes[i].node_id = topology->nodes[i].node_id; + + link_extend_info_output->num_nodes = topology->num_nodes; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS; + } else { + link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; + + for (i = 0; i < topology->num_nodes; i++) + link_info_output->nodes[i].node_id = topology->nodes[i].node_id; + + link_info_output->num_nodes = topology->num_nodes; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; } - link_info_output->num_nodes = topology->num_nodes; - - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; - ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_PEER_LINKS); + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); if (ret) return ret; for (i = 0; i < topology->num_nodes; i++) { + uint8_t node_num_links = ta_port_num_support ? + link_extend_info_output->nodes[i].num_links : link_info_output->nodes[i].num_links; /* accumulate num_links on extended data */ - topology->nodes[i].num_links = get_extended_data ? - topology->nodes[i].num_links + - link_info_output->nodes[i].num_links : - ((requires_reflection && topology->nodes[i].num_links) ? topology->nodes[i].num_links : - link_info_output->nodes[i].num_links); + if (get_extended_data) { + topology->nodes[i].num_links = topology->nodes[i].num_links + node_num_links; + } else { + topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ? + topology->nodes[i].num_links : node_num_links; + } /* reflect the topology information for bi-directionality */ if (requires_reflection && topology->nodes[i].num_hops) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 3e67ed63e638..7111dd32e66f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -189,6 +189,7 @@ struct psp_xgmi_context { struct ta_context context; struct psp_xgmi_topology_info top_info; bool supports_extended_data; + uint8_t xgmi_ta_caps; }; struct psp_ras_context { From 2d6a2a28cdeade75021503f86e57e7ebce7eb74c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 15 Sep 2023 16:38:59 +0200 Subject: [PATCH 49/54] drm/amdgpu: Encapsulate all device reset info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To better organize struct amdgpu_device, keep all reset information related fields together in a separated struct. Signed-off-by: André Almeida Signed-off-by: Shashank Sharma Reviewed-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 34 +++++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 10 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 ++++++----- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 17c4872a0287..ca94d47cb504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -773,6 +773,26 @@ struct amdgpu_mqd { struct amdgpu_reset_domain; struct amdgpu_fru_info; +#ifdef CONFIG_DEV_COREDUMP +struct amdgpu_coredump_info { + struct amdgpu_device *adev; + struct amdgpu_task_info reset_task_info; + struct timespec64 reset_time; + bool reset_vram_lost; +}; +#endif + +struct amdgpu_reset_info { + /* reset dump register */ + u32 *reset_dump_reg_list; + u32 *reset_dump_reg_value; + int num_regs; + +#ifdef CONFIG_DEV_COREDUMP + struct amdgpu_coredump_info *coredump_info; +#endif +}; + /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. */ @@ -1081,10 +1101,7 @@ struct amdgpu_device { struct mutex benchmark_mutex; - /* reset dump register */ - uint32_t *reset_dump_reg_list; - uint32_t *reset_dump_reg_value; - int num_regs; + struct amdgpu_reset_info reset_info; bool scpm_enabled; uint32_t scpm_status; @@ -1111,15 +1128,6 @@ static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, return adev->ip_versions[ip][inst] & ~0xFFU; } -#ifdef CONFIG_DEV_COREDUMP -struct amdgpu_coredump_info { - struct amdgpu_device *adev; - struct amdgpu_task_info reset_task_info; - struct timespec64 reset_time; - bool reset_vram_lost; -}; -#endif - static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) { return container_of(ddev, struct amdgpu_device, ddev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a4faea4fa0b5..3136a0774dd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2016,8 +2016,8 @@ static ssize_t amdgpu_reset_dump_register_list_read(struct file *f, if (ret) return ret; - for (i = 0; i < adev->num_regs; i++) { - sprintf(reg_offset, "0x%x\n", adev->reset_dump_reg_list[i]); + for (i = 0; i < adev->reset_info.num_regs; i++) { + sprintf(reg_offset, "0x%x\n", adev->reset_info.reset_dump_reg_list[i]); up_read(&adev->reset_domain->sem); if (copy_to_user(buf + len, reg_offset, strlen(reg_offset))) return -EFAULT; @@ -2074,9 +2074,9 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, if (ret) goto error_free; - swap(adev->reset_dump_reg_list, tmp); - swap(adev->reset_dump_reg_value, new); - adev->num_regs = i; + swap(adev->reset_info.reset_dump_reg_list, tmp); + swap(adev->reset_info.reset_dump_reg_value, new); + adev->reset_info.num_regs = i; up_write(&adev->reset_domain->sem); ret = size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2d385476939b..c4931d519599 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5048,10 +5048,12 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) lockdep_assert_held(&adev->reset_domain->sem); - for (i = 0; i < adev->num_regs; i++) { - adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]); - trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], - adev->reset_dump_reg_value[i]); + for (i = 0; i < adev->reset_info.num_regs; i++) { + adev->reset_info.reset_dump_reg_value[i] = + RREG32(adev->reset_info.reset_dump_reg_list[i]); + + trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], + adev->reset_info.reset_dump_reg_value[i]); } return 0; @@ -5089,13 +5091,13 @@ static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->num_regs) { + if (coredump->adev->reset_info.num_regs) { drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - for (i = 0; i < coredump->adev->num_regs; i++) + for (i = 0; i < coredump->adev->reset_info.num_regs; i++) drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_dump_reg_list[i], - coredump->adev->reset_dump_reg_value[i]); + coredump->adev->reset_info.reset_dump_reg_list[i], + coredump->adev->reset_info.reset_dump_reg_value[i]); } return count - iter.remain; From 69619868d39bf364721db8d9d2429420704417a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 15 Sep 2023 16:44:16 +0200 Subject: [PATCH 50/54] drm/amdgpu: Move coredump code to amdgpu_reset file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Giving that we use codedump just for device resets, move it's functions and structs to a more semantic file, the amdgpu_reset.{c, h}. Signed-off-by: André Almeida Signed-off-by: Shashank Sharma Reviewed-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 78 --------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 79 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 11 +++ 4 files changed, 90 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ca94d47cb504..8df702eaa2ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -773,15 +773,6 @@ struct amdgpu_mqd { struct amdgpu_reset_domain; struct amdgpu_fru_info; -#ifdef CONFIG_DEV_COREDUMP -struct amdgpu_coredump_info { - struct amdgpu_device *adev; - struct amdgpu_task_info reset_task_info; - struct timespec64 reset_time; - bool reset_vram_lost; -}; -#endif - struct amdgpu_reset_info { /* reset dump register */ u32 *reset_dump_reg_list; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c4931d519599..9340e8dc0413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -32,8 +32,6 @@ #include #include #include -#include -#include #include #include @@ -5059,82 +5057,6 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) return 0; } -#ifndef CONFIG_DEV_COREDUMP -static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ -} -#else -static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, - size_t count, void *data, size_t datalen) -{ - struct drm_printer p; - struct amdgpu_coredump_info *coredump = data; - struct drm_print_iterator iter; - int i; - - iter.data = buffer; - iter.offset = 0; - iter.start = offset; - iter.remain = count; - - p = drm_coredump_printer(&iter); - - drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); - drm_printf(&p, "module: " KBUILD_MODNAME "\n"); - drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec); - if (coredump->reset_task_info.pid) - drm_printf(&p, "process_name: %s PID: %d\n", - coredump->reset_task_info.process_name, - coredump->reset_task_info.pid); - - if (coredump->reset_vram_lost) - drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->reset_info.num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < coredump->adev->reset_info.num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_info.reset_dump_reg_list[i], - coredump->adev->reset_info.reset_dump_reg_value[i]); - } - - return count - iter.remain; -} - -static void amdgpu_devcoredump_free(void *data) -{ - kfree(data); -} - -static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ - struct amdgpu_coredump_info *coredump; - struct drm_device *dev = adev_to_drm(adev); - - coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); - - if (!coredump) { - DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); - return; - } - - coredump->reset_vram_lost = vram_lost; - - if (reset_context->job && reset_context->job->vm) - coredump->reset_task_info = reset_context->job->vm->task_info; - - coredump->adev = adev; - - ktime_get_ts64(&coredump->reset_time); - - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, - amdgpu_devcoredump_read, amdgpu_devcoredump_free); -} -#endif - int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 970bfece775c..f53ddca35be6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -21,6 +21,9 @@ * */ +#include +#include + #include "amdgpu_reset.h" #include "aldebaran.h" #include "sienna_cichlid.h" @@ -159,5 +162,81 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain) up_write(&reset_domain->sem); } +#ifndef CONFIG_DEV_COREDUMP +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ +} +#else +static ssize_t +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen) +{ + struct drm_printer p; + struct amdgpu_coredump_info *coredump = data; + struct drm_print_iterator iter; + int i; + iter.data = buffer; + iter.offset = 0; + iter.start = offset; + iter.remain = count; + p = drm_coredump_printer(&iter); + + drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, + coredump->reset_time.tv_nsec); + + if (coredump->reset_task_info.pid) + drm_printf(&p, "process_name: %s PID: %d\n", + coredump->reset_task_info.process_name, + coredump->reset_task_info.pid); + + if (coredump->reset_vram_lost) + drm_printf(&p, "VRAM is lost due to GPU reset!\n"); + if (coredump->adev->reset_info.num_regs) { + drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); + + for (i = 0; i < coredump->adev->reset_info.num_regs; i++) + drm_printf(&p, "0x%08x: 0x%08x\n", + coredump->adev->reset_info.reset_dump_reg_list[i], + coredump->adev->reset_info.reset_dump_reg_value[i]); + } + + return count - iter.remain; +} + +static void amdgpu_devcoredump_free(void *data) +{ + kfree(data); +} + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_coredump_info *coredump; + struct drm_device *dev = adev_to_drm(adev); + + coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); + + if (!coredump) { + DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); + return; + } + + coredump->reset_vram_lost = vram_lost; + + if (reset_context->job && reset_context->job->vm) + coredump->reset_task_info = reset_context->job->vm->task_info; + + coredump->adev = adev; + + ktime_get_ts64(&coredump->reset_time); + + dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, + amdgpu_devcoredump_read, amdgpu_devcoredump_free); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 471d789b33a5..f4cfa2c7c9d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -89,6 +89,14 @@ struct amdgpu_reset_domain { atomic_t reset_res; }; +#ifdef CONFIG_DEV_COREDUMP +struct amdgpu_coredump_info { + struct amdgpu_device *adev; + struct amdgpu_task_info reset_task_info; + struct timespec64 reset_time; + bool reset_vram_lost; +}; +#endif int amdgpu_reset_init(struct amdgpu_device *adev); int amdgpu_reset_fini(struct amdgpu_device *adev); @@ -130,6 +138,9 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context); + #define for_each_handler(i, handler, reset_ctl) \ for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \ (handler = (*reset_ctl->reset_handlers)[i]); \ From de009982c6aa8363b2bc8800fb0a13896d264853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 15 Sep 2023 18:44:53 +0200 Subject: [PATCH 51/54] drm/amdgpu: Create version number for coredumps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if there's nothing currently parsing amdgpu's coredump files, if we eventually have such tools they will be glad to find a version field to properly read the file. Create a version number to be displayed on top of coredump file, to be incremented when the file format or content get changed. Signed-off-by: André Almeida Reviewed-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index f53ddca35be6..4baa300121d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -185,6 +185,7 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, p = drm_coredump_printer(&iter); drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); + drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); drm_printf(&p, "kernel: " UTS_RELEASE "\n"); drm_printf(&p, "module: " KBUILD_MODNAME "\n"); drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index f4cfa2c7c9d6..b0335a1c5e90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -90,6 +90,9 @@ struct amdgpu_reset_domain { }; #ifdef CONFIG_DEV_COREDUMP + +#define AMDGPU_COREDUMP_VERSION "1" + struct amdgpu_coredump_info { struct amdgpu_device *adev; struct amdgpu_task_info reset_task_info; From 7ef6b2d4b7e5c0d105c688a4da1f0ac122e3aa44 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 17 Oct 2023 17:08:56 -0500 Subject: [PATCH 52/54] drm/amdkfd: remap unaligned svm ranges that have split Split SVM ranges that have been mapped into 2MB page table entries, require to be remap in case the split has happened in a non-aligned VA. [WHY]: This condition causes the 2MB page table entries be split into 4KB PTEs. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 41 +++++++++++++++++++++------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index ad4e1387e0b0..bda88dc6e2fa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1106,26 +1106,32 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last, } static int -svm_range_split_tail(struct svm_range *prange, - uint64_t new_last, struct list_head *insert_list) +svm_range_split_tail(struct svm_range *prange, uint64_t new_last, + struct list_head *insert_list, struct list_head *remap_list) { struct svm_range *tail; int r = svm_range_split(prange, prange->start, new_last, &tail); - if (!r) + if (!r) { list_add(&tail->list, insert_list); + if (!IS_ALIGNED(new_last + 1, 1UL << prange->granularity)) + list_add(&tail->update_list, remap_list); + } return r; } static int -svm_range_split_head(struct svm_range *prange, - uint64_t new_start, struct list_head *insert_list) +svm_range_split_head(struct svm_range *prange, uint64_t new_start, + struct list_head *insert_list, struct list_head *remap_list) { struct svm_range *head; int r = svm_range_split(prange, new_start, prange->last, &head); - if (!r) + if (!r) { list_add(&head->list, insert_list); + if (!IS_ALIGNED(new_start, 1UL << prange->granularity)) + list_add(&head->update_list, remap_list); + } return r; } @@ -2052,7 +2058,7 @@ static int svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, struct list_head *update_list, struct list_head *insert_list, - struct list_head *remove_list) + struct list_head *remove_list, struct list_head *remap_list) { unsigned long last = start + size - 1UL; struct svm_range_list *svms = &p->svms; @@ -2068,6 +2074,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, INIT_LIST_HEAD(insert_list); INIT_LIST_HEAD(remove_list); INIT_LIST_HEAD(&new_list); + INIT_LIST_HEAD(remap_list); node = interval_tree_iter_first(&svms->objects, start, last); while (node) { @@ -2104,14 +2111,14 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, if (node->start < start) { pr_debug("change old range start\n"); r = svm_range_split_head(prange, start, - insert_list); + insert_list, remap_list); if (r) goto out; } if (node->last > last) { pr_debug("change old range last\n"); r = svm_range_split_tail(prange, last, - insert_list); + insert_list, remap_list); if (r) goto out; } @@ -3501,6 +3508,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, struct list_head update_list; struct list_head insert_list; struct list_head remove_list; + struct list_head remap_list; struct svm_range_list *svms; struct svm_range *prange; struct svm_range *next; @@ -3532,7 +3540,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, /* Add new range and split existing ranges as needed */ r = svm_range_add(p, start, size, nattr, attrs, &update_list, - &insert_list, &remove_list); + &insert_list, &remove_list, &remap_list); if (r) { mutex_unlock(&svms->lock); mmap_write_unlock(mm); @@ -3597,6 +3605,19 @@ out_unlock_range: ret = r; } + list_for_each_entry(prange, &remap_list, update_list) { + pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n", + prange, prange->start, prange->last); + mutex_lock(&prange->migrate_mutex); + r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, + true, true, prange->mapped_to_gpu); + if (r) + pr_debug("failed %d on remap svm range\n", r); + mutex_unlock(&prange->migrate_mutex); + if (r) + ret = r; + } + dynamic_svm_range_dump(svms); mutex_unlock(&svms->lock); From b141fa036c901303ca5659cc22e9c08f8b097892 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 16 Oct 2023 08:19:17 -0600 Subject: [PATCH 53/54] drm/amd/display: Reduce stack size by splitting function When compiling with allmodconfig, gcc highlights the following error: drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c: In function 'dml_core_mode_support': drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/display_mode_core.c:8229:1: error: the frame size of 2736 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] 8229 | } // dml_core_mode_support | ^ cc1: all warnings being treated as errors This commit mitigates part of this problem by extracting the prefetch code to its own function. After applying this commit, the stack size reduces from 2736 to 2464, however, the stack size issue becomes part of the new function. Cc: Stephen Rothwell Cc: Alex Deucher Cc: Roman Li Cc: Chaitanya Dhere Fixes: 7966f319c66d ("drm/amd/display: Introduce DML2") Tested-by: Stephen Rothwell Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/display_mode_core.c | 970 +++++++++--------- 1 file changed, 489 insertions(+), 481 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index cff53d28d3c5..19465c065f3d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6208,16 +6208,500 @@ static dml_uint_t CalculateMaxVStartup( return max_vstartup_lines; } +static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) +{ + struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals; + struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct DmlPipe *myPipe; + dml_uint_t j, k; + + for (j = 0; j < 2; ++j) { + mode_lib->ms.TimeCalc = 24 / mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k]; + mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k]; + mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k]; + mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k]; + mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k]; + mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k]; + mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k]; + mode_lib->ms.UnboundedRequestEnabledThisState = mode_lib->ms.UnboundedRequestEnabledAllStates[j]; + mode_lib->ms.CompressedBufferSizeInkByteThisState = mode_lib->ms.CompressedBufferSizeInkByteAllStates[j]; + mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k]; + mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k]; + mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k]; + } + + mode_lib->ms.support.VActiveBandwithSupport[j] = CalculateVActiveBandwithSupport( + mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.NotUrgentLatencyHiding, + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.meta_row_bandwidth_this_state, + mode_lib->ms.dpte_row_bandwidth_this_state, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.UrgentBurstFactorLuma, + mode_lib->ms.UrgentBurstFactorChroma, + mode_lib->ms.UrgentBurstFactorCursor); + + s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only( + &mode_lib->ms.soc, + mode_lib->ms.state.use_ideal_dram_bw_strobe, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.DCFCLKState[j], + mode_lib->ms.state.fabricclk_mhz, + mode_lib->ms.state.dram_speed_mts); + + s->HostVMInefficiencyFactor = 1; + if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable) + s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBWPerState[j] / s->VMDataOnlyReturnBWPerState; + + mode_lib->ms.ExtraLatency = CalculateExtraLatency( + mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles, + s->ReorderingBytes, + mode_lib->ms.DCFCLKState[j], + mode_lib->ms.TotalNumberOfActiveDPP[j], + mode_lib->ms.ip.pixel_chunk_size_kbytes, + mode_lib->ms.TotalNumberOfDCCActiveDPP[j], + mode_lib->ms.ip.meta_chunk_size_kbytes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.dpte_group_bytes, + s->HostVMInefficiencyFactor, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); + + s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; + s->MaxVStartup = 0; + s->AllPrefetchModeTested = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]); + s->NextPrefetchMode[k] = s->MinPrefetchMode[k]; + } + + do { + s->MaxVStartup = s->NextMaxVStartup; + s->AllPrefetchModeTested = true; + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.PrefetchMode[k] = s->NextPrefetchMode[k]; + mode_lib->ms.TWait = CalculateTWait( + mode_lib->ms.PrefetchMode[k], + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], + mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k], + mode_lib->ms.state.dram_clock_change_latency_us, + mode_lib->ms.state.fclk_change_latency_us, + mode_lib->ms.UrgLatency, + mode_lib->ms.state.sr_enter_plus_exit_time_us); + + myPipe = &s->myPipe; + myPipe->Dppclk = mode_lib->ms.RequiredDPPCLKPerSurface[j][k]; + myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK[j]; + myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k]; + myPipe->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; + myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[j][k]; + myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k]; + myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k]; + myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k]; + myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k]; + myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]; + myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k]; + myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k]; + myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k]; + myPipe->ODMMode = mode_lib->ms.ODMModePerState[k]; + myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k]; + myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Calling CalculatePrefetchSchedule for j=%u, k=%u\n", __func__, j, k); + dml_print("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[j][k]); + dml_print("DML::%s: MaxVStartup = %u\n", __func__, s->MaxVStartup); + dml_print("DML::%s: NextPrefetchMode = %u\n", __func__, s->NextPrefetchMode[k]); + dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]); + dml_print("DML::%s: PrefetchMode = %u\n", __func__, mode_lib->ms.PrefetchMode[k]); +#endif + + CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal; + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelayPerState[k]; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]); + CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k]; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->MaxVStartup, s->MaximumVStartup[j][k])); + CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[j][k]; + CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; + CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; + CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; + CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k]; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->UrgentExtraLatency = mode_lib->ms.ExtraLatency; + CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; + CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k]; + CalculatePrefetchSchedule_params->MetaRowByte = mode_lib->ms.MetaRowBytes[j][k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[j][k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[j][k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[j][k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait; + CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &mode_lib->ms.LineTimesForPrefetch[k]; + CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &mode_lib->ms.LinesForMetaPTE[k]; + CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &mode_lib->ms.LinesForMetaAndDPTERow[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[j][k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[j][k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.support.NoTimeForDynamicMetadata[j][k]; + CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; + CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; + CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; + CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; + CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; + CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; + CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; + CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; + + mode_lib->ms.support.NoTimeForPrefetch[j][k] = + CalculatePrefetchSchedule(&mode_lib->scratch, + CalculatePrefetchSchedule_params); + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateUrgentBurstFactor( + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], + mode_lib->ms.swath_width_luma_ub_this_state[k], + mode_lib->ms.swath_width_chroma_ub_this_state[k], + mode_lib->ms.SwathHeightYThisState[k], + mode_lib->ms.SwathHeightCThisState[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.UrgLatency, + mode_lib->ms.ip.cursor_buffer_size, + mode_lib->ms.cache_display_cfg.plane.CursorWidth[k], + mode_lib->ms.cache_display_cfg.plane.CursorBPP[k], + mode_lib->ms.VRatioPreY[j][k], + mode_lib->ms.VRatioPreC[j][k], + mode_lib->ms.BytePerPixelInDETY[k], + mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeYThisState[k], + mode_lib->ms.DETBufferSizeCThisState[k], + /* Output */ + &mode_lib->ms.UrgentBurstFactorCursorPre[k], + &mode_lib->ms.UrgentBurstFactorLumaPre[k], + &mode_lib->ms.UrgentBurstFactorChroma[k], + &mode_lib->ms.NotUrgentLatencyHidingPre[k]); + + mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * + mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / + mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.VRatioPreY[j][k]; + } + + { + CalculatePrefetchBandwithSupport( + mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, + mode_lib->ms.NotUrgentLatencyHidingPre, + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.RequiredPrefetchPixelDataBWLuma, + mode_lib->ms.RequiredPrefetchPixelDataBWChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.meta_row_bandwidth_this_state, + mode_lib->ms.dpte_row_bandwidth_this_state, + mode_lib->ms.cursor_bw_pre, + mode_lib->ms.prefetch_vmrow_bw, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.UrgentBurstFactorLuma, + mode_lib->ms.UrgentBurstFactorChroma, + mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLumaPre, + mode_lib->ms.UrgentBurstFactorChromaPre, + mode_lib->ms.UrgentBurstFactorCursorPre, + + /* output */ + &s->dummy_single[0], // dml_float_t *PrefetchBandwidth + &s->dummy_single[1], // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch + &mode_lib->mp.FractionOfUrgentBandwidth, // dml_float_t *FractionOfUrgentBandwidth + &mode_lib->ms.support.PrefetchSupported[j]); + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.LineTimesForPrefetch[k] < 2.0 + || mode_lib->ms.LinesForMetaPTE[k] >= 32.0 + || mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16.0 + || mode_lib->ms.support.NoTimeForPrefetch[j][k] == true) { + mode_lib->ms.support.PrefetchSupported[j] = false; + } + } + + mode_lib->ms.support.DynamicMetadataSupported[j] = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.support.NoTimeForDynamicMetadata[j][k] == true) { + mode_lib->ms.support.DynamicMetadataSupported[j] = false; + } + } + + mode_lib->ms.support.VRatioInPrefetchSupported[j] = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.support.NoTimeForPrefetch[j][k] == true || + mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || + mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || + ((s->MaxVStartup < s->MaximumVStartup[j][k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) && + (mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE__))) { + mode_lib->ms.support.VRatioInPrefetchSupported[j] = false; + } + } + + s->AnyLinesForVMOrRowTooLarge = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16 || mode_lib->ms.LinesForMetaPTE[k] >= 32) { + s->AnyLinesForVMOrRowTooLarge = true; + } + } + + if (mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true) { + mode_lib->ms.BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip( + mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.RequiredPrefetchPixelDataBWLuma, + mode_lib->ms.RequiredPrefetchPixelDataBWChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.cursor_bw_pre, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.UrgentBurstFactorLuma, + mode_lib->ms.UrgentBurstFactorChroma, + mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLumaPre, + mode_lib->ms.UrgentBurstFactorChromaPre, + mode_lib->ms.UrgentBurstFactorCursorPre); + + mode_lib->ms.TotImmediateFlipBytes = 0; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) { + mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]; + if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) { + mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]); + } else { + mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.DPTEBytesPerRow[j][k]; + } + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateFlipSchedule( + s->HostVMInefficiencyFactor, + mode_lib->ms.ExtraLatency, + mode_lib->ms.UrgLatency, + mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, + mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], + mode_lib->ms.MetaRowBytes[j][k], + mode_lib->ms.DPTEBytesPerRow[j][k], + mode_lib->ms.BandwidthAvailableForImmediateFlip, + mode_lib->ms.TotImmediateFlipBytes, + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], + (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), + mode_lib->ms.cache_display_cfg.plane.VRatio[k], + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], + mode_lib->ms.Tno_bw[k], + mode_lib->ms.cache_display_cfg.surface.DCCEnable[k], + mode_lib->ms.dpte_row_height[k], + mode_lib->ms.meta_row_height[k], + mode_lib->ms.dpte_row_height_chroma[k], + mode_lib->ms.meta_row_height_chroma[k], + mode_lib->ms.use_one_row_for_frame_flip[j][k], // 24 + + /* Output */ + &mode_lib->ms.DestinationLinesToRequestVMInImmediateFlip[k], + &mode_lib->ms.DestinationLinesToRequestRowInImmediateFlip[k], + &mode_lib->ms.final_flip_bw[k], + &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); + } + + { + CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, + mode_lib->ms.policy.ImmediateFlipRequirement, + mode_lib->ms.final_flip_bw, + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.RequiredPrefetchPixelDataBWLuma, + mode_lib->ms.RequiredPrefetchPixelDataBWChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.meta_row_bandwidth_this_state, + mode_lib->ms.dpte_row_bandwidth_this_state, + mode_lib->ms.cursor_bw_pre, + mode_lib->ms.prefetch_vmrow_bw, + mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here + mode_lib->ms.UrgentBurstFactorLuma, + mode_lib->ms.UrgentBurstFactorChroma, + mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLumaPre, + mode_lib->ms.UrgentBurstFactorChromaPre, + mode_lib->ms.UrgentBurstFactorCursorPre, + + /* output */ + &s->dummy_single[0], // dml_float_t *TotalBandwidth + &s->dummy_single[1], // dml_float_t *TotalBandwidthNotIncludingMALLPrefetch + &s->dummy_single[2], // dml_float_t *FractionOfUrgentBandwidth + &mode_lib->ms.support.ImmediateFlipSupportedForState[j]); // dml_bool_t *ImmediateFlipBandwidthSupport + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required) && (mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)) + mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false; + } + + } else { // if prefetch not support, assume iflip not supported + mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false; + } + + if (s->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || s->AnyLinesForVMOrRowTooLarge == false) { + s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1; + + if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k]) + s->AllPrefetchModeTested = false; + } + } else { + s->NextMaxVStartup = s->NextMaxVStartup - 1; + } + } while (!((mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.DynamicMetadataSupported[j] == true && + mode_lib->ms.support.VRatioInPrefetchSupported[j] == true && + // consider flip support is okay if when there is no hostvm and the user does't require a iflip OR the flip bw is ok + // If there is hostvm, DCN needs to support iflip for invalidation + ((s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j] == true)) || + (s->NextMaxVStartup == s->MaxVStartupAllPlanes[j] && s->AllPrefetchModeTested))); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.use_one_row_for_frame_this_state[k] = mode_lib->ms.use_one_row_for_frame[j][k]; + } + + s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; + s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; + s->mSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us; + s->mSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us; + s->mSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us; + s->mSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us; + s->mSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us; + s->mSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us; + s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us; + s->mSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us; + s->mSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us; + + CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal; + CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateWatermarks_params->PrefetchMode = mode_lib->ms.PrefetchMode; + CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLKState[j]; + CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBWPerState[j]; + CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal; + CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateWatermarks_params->meta_row_height = mode_lib->ms.meta_row_height; + CalculateWatermarks_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma; + CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = mode_lib->ms.state.socclk_mhz; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState; + CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState; + CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState; + CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel; + CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState; + CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState; + CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio; + CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma; + CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps; + CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma; + CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio; + CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma; + CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal; + CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal; + CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive; + CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock; + CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; + CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; + CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable; + CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat; + CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth; + CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight; + CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight; + CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledThisState; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteThisState; + + // Output + CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark + CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j]; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] + CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[] + CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j]; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j]; + CalculateWatermarks_params->ActiveDRAMClockChangeLatencyMargin = mode_lib->ms.support.ActiveDRAMClockChangeLatencyMargin; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, + CalculateWatermarks_params); + + } // for j +} + /// @brief The Mode Support function. dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) { struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals; - struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; - struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; struct UseMinimumDCFCLK_params_st *UseMinimumDCFCLK_params = &mode_lib->scratch.UseMinimumDCFCLK_params; struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; - struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; - struct DmlPipe *myPipe; + struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; dml_uint_t j, k, m; @@ -7546,484 +8030,8 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) } /* Prefetch Check */ + dml_prefetch_check(mode_lib); - for (j = 0; j < 2; ++j) { - mode_lib->ms.TimeCalc = 24 / mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k]; - mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k]; - mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k]; - mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k]; - mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k]; - mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k]; - mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k]; - mode_lib->ms.UnboundedRequestEnabledThisState = mode_lib->ms.UnboundedRequestEnabledAllStates[j]; - mode_lib->ms.CompressedBufferSizeInkByteThisState = mode_lib->ms.CompressedBufferSizeInkByteAllStates[j]; - mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k]; - mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k]; - mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k]; - } - - mode_lib->ms.support.VActiveBandwithSupport[j] = CalculateVActiveBandwithSupport( - mode_lib->ms.num_active_planes, - mode_lib->ms.ReturnBWPerState[j], - mode_lib->ms.NotUrgentLatencyHiding, - mode_lib->ms.ReadBandwidthLuma, - mode_lib->ms.ReadBandwidthChroma, - mode_lib->ms.cursor_bw, - mode_lib->ms.meta_row_bandwidth_this_state, - mode_lib->ms.dpte_row_bandwidth_this_state, - mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor); - - s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only( - &mode_lib->ms.soc, - mode_lib->ms.state.use_ideal_dram_bw_strobe, - mode_lib->ms.cache_display_cfg.plane.HostVMEnable, - mode_lib->ms.DCFCLKState[j], - mode_lib->ms.state.fabricclk_mhz, - mode_lib->ms.state.dram_speed_mts); - - s->HostVMInefficiencyFactor = 1; - if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable) - s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBWPerState[j] / s->VMDataOnlyReturnBWPerState; - - mode_lib->ms.ExtraLatency = CalculateExtraLatency( - mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles, - s->ReorderingBytes, - mode_lib->ms.DCFCLKState[j], - mode_lib->ms.TotalNumberOfActiveDPP[j], - mode_lib->ms.ip.pixel_chunk_size_kbytes, - mode_lib->ms.TotalNumberOfDCCActiveDPP[j], - mode_lib->ms.ip.meta_chunk_size_kbytes, - mode_lib->ms.ReturnBWPerState[j], - mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.cache_display_cfg.plane.HostVMEnable, - mode_lib->ms.num_active_planes, - mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.dpte_group_bytes, - s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, - mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); - - s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; - s->MaxVStartup = 0; - s->AllPrefetchModeTested = true; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]); - s->NextPrefetchMode[k] = s->MinPrefetchMode[k]; - } - - do { - s->MaxVStartup = s->NextMaxVStartup; - s->AllPrefetchModeTested = true; - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - mode_lib->ms.PrefetchMode[k] = s->NextPrefetchMode[k]; - mode_lib->ms.TWait = CalculateTWait( - mode_lib->ms.PrefetchMode[k], - mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], - mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k], - mode_lib->ms.state.dram_clock_change_latency_us, - mode_lib->ms.state.fclk_change_latency_us, - mode_lib->ms.UrgLatency, - mode_lib->ms.state.sr_enter_plus_exit_time_us); - - myPipe = &s->myPipe; - myPipe->Dppclk = mode_lib->ms.RequiredDPPCLKPerSurface[j][k]; - myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK[j]; - myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k]; - myPipe->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; - myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[j][k]; - myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k]; - myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k]; - myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; - myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; - myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; - myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; - myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k]; - myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k]; - myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]; - myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k]; - myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k]; - myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k]; - myPipe->ODMMode = mode_lib->ms.ODMModePerState[k]; - myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k]; - myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; - myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; - myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Calling CalculatePrefetchSchedule for j=%u, k=%u\n", __func__, j, k); - dml_print("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[j][k]); - dml_print("DML::%s: MaxVStartup = %u\n", __func__, s->MaxVStartup); - dml_print("DML::%s: NextPrefetchMode = %u\n", __func__, s->NextPrefetchMode[k]); - dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]); - dml_print("DML::%s: PrefetchMode = %u\n", __func__, mode_lib->ms.PrefetchMode[k]); -#endif - - CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal; - CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor; - CalculatePrefetchSchedule_params->myPipe = myPipe; - CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelayPerState[k]; - CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter; - CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl; - CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only; - CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor; - CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal; - CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]); - CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k]; - CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters; - CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->MaxVStartup, s->MaximumVStartup[j][k])); - CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[j][k]; - CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; - CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; - CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; - CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; - CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; - CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; - CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k]; - CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; - CalculatePrefetchSchedule_params->UrgentExtraLatency = mode_lib->ms.ExtraLatency; - CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; - CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k]; - CalculatePrefetchSchedule_params->MetaRowByte = mode_lib->ms.MetaRowBytes[j][k]; - CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[j][k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[j][k]; - CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; - CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY[k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[j][k]; - CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; - CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC[k]; - CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state[k]; - CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state[k]; - CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState[k]; - CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState[k]; - CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait; - CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; - CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; - CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &mode_lib->ms.LineTimesForPrefetch[k]; - CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &mode_lib->ms.LinesForMetaPTE[k]; - CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &mode_lib->ms.LinesForMetaAndDPTERow[k]; - CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[j][k]; - CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[j][k]; - CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; - CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; - CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.support.NoTimeForDynamicMetadata[j][k]; - CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; - CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; - CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; - CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; - CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; - CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; - CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; - CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; - - mode_lib->ms.support.NoTimeForPrefetch[j][k] = - CalculatePrefetchSchedule(&mode_lib->scratch, - CalculatePrefetchSchedule_params); - } - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - CalculateUrgentBurstFactor( - mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], - mode_lib->ms.swath_width_luma_ub_this_state[k], - mode_lib->ms.swath_width_chroma_ub_this_state[k], - mode_lib->ms.SwathHeightYThisState[k], - mode_lib->ms.SwathHeightCThisState[k], - mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k], - mode_lib->ms.UrgLatency, - mode_lib->ms.ip.cursor_buffer_size, - mode_lib->ms.cache_display_cfg.plane.CursorWidth[k], - mode_lib->ms.cache_display_cfg.plane.CursorBPP[k], - mode_lib->ms.VRatioPreY[j][k], - mode_lib->ms.VRatioPreC[j][k], - mode_lib->ms.BytePerPixelInDETY[k], - mode_lib->ms.BytePerPixelInDETC[k], - mode_lib->ms.DETBufferSizeYThisState[k], - mode_lib->ms.DETBufferSizeCThisState[k], - /* Output */ - &mode_lib->ms.UrgentBurstFactorCursorPre[k], - &mode_lib->ms.UrgentBurstFactorLumaPre[k], - &mode_lib->ms.UrgentBurstFactorChroma[k], - &mode_lib->ms.NotUrgentLatencyHidingPre[k]); - - mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * - mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / - mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.VRatioPreY[j][k]; - } - - { - CalculatePrefetchBandwithSupport( - mode_lib->ms.num_active_planes, - mode_lib->ms.ReturnBWPerState[j], - mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, - mode_lib->ms.NotUrgentLatencyHidingPre, - mode_lib->ms.ReadBandwidthLuma, - mode_lib->ms.ReadBandwidthChroma, - mode_lib->ms.RequiredPrefetchPixelDataBWLuma, - mode_lib->ms.RequiredPrefetchPixelDataBWChroma, - mode_lib->ms.cursor_bw, - mode_lib->ms.meta_row_bandwidth_this_state, - mode_lib->ms.dpte_row_bandwidth_this_state, - mode_lib->ms.cursor_bw_pre, - mode_lib->ms.prefetch_vmrow_bw, - mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor, - mode_lib->ms.UrgentBurstFactorLumaPre, - mode_lib->ms.UrgentBurstFactorChromaPre, - mode_lib->ms.UrgentBurstFactorCursorPre, - - /* output */ - &s->dummy_single[0], // dml_float_t *PrefetchBandwidth - &s->dummy_single[1], // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch - &mode_lib->mp.FractionOfUrgentBandwidth, // dml_float_t *FractionOfUrgentBandwidth - &mode_lib->ms.support.PrefetchSupported[j]); - } - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (mode_lib->ms.LineTimesForPrefetch[k] < 2.0 - || mode_lib->ms.LinesForMetaPTE[k] >= 32.0 - || mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16.0 - || mode_lib->ms.support.NoTimeForPrefetch[j][k] == true) { - mode_lib->ms.support.PrefetchSupported[j] = false; - } - } - - mode_lib->ms.support.DynamicMetadataSupported[j] = true; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.support.NoTimeForDynamicMetadata[j][k] == true) { - mode_lib->ms.support.DynamicMetadataSupported[j] = false; - } - } - - mode_lib->ms.support.VRatioInPrefetchSupported[j] = true; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (mode_lib->ms.support.NoTimeForPrefetch[j][k] == true || - mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - ((s->MaxVStartup < s->MaximumVStartup[j][k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) && - (mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE__))) { - mode_lib->ms.support.VRatioInPrefetchSupported[j] = false; - } - } - - s->AnyLinesForVMOrRowTooLarge = false; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16 || mode_lib->ms.LinesForMetaPTE[k] >= 32) { - s->AnyLinesForVMOrRowTooLarge = true; - } - } - - if (mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true) { - mode_lib->ms.BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip( - mode_lib->ms.num_active_planes, - mode_lib->ms.ReturnBWPerState[j], - mode_lib->ms.ReadBandwidthLuma, - mode_lib->ms.ReadBandwidthChroma, - mode_lib->ms.RequiredPrefetchPixelDataBWLuma, - mode_lib->ms.RequiredPrefetchPixelDataBWChroma, - mode_lib->ms.cursor_bw, - mode_lib->ms.cursor_bw_pre, - mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor, - mode_lib->ms.UrgentBurstFactorLumaPre, - mode_lib->ms.UrgentBurstFactorChromaPre, - mode_lib->ms.UrgentBurstFactorCursorPre); - - mode_lib->ms.TotImmediateFlipBytes = 0; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) { - mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]; - if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) { - mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]); - } else { - mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.DPTEBytesPerRow[j][k]; - } - } - } - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - CalculateFlipSchedule( - s->HostVMInefficiencyFactor, - mode_lib->ms.ExtraLatency, - mode_lib->ms.UrgLatency, - mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels, - mode_lib->ms.cache_display_cfg.plane.HostVMEnable, - mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, - mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, - mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], - mode_lib->ms.MetaRowBytes[j][k], - mode_lib->ms.DPTEBytesPerRow[j][k], - mode_lib->ms.BandwidthAvailableForImmediateFlip, - mode_lib->ms.TotImmediateFlipBytes, - mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], - (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), - mode_lib->ms.cache_display_cfg.plane.VRatio[k], - mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], - mode_lib->ms.Tno_bw[k], - mode_lib->ms.cache_display_cfg.surface.DCCEnable[k], - mode_lib->ms.dpte_row_height[k], - mode_lib->ms.meta_row_height[k], - mode_lib->ms.dpte_row_height_chroma[k], - mode_lib->ms.meta_row_height_chroma[k], - mode_lib->ms.use_one_row_for_frame_flip[j][k], // 24 - - /* Output */ - &mode_lib->ms.DestinationLinesToRequestVMInImmediateFlip[k], - &mode_lib->ms.DestinationLinesToRequestRowInImmediateFlip[k], - &mode_lib->ms.final_flip_bw[k], - &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); - } - - { - CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes, - mode_lib->ms.ReturnBWPerState[j], - mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, - mode_lib->ms.policy.ImmediateFlipRequirement, - mode_lib->ms.final_flip_bw, - mode_lib->ms.ReadBandwidthLuma, - mode_lib->ms.ReadBandwidthChroma, - mode_lib->ms.RequiredPrefetchPixelDataBWLuma, - mode_lib->ms.RequiredPrefetchPixelDataBWChroma, - mode_lib->ms.cursor_bw, - mode_lib->ms.meta_row_bandwidth_this_state, - mode_lib->ms.dpte_row_bandwidth_this_state, - mode_lib->ms.cursor_bw_pre, - mode_lib->ms.prefetch_vmrow_bw, - mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor, - mode_lib->ms.UrgentBurstFactorLumaPre, - mode_lib->ms.UrgentBurstFactorChromaPre, - mode_lib->ms.UrgentBurstFactorCursorPre, - - /* output */ - &s->dummy_single[0], // dml_float_t *TotalBandwidth - &s->dummy_single[1], // dml_float_t *TotalBandwidthNotIncludingMALLPrefetch - &s->dummy_single[2], // dml_float_t *FractionOfUrgentBandwidth - &mode_lib->ms.support.ImmediateFlipSupportedForState[j]); // dml_bool_t *ImmediateFlipBandwidthSupport - } - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required) && (mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)) - mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false; - } - - } else { // if prefetch not support, assume iflip not supported - mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false; - } - - if (s->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || s->AnyLinesForVMOrRowTooLarge == false) { - s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1; - - if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k]) - s->AllPrefetchModeTested = false; - } - } else { - s->NextMaxVStartup = s->NextMaxVStartup - 1; - } - } while (!((mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.DynamicMetadataSupported[j] == true && - mode_lib->ms.support.VRatioInPrefetchSupported[j] == true && - // consider flip support is okay if when there is no hostvm and the user does't require a iflip OR the flip bw is ok - // If there is hostvm, DCN needs to support iflip for invalidation - ((s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j] == true)) || - (s->NextMaxVStartup == s->MaxVStartupAllPlanes[j] && s->AllPrefetchModeTested))); - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - mode_lib->ms.use_one_row_for_frame_this_state[k] = mode_lib->ms.use_one_row_for_frame[j][k]; - } - - s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; - s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; - s->mSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us; - s->mSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us; - s->mSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us; - s->mSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us; - s->mSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us; - s->mSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us; - s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us; - s->mSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us; - s->mSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us; - - CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal; - CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; - CalculateWatermarks_params->PrefetchMode = mode_lib->ms.PrefetchMode; - CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines; - CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits; - CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes; - CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLKState[j]; - CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBWPerState[j]; - CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal; - CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal; - CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay; - CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; - CalculateWatermarks_params->meta_row_height = mode_lib->ms.meta_row_height; - CalculateWatermarks_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma; - CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; - CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes; - CalculateWatermarks_params->SOCCLK = mode_lib->ms.state.socclk_mhz; - CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; - CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState; - CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState; - CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState; - CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState; - CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel; - CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState; - CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState; - CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio; - CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma; - CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps; - CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma; - CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio; - CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma; - CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal; - CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal; - CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive; - CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock; - CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; - CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState; - CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; - CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; - CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; - CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; - CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable; - CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat; - CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth; - CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight; - CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight; - CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledThisState; - CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteThisState; - - // Output - CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark - CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j]; - CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] - CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[] - CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j]; - CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported - CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j]; - CalculateWatermarks_params->ActiveDRAMClockChangeLatencyMargin = mode_lib->ms.support.ActiveDRAMClockChangeLatencyMargin; - - CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, - CalculateWatermarks_params); - - } // for j // End of Prefetch Check dml_print("DML::%s: Done prefetch calculation\n", __func__); From 5b2c54e0d0ea09f7a3b500510731878326e1117e Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 16 Oct 2023 08:19:18 -0600 Subject: [PATCH 54/54] drm/amd/display: Fix stack size issue on DML2 This commit is the last part of the fix that reduces the stack size in the DML2 code. Cc: Stephen Rothwell Cc: Alex Deucher Cc: Roman Li Cc: Chaitanya Dhere Fixes: 7966f319c66d ("drm/amd/display: Introduce DML2") Tested-by: Stephen Rothwell Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/display_mode_core.c | 99 ++++++++++--------- 1 file changed, 54 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 19465c065f3d..5f54251a559c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6208,6 +6208,58 @@ static dml_uint_t CalculateMaxVStartup( return max_vstartup_lines; } +static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib, + struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params, + dml_uint_t j, + dml_uint_t k) +{ + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelayPerState[k]; + CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]); + CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k]; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; + CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; + CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; + CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k]; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->UrgentExtraLatency = mode_lib->ms.ExtraLatency; + CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; + CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k]; + CalculatePrefetchSchedule_params->MetaRowByte = mode_lib->ms.MetaRowBytes[j][k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[j][k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[j][k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[j][k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait; + CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &mode_lib->ms.LineTimesForPrefetch[k]; + CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &mode_lib->ms.LinesForMetaPTE[k]; + CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &mode_lib->ms.LinesForMetaAndDPTERow[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[j][k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[j][k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.support.NoTimeForDynamicMetadata[j][k]; + CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; +} + static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) { struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals; @@ -6335,57 +6387,12 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) dml_print("DML::%s: PrefetchMode = %u\n", __func__, mode_lib->ms.PrefetchMode[k]); #endif - CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal; CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor; CalculatePrefetchSchedule_params->myPipe = myPipe; - CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelayPerState[k]; - CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter; - CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl; - CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only; - CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor; - CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal; - CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]); - CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k]; - CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->MaxVStartup, s->MaximumVStartup[j][k])); CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[j][k]; - CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; - CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; - CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; - CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; - CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; - CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; - CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k]; - CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; - CalculatePrefetchSchedule_params->UrgentExtraLatency = mode_lib->ms.ExtraLatency; - CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; - CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k]; - CalculatePrefetchSchedule_params->MetaRowByte = mode_lib->ms.MetaRowBytes[j][k]; - CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[j][k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[j][k]; - CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; - CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY[k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[j][k]; - CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; - CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC[k]; - CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state[k]; - CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state[k]; - CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState[k]; - CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState[k]; - CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait; CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; - CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &mode_lib->ms.LineTimesForPrefetch[k]; - CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &mode_lib->ms.LinesForMetaPTE[k]; - CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &mode_lib->ms.LinesForMetaAndDPTERow[k]; - CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[j][k]; - CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[j][k]; - CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; - CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; - CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.support.NoTimeForDynamicMetadata[j][k]; - CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; @@ -6394,6 +6401,8 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; + set_calculate_prefetch_schedule_params(mode_lib, CalculatePrefetchSchedule_params, j, k); + mode_lib->ms.support.NoTimeForPrefetch[j][k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);