summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorHawking Zhang <Hawking.Zhang@amd.com>2025-01-22 19:34:33 +0800
committerAlex Deucher <alexander.deucher@amd.com>2025-02-12 21:02:59 -0500
commit16b85a0942c0b0f1611bcaa42cc98f020e34b1cf (patch)
treed7f35626c9938478c28b84fea369176192559412 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parentc003b5ccaf625c4e8077a0e7a8a1d9e6e403d603 (diff)
downloadlwn-16b85a0942c0b0f1611bcaa42cc98f020e34b1cf.tar.gz
lwn-16b85a0942c0b0f1611bcaa42cc98f020e34b1cf.zip
drm/amdgpu: Update usage for bad page threshold
The driver's behavior varies based on the configuration of amdgpu_bad_page_threshold setting Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c40
1 files changed, 20 insertions, 20 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index f0924aa3f4e4..90394f89aba6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3080,31 +3080,29 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
/*
- * Justification of value bad_page_cnt_threshold in ras structure
- *
- * Generally, 0 <= amdgpu_bad_page_threshold <= max record length
- * in eeprom or amdgpu_bad_page_threshold == -2, introduce two
- * scenarios accordingly.
- *
- * Bad page retirement enablement:
- * - If amdgpu_bad_page_threshold = -2,
- * bad_page_cnt_threshold = typical value by formula.
- *
- * - When the value from user is 0 < amdgpu_bad_page_threshold <
- * max record length in eeprom, use it directly.
- *
- * Bad page retirement disablement:
- * - If amdgpu_bad_page_threshold = 0, bad page retirement
- * functionality is disabled, and bad_page_cnt_threshold will
- * take no effect.
+ * amdgpu_bad_page_threshold is used to config
+ * the threshold for the number of bad pages.
+ * -1: Threshold is set to default value
+ * Driver will issue a warning message when threshold is reached
+ * and continue runtime services.
+ * 0: Disable bad page retirement
+ * Driver will not retire bad pages
+ * which is intended for debugging purpose.
+ * -2: Threshold is determined by a formula
+ * that assumes 1 bad page per 100M of local memory.
+ * Driver will continue runtime services when threhold is reached.
+ * 0 < threshold < max number of bad page records in EEPROM,
+ * A user-defined threshold is set
+ * Driver will halt runtime services when this custom threshold is reached.
*/
-
- if (amdgpu_bad_page_threshold < 0) {
+ if (amdgpu_bad_page_threshold == -2) {
u64 val = adev->gmc.mc_vram_size;
do_div(val, RAS_BAD_PAGE_COVER);
con->bad_page_cnt_threshold = min(lower_32_bits(val),
max_count);
+ } else if (amdgpu_bad_page_threshold == -1) {
+ con->bad_page_cnt_threshold = ((con->reserved_pages_in_bytes) >> 21) << 4;
} else {
con->bad_page_cnt_threshold = min_t(int, max_count,
amdgpu_bad_page_threshold);
@@ -3848,8 +3846,10 @@ static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 12):
+ con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT;
+ break;
case IP_VERSION(13, 0, 14):
- con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE;
+ con->reserved_pages_in_bytes = (AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT << 1);
break;
default:
break;