From 47fc644f801e4414753a9b7e87ed41f991cd68c3 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Date: Wed, 19 Apr 2023 16:12:45 +0530 Subject: drm/amd/amdgpu: Fix style errors in amdgpu_drv.c & amdgpu_device.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix following checkpatch style errors in amdgpu_drv.c & amdgpu_device.c ERROR: exactly one space required after that #ifdef ERROR: spaces required around that '+=' (ctx:WxV) ERROR: space required before the open brace '{' ERROR: spaces required around that '||' (ctx:VxE) ERROR: space prohibited before that close parenthesis ')' ERROR: space required before the open parenthesis '(' ERROR: space required before the open brace '{' ERROR: code indent should use tabs where possible Cc: Christian König <christian.koenig@amd.com> Cc: Alex Deucher <alexander.deucher@amd.com> Cc: Mario Limonciello <mario.limonciello@amd.com> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b1ca1ab6d6ad..b400d598b75a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1660,7 +1660,7 @@ static const u16 amdgpu_unsupported_pciidlist[] = { }; static const struct pci_device_id pciidlist[] = { -#ifdef CONFIG_DRM_AMDGPU_SI +#ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, -- cgit v1.2.3 From 8a93c691248e7ff2b3944107a1ead2671b6854f2 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Fri, 10 Mar 2023 13:17:11 -0500 Subject: drm/amdgpu: bump driver version number for CP GFX shadow So UMDs can determine whether the kernel supports this. Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986 Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b400d598b75a..b987022e11b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -110,9 +110,10 @@ * 3.52.0 - Add AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD, add device_info fields: * tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, sqc_inst_cache_size, * gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi + * 3.53.0 - Support for GFX11 CP GFX shadowing */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 52 +#define KMS_DRIVER_MINOR 53 #define KMS_DRIVER_PATCHLEVEL 0 unsigned int amdgpu_vram_limit = UINT_MAX; -- cgit v1.2.3 From 489763af891d5dc35c0b64e18af284d6591286cf Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Date: Mon, 24 Apr 2023 19:25:32 +0200 Subject: drm/amdgpu: add new flag to AMDGPU_CTX_QUERY2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenGL EXT_robustness extension expects the driver to stop reporting GUILTY_CONTEXT_RESET when the reset has completed and the GPU is ready to accept submission again. This commit adds a AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS flag, that let the UMD know that the reset is still not finished. Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22290 Reviewed-by: Christian König <christian.koenig@amd.com> Reviewed-by: André Almeida <andrealmeid@igalia.com> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- include/uapi/drm/amdgpu_drm.h | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index d2139ac12159..e1f642a3dc2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -576,6 +576,9 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, if (atomic_read(&ctx->guilty)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; + if (amdgpu_in_reset(adev)) + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS; + if (adev->ras_enabled && con) { /* Return the cached values in O(1), * and schedule delayed work to cache diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b987022e11b4..45544ebe576e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -111,9 +111,10 @@ * tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, sqc_inst_cache_size, * gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi * 3.53.0 - Support for GFX11 CP GFX shadowing + * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 53 +#define KMS_DRIVER_MINOR 54 #define KMS_DRIVER_PATCHLEVEL 0 unsigned int amdgpu_vram_limit = UINT_MAX; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index cc78528c3b4b..79b14828d542 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -245,6 +245,8 @@ union drm_amdgpu_bo_list { /* indicate some errors are detected by RAS */ #define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3) #define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4) +/* indicate that the reset hasn't completed yet */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5) /* Context priority level */ #define AMDGPU_CTX_PRIORITY_UNSET -2048 -- cgit v1.2.3 From 0fa49d108386c201b5c2cce68066a9b8f66883a5 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang <shiwu.zhang@amd.com> Date: Fri, 17 Dec 2021 11:27:53 +0800 Subject: drm/amdgpu: override partition mode through module parameter Add a module parameter to override the partition mode. Signed-off-by: Shiwu Zhang <shiwu.zhang@amd.com> Reviewed-by: Le Ma <Le.Ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 13 +++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 25 +++++++++++++++++++++++-- 3 files changed, 38 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0f163d266812..a277bdc86057 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -242,6 +242,8 @@ extern int amdgpu_num_kcq; extern int amdgpu_vcnfw_log; extern int amdgpu_sg_display; +extern uint amdgpu_user_partt_mode; + #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 45544ebe576e..9e9da2ac5c82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -193,6 +193,7 @@ int amdgpu_smartshift_bias; int amdgpu_use_xgmi_p2p = 1; int amdgpu_vcnfw_log; int amdgpu_sg_display = -1; /* auto */ +uint amdgpu_user_partt_mode; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -950,6 +951,18 @@ MODULE_PARM_DESC(smu_pptable_id, "specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)"); module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444); +/** + * DOC: partition_mode (int) + * Used to override the default SPX mode. + */ +MODULE_PARM_DESC(user_partt_mode, + "specify partition mode to be used (0 = AMDGPU_SPX_PARTITION_MODE(default value), \ + 1 = AMDGPU_DPX_PARTITION_MODE, \ + 2 = AMDGPU_TPX_PARTITION_MODE, \ + 3 = AMDGPU_QPX_PARTITION_MODE, \ + 4 = AMDGPU_CPX_PARTITION_MODE)"); +module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 1d15db9423c9..2676a185c232 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2162,8 +2162,29 @@ static int gfx_v9_4_3_early_init(void *handle) /* hardcode in emulation phase */ adev->gfx.num_xcd = 1; - adev->gfx.num_xcc_per_xcp = 1; - adev->gfx.partition_mode = AMDGPU_SPX_PARTITION_MODE; + + adev->gfx.partition_mode = amdgpu_user_partt_mode; + /* calculate the num_xcc_in_xcp for the partition mode*/ + switch (amdgpu_user_partt_mode) { + case AMDGPU_SPX_PARTITION_MODE: + adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd; + break; + case AMDGPU_DPX_PARTITION_MODE: + adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 2; + break; + case AMDGPU_TPX_PARTITION_MODE: + adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 3; + break; + case AMDGPU_QPX_PARTITION_MODE: + adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 4; + break; + case AMDGPU_CPX_PARTITION_MODE: + adev->gfx.num_xcc_per_xcp = 1; + break; + default: + adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd; + break; + } adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); -- cgit v1.2.3 From 4d5275ab0b18d17697392aafd93e206e6b9de647 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Fri, 27 Jan 2023 18:18:17 +0530 Subject: drm/amdgpu: Add parsing of acpi xcc objects Add parsing of ACPI xcc objects and fill in relevant info from them by invoking the DSM methods. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-and-tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 294 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 3 files changed, 297 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 39743d44b567..880bf9d67284 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1404,11 +1404,13 @@ int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps); bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev); void amdgpu_acpi_detect(void); +void amdgpu_acpi_release(void); #else static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; } static inline void amdgpu_acpi_detect(void) { } +static inline void amdgpu_acpi_release(void) { } static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; } static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, u8 dev_state, bool drv_state) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index aeeec211861c..a3a2ef43abfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -38,6 +38,43 @@ #include "amd_acpi.h" #include "atom.h" +/* Declare GUID for AMD _DSM method for XCCs */ +static const guid_t amd_xcc_dsm_guid = GUID_INIT(0x8267f5d5, 0xa556, 0x44f2, + 0xb8, 0xb4, 0x45, 0x56, 0x2e, + 0x8c, 0x5b, 0xec); + +#define AMD_XCC_HID_START 3000 +#define AMD_XCC_DSM_GET_NUM_FUNCS 0 +#define AMD_XCC_DSM_GET_SUPP_MODE 1 +#define AMD_XCC_DSM_GET_XCP_MODE 2 +#define AMD_XCC_DSM_GET_VF_XCC_MAPPING 4 +#define AMD_XCC_DSM_GET_TMR_INFO 5 +#define AMD_XCC_DSM_NUM_FUNCS 5 + +#define AMD_XCC_MAX_HID 24 + +/* Encapsulates the XCD acpi object information */ +struct amdgpu_acpi_xcc_info { + struct list_head list; + int mem_node; + uint8_t xcp_node; + uint8_t phy_id; + acpi_handle handle; +}; + +struct amdgpu_acpi_dev_info { + struct list_head list; + struct list_head xcc_list; + uint16_t bdf; + uint16_t supp_xcp_mode; + uint16_t xcp_mode; + uint16_t mem_mode; + uint64_t tmr_base; + uint64_t tmr_size; +}; + +struct list_head amdgpu_acpi_dev_list; + struct amdgpu_atif_notification_cfg { bool enabled; int command_code; @@ -801,6 +838,240 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta return r; } +/** + * amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu + * acpi device handle + * + * @handle: acpi handle + * @nid: NUMA Node id returned by the platform firmware + * + * Queries the ACPI interface to fetch the corresponding NUMA Node ID for a + * given amdgpu acpi device. + * + * Returns ACPI STATUS OK with Node ID on success or the corresponding failure reason + */ +acpi_status amdgpu_acpi_get_node_id(acpi_handle handle, int *nid) +{ +#ifdef CONFIG_ACPI_NUMA + u64 pxm; + acpi_status status; + + status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm); + + if (ACPI_FAILURE(status)) + return status; + + *nid = pxm_to_node(pxm); + + return_ACPI_STATUS(AE_OK); +#else + return_ACPI_STATUS(AE_NOT_EXIST); +#endif +} + +struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf) +{ + struct amdgpu_acpi_dev_info *acpi_dev; + + if (list_empty(&amdgpu_acpi_dev_list)) + return NULL; + + list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list) + if (acpi_dev->bdf == bdf) + return acpi_dev; + + return NULL; +} + +static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info, + struct amdgpu_acpi_xcc_info *xcc_info, u16 bdf) +{ + struct amdgpu_acpi_dev_info *tmp; + union acpi_object *obj; + int ret = -ENOENT; + + *dev_info = NULL; + tmp = kzalloc(sizeof(struct amdgpu_acpi_dev_info), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + INIT_LIST_HEAD(&tmp->xcc_list); + INIT_LIST_HEAD(&tmp->list); + tmp->bdf = bdf; + + obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0, + AMD_XCC_DSM_GET_SUPP_MODE, NULL, + ACPI_TYPE_INTEGER); + + if (!obj) { + acpi_handle_debug(xcc_info->handle, + "_DSM function %d evaluation failed", + AMD_XCC_DSM_GET_SUPP_MODE); + ret = -ENOENT; + goto out; + } + + tmp->supp_xcp_mode = obj->integer.value & 0xFFFF; + ACPI_FREE(obj); + + obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0, + AMD_XCC_DSM_GET_XCP_MODE, NULL, + ACPI_TYPE_INTEGER); + + if (!obj) { + acpi_handle_debug(xcc_info->handle, + "_DSM function %d evaluation failed", + AMD_XCC_DSM_GET_XCP_MODE); + ret = -ENOENT; + goto out; + } + + tmp->xcp_mode = obj->integer.value & 0xFFFF; + tmp->mem_mode = (obj->integer.value >> 32) & 0xFFFF; + ACPI_FREE(obj); + + /* Evaluate DSMs and fill XCC information */ + obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0, + AMD_XCC_DSM_GET_TMR_INFO, NULL, + ACPI_TYPE_PACKAGE); + + if (!obj || obj->package.count < 2) { + acpi_handle_debug(xcc_info->handle, + "_DSM function %d evaluation failed", + AMD_XCC_DSM_GET_TMR_INFO); + ret = -ENOENT; + goto out; + } + + tmp->tmr_base = obj->package.elements[0].integer.value; + tmp->tmr_size = obj->package.elements[1].integer.value; + ACPI_FREE(obj); + + DRM_DEBUG_DRIVER( + "New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ", + tmp->bdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode, + tmp->tmr_base, tmp->tmr_size); + list_add_tail(&tmp->list, &amdgpu_acpi_dev_list); + *dev_info = tmp; + + return 0; + +out: + if (obj) + ACPI_FREE(obj); + kfree(tmp); + + return ret; +} + +static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info, + u16 *bdf) +{ + union acpi_object *obj; + acpi_status status; + int ret = -ENOENT; + + obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0, + AMD_XCC_DSM_GET_NUM_FUNCS, NULL, + ACPI_TYPE_INTEGER); + + if (!obj || obj->integer.value != AMD_XCC_DSM_NUM_FUNCS) + goto out; + ACPI_FREE(obj); + + /* Evaluate DSMs and fill XCC information */ + obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0, + AMD_XCC_DSM_GET_VF_XCC_MAPPING, NULL, + ACPI_TYPE_INTEGER); + + if (!obj) { + acpi_handle_debug(xcc_info->handle, + "_DSM function %d evaluation failed", + AMD_XCC_DSM_GET_VF_XCC_MAPPING); + ret = -EINVAL; + goto out; + } + + /* PF xcc id [39:32] */ + xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF; + /* xcp node of this xcc [47:40] */ + xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF; + /* PF bus/dev/fn of this xcc [63:48] */ + *bdf = (obj->integer.value >> 48) & 0xFFFF; + ACPI_FREE(obj); + obj = NULL; + + status = amdgpu_acpi_get_node_id(xcc_info->handle, &xcc_info->mem_node); + + /* TODO: check if this check is required */ + if (ACPI_SUCCESS(status)) + ret = 0; +out: + if (obj) + ACPI_FREE(obj); + + return ret; +} + +static int amdgpu_acpi_enumerate_xcc(void) +{ + struct amdgpu_acpi_dev_info *dev_info = NULL; + struct amdgpu_acpi_xcc_info *xcc_info; + struct acpi_device *acpi_dev; + char hid[ACPI_ID_LEN]; + int ret, id; + u16 bdf; + + INIT_LIST_HEAD(&amdgpu_acpi_dev_list); + + for (id = 0; id < AMD_XCC_MAX_HID; id++) { + sprintf(hid, "%s%d", "AMD", AMD_XCC_HID_START + id); + acpi_dev = acpi_dev_get_first_match_dev(hid, NULL, -1); + /* These ACPI objects are expected to be in sequential order. If + * one is not found, no need to check the rest. + */ + if (!acpi_dev) { + DRM_DEBUG_DRIVER("No matching acpi device found for %s", + hid); + break; + } + + xcc_info = kzalloc(sizeof(struct amdgpu_acpi_xcc_info), + GFP_KERNEL); + if (!xcc_info) { + DRM_ERROR("Failed to allocate memory for xcc info\n"); + return -ENOMEM; + } + + INIT_LIST_HEAD(&xcc_info->list); + xcc_info->handle = acpi_device_handle(acpi_dev); + acpi_dev_put(acpi_dev); + + ret = amdgpu_acpi_get_xcc_info(xcc_info, &bdf); + if (ret) { + kfree(xcc_info); + continue; + } + + dev_info = amdgpu_acpi_get_dev(bdf); + + if (!dev_info) + ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, bdf); + + if (ret == -ENOMEM) + return ret; + + if (!dev_info) { + kfree(xcc_info); + continue; + } + + list_add_tail(&xcc_info->list, &dev_info->xcc_list); + } + + return 0; +} + /** * amdgpu_acpi_event - handle notify events * @@ -1054,6 +1325,29 @@ void amdgpu_acpi_detect(void) } else { atif->backlight_caps.caps_valid = false; } + + amdgpu_acpi_enumerate_xcc(); +} + +void amdgpu_acpi_release(void) +{ + struct amdgpu_acpi_dev_info *dev_info, *dev_tmp; + struct amdgpu_acpi_xcc_info *xcc_info, *xcc_tmp; + + if (list_empty(&amdgpu_acpi_dev_list)) + return; + + list_for_each_entry_safe(dev_info, dev_tmp, &amdgpu_acpi_dev_list, + list) { + list_for_each_entry_safe(xcc_info, xcc_tmp, &dev_info->xcc_list, + list) { + list_del(&xcc_info->list); + kfree(xcc_info); + } + + list_del(&dev_info->list); + kfree(dev_info); + } } #if IS_ENABLED(CONFIG_SUSPEND) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 9e9da2ac5c82..f319318a8813 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2898,6 +2898,7 @@ static void __exit amdgpu_exit(void) amdgpu_amdkfd_fini(); pci_unregister_driver(&amdgpu_kms_pci_driver); amdgpu_unregister_atpx_handler(); + amdgpu_acpi_release(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); mmu_notifier_synchronize(); -- cgit v1.2.3 From 570de94b9c5d93e1c5bc4e357946efb93c662da9 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Mon, 13 Feb 2023 19:26:18 +0530 Subject: drm/amdgpu: Add auto mode for compute partition When auto mode is specified, driver will choose the right compute partition mode. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Le Ma <le.ma@amd.com> Reviewed-by: Philip Yang <philip.yang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++ .../gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c | 28 +++++++++++++++++++++- 4 files changed, 35 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f2bafab15ceb..cb9373f8c25a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -242,7 +242,7 @@ extern int amdgpu_num_kcq; extern int amdgpu_vcnfw_log; extern int amdgpu_sg_display; -extern uint amdgpu_user_partt_mode; +extern int amdgpu_user_partt_mode; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f319318a8813..da4e50aef95a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -193,7 +193,7 @@ int amdgpu_smartshift_bias; int amdgpu_use_xgmi_p2p = 1; int amdgpu_vcnfw_log; int amdgpu_sg_display = -1; /* auto */ -uint amdgpu_user_partt_mode; +int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -955,8 +955,10 @@ module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444); * DOC: partition_mode (int) * Used to override the default SPX mode. */ -MODULE_PARM_DESC(user_partt_mode, - "specify partition mode to be used (0 = AMDGPU_SPX_PARTITION_MODE(default value), \ +MODULE_PARM_DESC( + user_partt_mode, + "specify partition mode to be used (-2 = AMDGPU_AUTO_COMPUTE_PARTITION_MODE(default value) \ + 0 = AMDGPU_SPX_PARTITION_MODE, \ 1 = AMDGPU_DPX_PARTITION_MODE, \ 2 = AMDGPU_TPX_PARTITION_MODE, \ 3 = AMDGPU_QPX_PARTITION_MODE, \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 728977f8afe7..e9c93f6e12b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -62,6 +62,8 @@ enum amdgpu_gfx_partition { AMDGPU_QPX_PARTITION_MODE = 3, AMDGPU_CPX_PARTITION_MODE = 4, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE = -1, + /* Automatically choose the right mode */ + AMDGPU_AUTO_COMPUTE_PARTITION_MODE = -2, }; #define NUM_XCC(x) hweight16(x) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c index 7469de3fd6fe..a165b51e9e58 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c @@ -235,6 +235,30 @@ int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id, return 0; } +static enum amdgpu_gfx_partition +__aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int num_xcc; + + num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask); + + if (adev->gmc.num_mem_partitions == 1) + return AMDGPU_SPX_PARTITION_MODE; + + if (adev->gmc.num_mem_partitions == num_xcc) + return AMDGPU_CPX_PARTITION_MODE; + + if (adev->gmc.num_mem_partitions == num_xcc / 2) + return (adev->flags & AMD_IS_APU) ? AMDGPU_TPX_PARTITION_MODE : + AMDGPU_QPX_PARTITION_MODE; + + if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU)) + return AMDGPU_DPX_PARTITION_MODE; + + return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; +} + static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, enum amdgpu_gfx_partition mode) { @@ -304,7 +328,9 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, adev = xcp_mgr->adev; num_xcc = NUM_XCC(adev->gfx.xcc_mask); - if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) + if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) + mode = __aqua_vanjaram_get_auto_mode(xcp_mgr); + else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) return -EINVAL; if (adev->kfd.init_complete) -- cgit v1.2.3 From 895797d9193b38e759bc01268a8e3887e521f682 Mon Sep 17 00:00:00 2001 From: Graham Sider <Graham.Sider@amd.com> Date: Mon, 6 Feb 2023 14:04:42 -0500 Subject: drm/amdgpu/bu: Add use_mtype_cc_wa module param By default, set use_mtype_cc_wa to 1 to set PTE coherence flag MTYPE_CC instead of MTYPE_RW by default. This is required for the time being to mitigate a bug causing XCCs to hit stale data due to TCC marking fully dirty lines as exclusive. Signed-off-by: Graham Sider <Graham.Sider@amd.com> Reviewed-by: Joseph Greathouse <Joseph.Greathouse@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 +++++++--- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 7 +++++-- 4 files changed, 20 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cb9373f8c25a..cd2a29a7e26d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -212,6 +212,7 @@ extern int amdgpu_noretry; extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; extern int amdgpu_use_xgmi_p2p; +extern bool amdgpu_use_mtype_cc_wa; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index da4e50aef95a..8bc37826a99f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -822,6 +822,13 @@ MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault ( module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); #endif +/** + * DOC: use_mtype_cc_wa (bool) + */ +bool amdgpu_use_mtype_cc_wa = true; +MODULE_PARM_DESC(use_mtype_cc_wa, "Use MTYPE_CC workaround (0 = use MTYPE_RW where applicable, 1 = use MTYPE_CC where applicable (default))"); +module_param_named(use_mtype_cc_wa, amdgpu_use_mtype_cc_wa, bool, 0444); + /** * DOC: pcie_p2p (bool) * Enable PCIe P2P (requires large-BAR). Default value: true (on) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2eb67b53e497..8623b93c05ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1187,6 +1187,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; unsigned int mtype; + unsigned int mtype_default; bool snoop = false; switch (adev->ip_versions[GC_HWIP][0]) { @@ -1230,7 +1231,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, /* FIXME: Needs more work for handling multiple memory * partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU * modes. + * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable. + * To force use of MTYPE_RW, set use_mtype_cc_wa=0 */ + mtype_default = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; snoop = true; if (uncached) { mtype = MTYPE_UC; @@ -1245,14 +1249,14 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, * socket should be treated as remote access so MTYPE_RW * cannot be used always. */ - mtype = MTYPE_RW; + mtype = mtype_default; } else if (adev->flags & AMD_IS_APU) { /* APU on carve out mode */ - mtype = MTYPE_RW; + mtype = mtype_default; } else { /* dGPU */ if (is_vram && bo_adev == adev) - mtype = MTYPE_RW; + mtype = mtype_default; else if (is_vram) mtype = MTYPE_NC; else diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2b2129dd1e4a..477ef9294203 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1198,9 +1198,12 @@ svm_range_get_pte_flags(struct kfd_node *node, if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; } else if (domain == SVM_RANGE_VRAM_DOMAIN) { - /* local HBM region close to partition */ + /* local HBM region close to partition + * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable. + * To force use of MTYPE_RW, set use_mtype_cc_wa=0 + */ if (bo_node == node) - mapping_flags |= AMDGPU_VM_MTYPE_RW; + mapping_flags |= amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; /* local HBM region far from partition or remote XGMI GPU */ else if (svm_nodes_in_same_hive(bo_node, node)) mapping_flags |= AMDGPU_VM_MTYPE_NC; -- cgit v1.2.3 From 76eb9c95a409ea820b2e7c968c220e7a38f27d76 Mon Sep 17 00:00:00 2001 From: David Francis <David.Francis@amd.com> Date: Mon, 27 Feb 2023 10:33:11 -0500 Subject: drm/amdgpu/bu: add mtype_local as a module parameter Selects the MTYPE to be used for local memory, (0 = MTYPE_CC (default), 1 = MTYPE_NC, 2 = MTYPE_RW) v2: squash in build fix (Alex) Reviewed-by: Graham Sider <Graham.Sider@amd.com> Signed-off-by: David Francis <David.Francis@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 19 ++++++++++++++++--- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 3 +-- 4 files changed, 22 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cd2a29a7e26d..c2feaf2fd070 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -212,7 +212,7 @@ extern int amdgpu_noretry; extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; extern int amdgpu_use_xgmi_p2p; -extern bool amdgpu_use_mtype_cc_wa; +extern int amdgpu_mtype_local; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8bc37826a99f..706ba4af062f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -823,11 +823,11 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm #endif /** - * DOC: use_mtype_cc_wa (bool) + * DOC: mtype_local (int) */ -bool amdgpu_use_mtype_cc_wa = true; -MODULE_PARM_DESC(use_mtype_cc_wa, "Use MTYPE_CC workaround (0 = use MTYPE_RW where applicable, 1 = use MTYPE_CC where applicable (default))"); -module_param_named(use_mtype_cc_wa, amdgpu_use_mtype_cc_wa, bool, 0444); +int amdgpu_mtype_local; +MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_CC (default), 1 = MTYPE_NC, 2 = MTYPE_RW)"); +module_param_named(mtype_local, amdgpu_mtype_local, int, 0444); /** * DOC: pcie_p2p (bool) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index c64a69f75da2..5a1414300271 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1235,7 +1235,16 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, * NUMA systems. Their MTYPE can be overridden per-page in * gmc_v9_0_override_vm_pte_flags. */ - mtype_local = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; + mtype_local = MTYPE_CC; + if (amdgpu_mtype_local == 1) { + DRM_INFO_ONCE("Using MTYPE_NC for local memory\n"); + mtype_local = MTYPE_NC; + } else if (amdgpu_mtype_local == 2) { + DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); + mtype_local = MTYPE_RW; + } else { + DRM_INFO_ONCE("Using MTYPE_CC for local memory\n"); + } is_local = (!is_vram && (adev->flags & AMD_IS_APU) && num_possible_nodes() <= 1) || (is_vram && adev == bo_adev /* TODO: memory partitions && @@ -1349,9 +1358,13 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n", /*vm->mem_id*/0, local_node, nid); if (nid == local_node) { - unsigned int mtype_local = - amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; uint64_t old_flags = *flags; + unsigned int mtype_local = MTYPE_CC; + + if (amdgpu_mtype_local == 1) + mtype_local = MTYPE_NC; + else if (amdgpu_mtype_local == 2) + mtype_local = MTYPE_RW; *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(mtype_local); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 4eec75b28917..df0ed5677609 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1192,8 +1192,7 @@ svm_range_get_pte_flags(struct kfd_node *node, } break; case IP_VERSION(9, 4, 3): - mtype_local = amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : - AMDGPU_VM_MTYPE_RW; + mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_RW : AMDGPU_VM_MTYPE_CC); snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; -- cgit v1.2.3 From b9cbd51000ad3541351ca832b00600870ac08e5c Mon Sep 17 00:00:00 2001 From: Graham Sider <Graham.Sider@amd.com> Date: Mon, 6 Mar 2023 17:56:44 -0500 Subject: drm/amdgpu/bu: update mtype_local parameter settings Update mtype_local module parameter to use MTYPE_RW by default. 0: MTYPE_RW (default) 1: MTYPE_NC 2: MTYPE_CC Signed-off-by: Graham Sider <Graham.Sider@amd.com> Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++------ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 3 ++- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 706ba4af062f..aa466a9eb956 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -826,7 +826,7 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm * DOC: mtype_local (int) */ int amdgpu_mtype_local; -MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_CC (default), 1 = MTYPE_NC, 2 = MTYPE_RW)"); +MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)"); module_param_named(mtype_local, amdgpu_mtype_local, int, 0444); /** diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 5a1414300271..32eb4f4f5492 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1235,15 +1235,15 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, * NUMA systems. Their MTYPE can be overridden per-page in * gmc_v9_0_override_vm_pte_flags. */ - mtype_local = MTYPE_CC; + mtype_local = MTYPE_RW; if (amdgpu_mtype_local == 1) { DRM_INFO_ONCE("Using MTYPE_NC for local memory\n"); mtype_local = MTYPE_NC; } else if (amdgpu_mtype_local == 2) { - DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); - mtype_local = MTYPE_RW; - } else { DRM_INFO_ONCE("Using MTYPE_CC for local memory\n"); + mtype_local = MTYPE_CC; + } else { + DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); } is_local = (!is_vram && (adev->flags & AMD_IS_APU) && num_possible_nodes() <= 1) || @@ -1359,12 +1359,12 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, /*vm->mem_id*/0, local_node, nid); if (nid == local_node) { uint64_t old_flags = *flags; - unsigned int mtype_local = MTYPE_CC; + unsigned int mtype_local = MTYPE_RW; if (amdgpu_mtype_local == 1) mtype_local = MTYPE_NC; else if (amdgpu_mtype_local == 2) - mtype_local = MTYPE_RW; + mtype_local = MTYPE_CC; *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(mtype_local); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index df0ed5677609..e6348d4133fd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1192,7 +1192,8 @@ svm_range_get_pte_flags(struct kfd_node *node, } break; case IP_VERSION(9, 4, 3): - mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_RW : AMDGPU_VM_MTYPE_CC); + mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : + (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW); snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; -- cgit v1.2.3 From 2c1c7ba457d4ecf475c0e220ac5359971355c6eb Mon Sep 17 00:00:00 2001 From: James Zhu <James.Zhu@amd.com> Date: Mon, 15 Aug 2022 16:55:02 -0400 Subject: drm/amdgpu: support partition drm devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support partition drm devices on GC_HWIP IP_VERSION(9, 4, 3). This is a temporary solution and will be superceded. Signed-off-by: Christian König <christian.koenig@amd.com> Signed-off-by: James Zhu <James.Zhu@amd.com> Reviewed-and-tested-by: Philip Yang<Philip.Yang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 32 ++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 59 +++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 5 +++ 6 files changed, 99 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c2feaf2fd070..eb2fb968e3af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -107,6 +107,7 @@ #include "amdgpu_fdinfo.h" #include "amdgpu_mca.h" #include "amdgpu_ras.h" +#include "amdgpu_xcp.h" #define MAX_GPU_INSTANCE 64 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0c6f983fb2ad..4d9c535bcb0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6065,6 +6065,7 @@ void amdgpu_device_halt(struct amdgpu_device *adev) struct pci_dev *pdev = adev->pdev; struct drm_device *ddev = adev_to_drm(adev); + amdgpu_xcp_dev_unplug(adev); drm_dev_unplug(ddev); amdgpu_irq_disable_all(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index aa466a9eb956..7300df2a342c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2185,6 +2185,10 @@ retry_init: goto err_pci; } + ret = amdgpu_xcp_dev_register(adev, ent); + if (ret) + goto err_pci; + /* * 1. don't init fbdev on hw without DCE * 2. don't init fbdev if there are no connectors @@ -2257,6 +2261,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); + amdgpu_xcp_dev_unplug(adev); drm_dev_unplug(dev); if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { @@ -2840,6 +2845,33 @@ static const struct drm_driver amdgpu_kms_driver = { .patchlevel = KMS_DRIVER_PATCHLEVEL, }; +const struct drm_driver amdgpu_partition_driver = { + .driver_features = + DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, + .open = amdgpu_driver_open_kms, + .postclose = amdgpu_driver_postclose_kms, + .lastclose = amdgpu_driver_lastclose_kms, + .ioctls = amdgpu_ioctls_kms, + .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms), + .dumb_create = amdgpu_mode_dumb_create, + .dumb_map_offset = amdgpu_mode_dumb_mmap, + .fops = &amdgpu_driver_kms_fops, + .release = &amdgpu_driver_release_kms, + + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = amdgpu_gem_prime_import, + .gem_prime_mmap = drm_gem_prime_mmap, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = KMS_DRIVER_MAJOR, + .minor = KMS_DRIVER_MINOR, + .patchlevel = KMS_DRIVER_PATCHLEVEL, +}; + static struct pci_error_handlers amdgpu_pci_err_handler = { .error_detected = amdgpu_pci_error_detected, .mmio_enabled = amdgpu_pci_mmio_enabled, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h index 8178323e4bef..5bc2cb661af7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h @@ -42,6 +42,8 @@ #define DRIVER_DESC "AMD GPU" #define DRIVER_DATE "20150101" +extern const struct drm_driver amdgpu_partition_driver; + long amdgpu_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index bca226cc4e0b..8b28b18e4291 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -22,6 +22,9 @@ */ #include "amdgpu.h" #include "amdgpu_xcp.h" +#include "amdgpu_drv.h" + +#include <drm/drm_drv.h> static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr, struct amdgpu_xcp_ip *xcp_ip, int xcp_state) @@ -217,6 +220,31 @@ int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) return mode; } +static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev) +{ + struct drm_device *p_ddev; + struct pci_dev *pdev; + struct drm_device *ddev; + int i; + + pdev = adev->pdev; + ddev = adev_to_drm(adev); + + for (i = 0; i < MAX_XCP; i++) { + p_ddev = drm_dev_alloc(&amdgpu_partition_driver, + &pci_upstream_bridge(pdev)->dev); + if (IS_ERR(p_ddev)) + return PTR_ERR(p_ddev); + + /* Redirect all IOCTLs to the primary device */ + p_ddev->render->dev = ddev; + p_ddev->vma_offset_manager = ddev->vma_offset_manager; + adev->xcp_mgr->xcp[i].ddev = p_ddev; + } + + return 0; +} + int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode, int init_num_xcps, struct amdgpu_xcp_mgr_funcs *xcp_funcs) @@ -242,7 +270,7 @@ int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode, adev->xcp_mgr = xcp_mgr; - return 0; + return amdgpu_xcp_dev_alloc(adev); } int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr, @@ -278,3 +306,32 @@ int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp, return 0; } + +int amdgpu_xcp_dev_register(struct amdgpu_device *adev, + const struct pci_device_id *ent) +{ + int i, ret; + + if (!adev->xcp_mgr) + return 0; + + for (i = 0; i < MAX_XCP; i++) { + ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data); + if (ret) + return ret; + } + + return 0; +} + +void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev) +{ + int i; + + if (!adev->xcp_mgr) + return; + + for (i = 0; i < MAX_XCP; i++) + drm_dev_unplug(adev->xcp_mgr->xcp[i].ddev); +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index e1319b887bf3..dad0b98d1ae7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -70,6 +70,7 @@ struct amdgpu_xcp { uint8_t id; uint8_t mem_id; bool valid; + struct drm_device *ddev; }; struct amdgpu_xcp_mgr { @@ -115,6 +116,10 @@ int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp, enum AMDGPU_XCP_IP_BLOCK ip, uint32_t *inst_mask); +int amdgpu_xcp_dev_register(struct amdgpu_device *adev, + const struct pci_device_id *ent); +void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev); + static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr) { if (!xcp_mgr) -- cgit v1.2.3 From 5d6cd20075c823565e7550f8de70d7615ec3c8b7 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang <shiwu.zhang@amd.com> Date: Mon, 22 May 2023 15:58:10 +0800 Subject: drm/amdgpu: add the accelerator pcie class v2: add the base class id for accelerator (lijo) v3: add the new pci class in amdgpu tree (hawking) Signed-off-by: Shiwu Zhang <shiwu.zhang@amd.com> Acked-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +++++ drivers/gpu/drm/amd/include/amd_shared.h | 1 + 2 files changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7300df2a342c..422c36ed8f36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2041,6 +2041,11 @@ static const struct pci_device_id pciidlist[] = { .class_mask = 0xffffff, .driver_data = CHIP_IP_DISCOVERY }, + { PCI_DEVICE(0x1002, PCI_ANY_ID), + .class = AMD_ACCELERATOR_PROCESSING << 8, + .class_mask = 0xffffff, + .driver_data = CHIP_IP_DISCOVERY }, + {0, 0, 0} }; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index f175e65b853a..57d95e2cc54b 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -27,6 +27,7 @@ #define AMD_MAX_USEC_TIMEOUT 1000000 /* 1000 ms */ +#define AMD_ACCELERATOR_PROCESSING 0x1200 /* hardcoded pci class */ /* * Chip flags -- cgit v1.2.3 From 9938333a46c9e20539c85ca7df42a739541b0493 Mon Sep 17 00:00:00 2001 From: James Zhu <James.Zhu@amd.com> Date: Tue, 25 Apr 2023 17:02:48 -0400 Subject: drm/amdgpu: use amdxcp platform device as spatial partition Use amdxcp platform device as spatial partition device. -v2: remove unused variable Signed-off-by: James Zhu <James.Zhu@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 12 +++++------- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 422c36ed8f36..7489b2b1a0d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -50,6 +50,7 @@ #include "amdgpu_ras.h" #include "amdgpu_xgmi.h" #include "amdgpu_reset.h" +#include "../amdxcp/amdgpu_xcp_drv.h" /* * KMS wrapper. @@ -2948,6 +2949,7 @@ static void __exit amdgpu_exit(void) amdgpu_sync_fini(); amdgpu_fence_slab_fini(); mmu_notifier_synchronize(); + amdgpu_xcp_drv_release(); } module_init(amdgpu_init); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index 86087faab689..d733fa6e7477 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -25,6 +25,7 @@ #include "amdgpu_drv.h" #include <drm/drm_drv.h> +#include "../amdxcp/amdgpu_xcp_drv.h" static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr, struct amdgpu_xcp_ip *xcp_ip, int xcp_state) @@ -226,18 +227,15 @@ int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev) { struct drm_device *p_ddev; - struct pci_dev *pdev; struct drm_device *ddev; - int i; + int i, ret; - pdev = adev->pdev; ddev = adev_to_drm(adev); for (i = 0; i < MAX_XCP; i++) { - p_ddev = drm_dev_alloc(&amdgpu_partition_driver, - &pci_upstream_bridge(pdev)->dev); - if (IS_ERR(p_ddev)) - return PTR_ERR(p_ddev); + ret = amdgpu_xcp_drm_dev_alloc(&p_ddev); + if (ret) + return ret; /* Redirect all IOCTLs to the primary device */ adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev; -- cgit v1.2.3 From c1ac2ea802f5adfd1d128fc01375af9c5f113932 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Tue, 6 Jun 2023 11:14:04 -0400 Subject: drm/amdgpu: add missing radeon secondary PCI ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0x5b70 is a missing RV370 secondary id. Add it so we don't try and probe it with amdgpu. Cc: michel@daenzer.net Reviewed-by: Michel Dänzer <mdaenzer@redhat.com> Tested-by: Michel Dänzer <mdaenzer@redhat.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7489b2b1a0d0..b8a1e4571cd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1640,6 +1640,7 @@ static const u16 amdgpu_unsupported_pciidlist[] = { 0x5874, 0x5940, 0x5941, + 0x5b70, 0x5b72, 0x5b73, 0x5b74, -- cgit v1.2.3 From 9d65b1b4bcf3918164e17365eec169875eef8ee3 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang <shiwu.zhang@amd.com> Date: Tue, 23 May 2023 12:02:32 +0800 Subject: drm/amdgpu: add the accelerator PCIe class Add the accelerator PCIe class and match the class in amdgpu for 0x1002 devices of that class. From PCI spec: "PCI Code and ID Assignment, r1.9, sec 1, 1.19" Signed-off-by: Shiwu Zhang <shiwu.zhang@amd.com> Acked-by: Lijo Lazar <lijo.lazar@amd.com> Acked-by: Bjorn Helgaas <bhelgaas@google.com> # pci_ids.h Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +- drivers/gpu/drm/amd/include/amd_shared.h | 1 - include/linux/pci_ids.h | 3 +++ 4 files changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b8a1e4571cd9..8e58d187b173 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2044,7 +2044,7 @@ static const struct pci_device_id pciidlist[] = { .driver_data = CHIP_IP_DISCOVERY }, { PCI_DEVICE(0x1002, PCI_ANY_ID), - .class = AMD_ACCELERATOR_PROCESSING << 8, + .class = PCI_CLASS_ACCELERATOR_PROCESSING << 8, .class_mask = 0xffffff, .driver_data = CHIP_IP_DISCOVERY }, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 0f1ca0136f50..25b4d7f0bd35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -57,7 +57,7 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) /* enable virtual display */ if (adev->asic_type != CHIP_ALDEBARAN && adev->asic_type != CHIP_ARCTURUS && - ((adev->pdev->class >> 8) != AMD_ACCELERATOR_PROCESSING)) { + ((adev->pdev->class >> 8) != PCI_CLASS_ACCELERATOR_PROCESSING)) { if (adev->mode_info.num_crtc == 0) adev->mode_info.num_crtc = 1; adev->enable_virtual_display = true; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 57d95e2cc54b..f175e65b853a 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -27,7 +27,6 @@ #define AMD_MAX_USEC_TIMEOUT 1000000 /* 1000 ms */ -#define AMD_ACCELERATOR_PROCESSING 0x1200 /* hardcoded pci class */ /* * Chip flags diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 45c3d62e616d..0fbfbda3dc26 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -151,6 +151,9 @@ #define PCI_CLASS_SP_DPIO 0x1100 #define PCI_CLASS_SP_OTHER 0x1180 +#define PCI_BASE_CLASS_ACCELERATOR 0x12 +#define PCI_CLASS_ACCELERATOR_PROCESSING 0x1200 + #define PCI_CLASS_OTHERS 0xff /* Vendors and devices. Sort key: vendor first, device next. */ -- cgit v1.2.3 From 80e709ee6ecc9eba8bd8d188218472822e1b38bd Mon Sep 17 00:00:00 2001 From: Chong Li <chongli2@amd.com> Date: Wed, 7 Jun 2023 15:56:12 +0800 Subject: drm/amdgpu: add option params to enforce process isolation between graphics and compute enforce process isolation between graphics and compute via using the same reserved vmid. v2: remove params "struct amdgpu_vm *vm" from amdgpu_vmid_alloc_reserved and amdgpu_vmid_free_reserved. Signed-off-by: Chong Li <chongli2@amd.com> Reviewed-by: Christian Koenig <Christian.Koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 17 +++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 6 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 25 +++++++++++++++++-------- 5 files changed, 36 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 74104d46a7be..a84bd4a0c421 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -214,6 +214,7 @@ extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; extern int amdgpu_use_xgmi_p2p; extern int amdgpu_mtype_local; +extern bool enforce_isolation; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8e58d187b173..999d008b6b48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -153,7 +153,7 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; - +bool enforce_isolation; /* * OverDrive(bit 14) disabled by default * GFX DCS(bit 19) disabled by default @@ -973,6 +973,14 @@ MODULE_PARM_DESC( 4 = AMDGPU_CPX_PARTITION_MODE)"); module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444); + +/** + * DOC: enforce_isolation (bool) + * enforce process isolation between graphics and compute via using the same reserved vmid. + */ +module_param(enforce_isolation, bool, 0444); +MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index c991ca0b7a1c..ff1ea99292fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -409,7 +409,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (vm->reserved_vmid[vmhub]) { + if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; @@ -460,14 +460,11 @@ error: } int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, unsigned vmhub) { struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; mutex_lock(&id_mgr->lock); - if (vm->reserved_vmid[vmhub]) - goto unlock; ++id_mgr->reserved_use_count; if (!id_mgr->reserved) { @@ -479,27 +476,23 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, list_del_init(&id->list); id_mgr->reserved = id; } - vm->reserved_vmid[vmhub] = true; -unlock: mutex_unlock(&id_mgr->lock); return 0; } void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, unsigned vmhub) { struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; mutex_lock(&id_mgr->lock); - if (vm->reserved_vmid[vmhub] && - !--id_mgr->reserved_use_count) { + if (!--id_mgr->reserved_use_count) { /* give the reserved ID back to normal round robin */ list_add(&id_mgr->reserved->list, &id_mgr->ids_lru); id_mgr->reserved = NULL; } - vm->reserved_vmid[vmhub] = false; + mutex_unlock(&id_mgr->lock); } @@ -578,6 +571,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); } } + /* alloc a default reserved vmid to enforce isolation */ + if (enforce_isolation) + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); + } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index d1cc09b45da4..fa8c42c83d5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -79,11 +79,9 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - unsigned vmhub); + unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - unsigned vmhub); + unsigned vmhub); int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_job *job, struct dma_fence **fence); void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 76d57bc7ac62..dc80c9c8fd14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2284,8 +2284,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } dma_fence_put(vm->last_update); - for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) - amdgpu_vmid_free_reserved(adev, vm, i); + + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) { + if (vm->reserved_vmid[i]) { + amdgpu_vmid_free_reserved(adev, i); + vm->reserved_vmid[i] = false; + } + } + } /** @@ -2368,7 +2374,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) union drm_amdgpu_vm *args = data; struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_fpriv *fpriv = filp->driver_priv; - int r; /* No valid flags defined yet */ if (args->in.flags) @@ -2377,13 +2382,17 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: /* We only have requirement to reserve vmid from gfxhub */ - r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, - AMDGPU_GFXHUB(0)); - if (r) - return r; + if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); + fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true; + } + break; case AMDGPU_VM_OP_UNRESERVE_VMID: - amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB(0)); + if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { + amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0)); + fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false; + } break; default: return -EINVAL; -- cgit v1.2.3