summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorDafna Hirschfeld <dhirschfeld@habana.ai>2023-02-15 12:15:57 +0200
committerOded Gabbay <ogabbay@kernel.org>2023-03-15 13:29:14 +0200
commit86b74d843897fdb9f2366bc515e9d52816e3d247 (patch)
tree80d6aaa65fc82efa33fd40f80f6eb14f99141b0d /drivers
parentd85f0531b9285cf7dc6784b06e7e7dbc1c04dcee (diff)
downloadlwn-86b74d843897fdb9f2366bc515e9d52816e3d247.tar.gz
lwn-86b74d843897fdb9f2366bc515e9d52816e3d247.zip
accel/habanalabs: assert return value of hw_fini
Since hw_fini return error code for failure indication, we should check its return value. Currently it might only fail upon soft-reset from hl_device_reset. Later patch will add hw_fini failure in case of polling timeout in hard-reset. Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/accel/habanalabs/common/device.c12
-rw-r--r--drivers/accel/habanalabs/gaudi/gaudi.c7
-rw-r--r--drivers/accel/habanalabs/gaudi2/gaudi2.c7
-rw-r--r--drivers/accel/habanalabs/goya/goya.c7
4 files changed, 27 insertions, 6 deletions
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 7ade32487138..99e793dfb126 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -1472,7 +1472,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
schedule_hard_reset = false, delay_reset, from_dev_release, from_watchdog_thread;
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
struct hl_ctx *ctx;
- int i, rc;
+ int i, rc, hw_fini_rc;
if (!hdev->init_done) {
dev_err(hdev->dev, "Can't reset before initialization is done\n");
@@ -1634,7 +1634,7 @@ kill_processes:
}
/* Reset the H/W. It will be in idle state after this returns */
- hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
+ hw_fini_rc = hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
if (hard_reset) {
hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
@@ -1661,6 +1661,10 @@ kill_processes:
hl_ctx_put(ctx);
}
+ if (hw_fini_rc) {
+ rc = hw_fini_rc;
+ goto out_err;
+ }
/* Finished tear-down, starting to re-initialize */
if (hard_reset) {
@@ -2416,7 +2420,9 @@ void hl_device_fini(struct hl_device *hdev)
hl_cb_pool_fini(hdev);
/* Reset the H/W. It will be in idle state after this returns */
- hdev->asic_funcs->hw_fini(hdev, true, false);
+ rc = hdev->asic_funcs->hw_fini(hdev, true, false);
+ if (rc)
+ dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc);
hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
index cc4cd581c8ef..d37c5d4893b9 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -868,13 +868,18 @@ pci_init:
rc = hl_fw_read_preboot_status(hdev);
if (rc) {
if (hdev->reset_on_preboot_fail)
+ /* we are already on failure flow, so don't check if hw_fini fails. */
hdev->asic_funcs->hw_fini(hdev, true, false);
goto pci_fini;
}
if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
- hdev->asic_funcs->hw_fini(hdev, true, false);
+ rc = hdev->asic_funcs->hw_fini(hdev, true, false);
+ if (rc) {
+ dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
+ goto pci_fini;
+ }
}
return 0;
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index bf1d4ef6bbec..c2af874e758c 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -2886,13 +2886,18 @@ static int gaudi2_early_init(struct hl_device *hdev)
rc = hl_fw_read_preboot_status(hdev);
if (rc) {
if (hdev->reset_on_preboot_fail)
+ /* we are already on failure flow, so don't check if hw_fini fails. */
hdev->asic_funcs->hw_fini(hdev, true, false);
goto pci_fini;
}
if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
- hdev->asic_funcs->hw_fini(hdev, true, false);
+ rc = hdev->asic_funcs->hw_fini(hdev, true, false);
+ if (rc) {
+ dev_err(hdev->dev, "failed to reset HW during early init (%d)\n", rc);
+ goto pci_fini;
+ }
}
return 0;
diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
index c869cfe44ce7..e02de936b7b5 100644
--- a/drivers/accel/habanalabs/goya/goya.c
+++ b/drivers/accel/habanalabs/goya/goya.c
@@ -669,13 +669,18 @@ pci_init:
rc = hl_fw_read_preboot_status(hdev);
if (rc) {
if (hdev->reset_on_preboot_fail)
+ /* we are already on failure flow, so don't check if hw_fini fails. */
hdev->asic_funcs->hw_fini(hdev, true, false);
goto pci_fini;
}
if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
- hdev->asic_funcs->hw_fini(hdev, true, false);
+ rc = hdev->asic_funcs->hw_fini(hdev, true, false);
+ if (rc) {
+ dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
+ goto pci_fini;
+ }
}
if (!hdev->pldm) {