diff options
author | Farah Kassabri <fkassabri@habana.ai> | 2024-04-09 14:46:19 +0300 |
---|---|---|
committer | Ofir Bitton <obitton@habana.ai> | 2024-06-23 09:53:32 +0300 |
commit | 9ee446f9b5d0172a94681aae01fabde4891f7123 (patch) | |
tree | 6b4ddafce4c2171f13360e990c8831bd3001404b /drivers/accel | |
parent | fda92282b09ed6dc85af22ab4195aec791cdde2f (diff) | |
download | lwn-9ee446f9b5d0172a94681aae01fabde4891f7123.tar.gz lwn-9ee446f9b5d0172a94681aae01fabde4891f7123.zip |
accel/habanalabs: add more info upon cpu pkt timeout
In order to have better debuggability upon encountering FW issues,
we are adding additional info once the CPU packet timeout expires.
Signed-off-by: Farah Kassabri <fkassabri@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Diffstat (limited to 'drivers/accel')
-rw-r--r-- | drivers/accel/habanalabs/common/firmware_if.c | 14 |
1 files changed, 11 insertions, 3 deletions
diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index 6f0c40b12072..3cd8a1f69980 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -460,11 +460,19 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 		/* If FW performed reset just before sending it a packet, we will get a timeout.
 		 * This is expected behavior, hence no need for error message.
 		 */
-		if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset)
+		if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset) {
 			dev_dbg(hdev->dev, "Device CPU packet timeout (0x%x) due to FW reset\n",
 					tmp);
-		else
-			dev_err(hdev->dev, "Device CPU packet timeout (status = 0x%x)\n", tmp);
+		} else {
+			struct hl_bd *bd = queue->kernel_address;
+
+			bd += hl_pi_2_offset(queue->pi);
+
+			dev_err(hdev->dev, "Device CPU packet timeout (status = 0x%x)\n"
+				"Pkt info: dma_addr: 0x%llx, kernel_addr: %p, len:0x%x, ctl: 0x%x, ptr:0x%llx, dram_bd:%u\n",
+				tmp, pkt_dma_addr, (void *)pkt, bd->len, bd->ctl, bd->ptr,
+				queue->dram_bd);
+		}
 		hdev->device_cpu_disabled = true;
 		goto out;
 	}