summaryrefslogtreecommitdiff
path: root/drivers/accel/habanalabs/gaudi2
diff options
context:
space:
mode:
authorfarah kassabri <fkassabri@habana.ai>2023-08-08 12:56:47 +0300
committerOded Gabbay <ogabbay@kernel.org>2023-10-09 12:37:21 +0300
commit7c4130e6ddd709be2033a6635c91d445cb2baea5 (patch)
treebdf924dfe7763fad06a826e80e115c6b8e9cf390 /drivers/accel/habanalabs/gaudi2
parent72bff371b2e26a0088d4cb1b44aee20c77e423ba (diff)
downloadlwn-7c4130e6ddd709be2033a6635c91d445cb2baea5.tar.gz
lwn-7c4130e6ddd709be2033a6635c91d445cb2baea5.zip
accel/habanalabs/gaudi2: handle eq health heartbeat check
Add mechanism for fw eq health check. this will be done using two flows: using the heartbeat mechanism and raising a dedicated interrupt to indicate an eq failure like EQ full. This patch will add implementation for the eq heartbeat for gaudi2 asic. More info about the heartbeat mechanism: Expand the heartbeat mechanism to monitor a new event that will be sent from FW upon receiving heartbeat message. that way driver can know that the eq is working or not. Signed-off-by: farah kassabri <fkassabri@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/accel/habanalabs/gaudi2')
-rw-r--r--drivers/accel/habanalabs/gaudi2/gaudi2.c10
1 files changed, 10 insertions, 0 deletions
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 677900e18519..e507847bf460 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7804,6 +7804,7 @@ static inline bool is_info_event(u32 event)
* an indication to an error.
*/
case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
+ case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
return true;
default:
return false;
@@ -9765,6 +9766,11 @@ static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
return U16_MAX;
}
+static void hl_eq_heartbeat_event_handle(struct hl_device *hdev)
+{
+ hdev->eq_heartbeat_received = true;
+}
+
static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
struct gaudi2_device *gaudi2 = hdev->asic_specific;
@@ -10190,6 +10196,10 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
gaudi2_irq_map_table[event_type].name);
break;
+ case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
+ hl_eq_heartbeat_event_handle(hdev);
+ error_count = GAUDI2_NA_EVENT_CAUSE;
+ break;
default:
if (gaudi2_irq_map_table[event_type].valid) {
dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",