diff options
author | farah kassabri <fkassabri@habana.ai> | 2022-08-17 17:43:43 +0300 |
---|---|---|
committer | Oded Gabbay <ogabbay@kernel.org> | 2022-09-19 15:08:37 +0300 |
commit | 4745b2f0d0d4b291ec69619c815f53fd8a968d9a (patch) | |
tree | 2b3fdedc710fb13b32125d15fbe1483caeddb4de | |
parent | 0855bf8b17374fef702844664af70454fa6951ef (diff) | |
download | lwn-4745b2f0d0d4b291ec69619c815f53fd8a968d9a.tar.gz lwn-4745b2f0d0d4b291ec69619c815f53fd8a968d9a.zip |
habanalabs: send device active message to f/w
As part of the RAS that is done by the f/w, we should send a message
to the f/w when a user either acquires or releases the device.
Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
-rw-r--r-- | drivers/misc/habanalabs/common/device.c | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/firmware_if.c | 15 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h | 3 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs_drv.c | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 6 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi2/gaudi2.c | 12 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi2/gaudi2P.h | 1 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 6 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/common/cpucp_if.h | 11 |
9 files changed, 58 insertions, 0 deletions
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 230b7eeef962..d6df0bd55e9f 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -470,6 +470,8 @@ static int hl_device_release(struct inode *inode, struct file *filp) hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif; + hdev->asic_funcs->send_device_activity(hdev, false); + return 0; } diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 4ede4bb03e8e..cd2eb7e73be5 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -454,6 +454,21 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, size); } +int hl_fw_send_device_activity(struct hl_device *hdev, bool open) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_ACTIVE_STATUS_SET << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.value = cpu_to_le64(open); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc) + dev_err(hdev->dev, "failed to send device activity msg(%u)\n", open); + + return rc; +} + int hl_fw_send_heartbeat(struct hl_device *hdev) { struct cpucp_packet hb_pkt; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 33c6476b60a9..c1bd82d4a83c 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1528,6 +1528,7 @@ struct engines_data { * @access_dev_mem: access device memory * @set_dram_bar_base: set the base of the DRAM BAR * @set_engine_cores: set a config command to enigne cores + * @send_device_activity: indication to FW about device availability */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -1664,6 +1665,7 @@ struct hl_asic_funcs { u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids, u32 num_cores, u32 core_command); + int (*send_device_activity)(struct hl_device *hdev, bool open); }; @@ -3715,6 +3717,7 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev, struct cpucp_hbm_row_info *info); int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num); int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid); +int hl_fw_send_device_activity(struct hl_device *hdev, bool open); int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], bool is_wc[3]); int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data); diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index e12148428731..849e54fe78a6 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -204,6 +204,8 @@ int hl_device_open(struct inode *inode, struct file *filp) goto out_err; } + rc = hdev->asic_funcs->send_device_activity(hdev, true); + list_add(&hpriv->dev_node, &hdev->fpriv_list); mutex_unlock(&hdev->fpriv_list_lock); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 96020693ac29..87dbdbb220da 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -9132,6 +9132,11 @@ static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs; } +static int gaudi_send_device_activity(struct hl_device *hdev, bool open) +{ + return 0; +} + static const struct hl_asic_funcs gaudi_funcs = { .early_init = gaudi_early_init, .early_fini = gaudi_early_fini, @@ -9224,6 +9229,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .mmu_get_real_page_size = hl_mmu_get_real_page_size, .access_dev_mem = hl_access_dev_mem, .set_dram_bar_base = gaudi_set_hbm_bar_base, + .send_device_activity = gaudi_send_device_activity, }; /** diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 4696da7a57c1..330869cb4c0b 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -10031,6 +10031,17 @@ static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data) return -EOPNOTSUPP; } +int gaudi2_send_device_activity(struct hl_device *hdev, bool open) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + + if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) || hdev->fw_major_version < 37) + return 0; + + /* TODO: add check for FW version using minor ver once it's known */ + return hl_fw_send_device_activity(hdev, open); +} + static const struct hl_asic_funcs gaudi2_funcs = { .early_init = gaudi2_early_init, .early_fini = gaudi2_early_fini, @@ -10127,6 +10138,7 @@ static const struct hl_asic_funcs gaudi2_funcs = { .access_dev_mem = hl_access_dev_mem, .set_dram_bar_base = gaudi2_set_hbm_bar_base, .set_engine_cores = gaudi2_set_engine_cores, + .send_device_activity = gaudi2_send_device_activity, }; void gaudi2_set_asic_funcs(struct hl_device *hdev) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h index 9094a702678d..a99c348bbf39 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h @@ -553,5 +553,6 @@ void gaudi2_pb_print_security_errors(struct hl_device *hdev, u32 block_addr, u32 u32 offended_addr); int gaudi2_init_security(struct hl_device *hdev); void gaudi2_ack_protection_bits_errors(struct hl_device *hdev); +int gaudi2_send_device_activity(struct hl_device *hdev, bool open); #endif /* GAUDI2P_H_ */ diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index d8fb91d257b9..5ef9e3ca97a6 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5420,6 +5420,11 @@ static int goya_scrub_device_dram(struct hl_device *hdev, u64 val) return -EOPNOTSUPP; } +static int goya_send_device_activity(struct hl_device *hdev, bool open) +{ + return 0; +} + static const struct hl_asic_funcs goya_funcs = { .early_init = goya_early_init, .early_fini = goya_early_fini, @@ -5512,6 +5517,7 @@ static const struct hl_asic_funcs goya_funcs = { .mmu_get_real_page_size = hl_mmu_get_real_page_size, .access_dev_mem = hl_access_dev_mem, .set_dram_bar_base = goya_set_ddr_bar_base, + .send_device_activity = goya_send_device_activity, }; /* diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index abf40e1c4965..b837bb1f4cd3 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -636,6 +636,10 @@ enum pq_init_status { * passes the max size it allows the CpuCP to write to the structure, to prevent * data corruption in case of mismatched driver/FW versions. * Relevant only to Gaudi. + * + * CPUCP_PACKET_ACTIVE_STATUS_SET - + * LKD sends FW indication whether device is free or in use, this indication is reported + * also to the BMC. */ enum cpucp_packet_id { @@ -691,6 +695,13 @@ enum cpucp_packet_id { CPUCP_PACKET_RESERVED4, /* not used */ CPUCP_PACKET_RESERVED5, /* not used */ CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ + CPUCP_PACKET_RESERVED6, /* not used */ + CPUCP_PACKET_RESERVED7, /* not used */ + CPUCP_PACKET_RESERVED8, /* not used */ + CPUCP_PACKET_RESERVED9, /* not used */ + CPUCP_PACKET_RESERVED10, /* not used */ + CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */ + CPUCP_PACKET_ID_MAX /* must be last */ }; #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 |