diff options
author | Tomer Tayar <ttayar@habana.ai> | 2022-06-30 11:22:54 +0300 |
---|---|---|
committer | Oded Gabbay <ogabbay@kernel.org> | 2022-07-12 09:09:31 +0300 |
commit | 3f043b3192454e430ece973a75c5b9fd9a344dd8 (patch) | |
tree | bcc583bfbb0536afe18484533f83b81e1034c6a1 | |
parent | bfbf5a0a711f111564de5d8f6bab7063ba4af2d8 (diff) | |
download | lwn-3f043b3192454e430ece973a75c5b9fd9a344dd8.tar.gz lwn-3f043b3192454e430ece973a75c5b9fd9a344dd8.zip |
habanalabs/gaudi2: modify decoder to use virtual MSI-X doorbell
Modify the decoder wrapper blocks to generate interrupts using the
virtual MSI-X doorbell.
As a decoder wrapper block cannot write directly to HBW upon completion,
it writes instead to SOB which is monitored by a master monitor.
When resolved, this monitor will be the one to actually write to the
virtual MSI-X doorbell.
Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
-rw-r--r-- | drivers/misc/habanalabs/gaudi2/gaudi2.c | 148 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi2/gaudi2P.h | 15 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi2/gaudi2_masks.h | 3 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h | 6 |
4 files changed, 161 insertions, 11 deletions
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 343e55a14ca5..ba70fd444002 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -4252,6 +4252,109 @@ static void gaudi2_init_edma(struct hl_device *hdev) } } +/* + * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell. + * @hdev: pointer to habanalabs device structure. + * @sob_id: sync object ID. + * @first_mon_id: ID of first monitor out of 3 consecutive monitors. + * @interrupt_id: interrupt ID. + * + * Some initiators cannot have HBW address in their completion address registers, and thus cannot + * write directly to the HBW host memory of the virtual MSI-X doorbell. + * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write. + * + * The mechanism in the sync manager block is composed of a master monitor with 3 messages. + * In addition to the HBW write, the other 2 messages are for preparing the monitor to next + * completion, by decrementing the sync object value and re-arming the monitor. + */ +static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id, + u32 first_mon_id, u32 interrupt_id) +{ + u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config; + struct gaudi2_device *gaudi2 = hdev->asic_specific; + u64 addr; + u8 mask; + + /* Reset the SOB value */ + sob_offset = sob_id * sizeof(u32); + WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); + + /* Configure 3 monitors: + * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor) + * 2. Decrement SOB value by 1. + * 3. Re-arm the master monitor. + */ + + first_mon_offset = first_mon_id * sizeof(u32); + + /* 2nd monitor: Decrement SOB value by 1 */ + mon_offset = first_mon_offset + sizeof(u32); + + addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; + WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); + WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); + + payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */ + FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) | + FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1); + WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); + + /* 3rd monitor: Re-arm the master monitor */ + mon_offset = first_mon_offset + 2 * sizeof(u32); + + addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset; + WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); + WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); + + sob_group = sob_id / 8; + mask = ~BIT(sob_id & 0x7); + mode = 0; /* comparison mode is "greater than or equal to" */ + arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) | + FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) | + FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) | + FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1); + + payload = arm; + WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); + + /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */ + mon_offset = first_mon_offset; + + config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */ + WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config); + + addr = gaudi2->virt_msix_db_dma_addr; + WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); + WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); + + payload = interrupt_id; + WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); + + WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm); +} + +static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev) +{ + u32 decoder_id, sob_id, first_mon_id, interrupt_id; + struct asic_fixed_properties *prop = &hdev->asic_prop; + + /* Decoder normal/abnormal interrupts */ + for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) { + if (!(prop->decoder_enabled_mask & BIT(decoder_id))) + continue; + + sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; + first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id; + interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id; + gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); + + sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; + first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id; + interrupt_id += 1; + gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); + } +} + static void gaudi2_init_sm(struct hl_device *hdev) { struct gaudi2_device *gaudi2 = hdev->asic_specific; @@ -4304,6 +4407,9 @@ static void gaudi2_init_sm(struct hl_device *hdev) /* Configure kernel ASID and MMU BP*/ WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000); WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0); + + /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */ + gaudi2_prepare_sm_for_virt_msix_db(hdev); } static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base) @@ -4452,16 +4558,41 @@ static void gaudi2_init_rotator(struct hl_device *hdev) } } -static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 msix_id) +static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id) { - WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, msix_id); - WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, msix_id + 1); + u32 sob_id; + + /* TODO: + * Remove when virtual MSI-X doorbell is supported in simulator (SW-93022) and in F/W + * (SW-93024). + */ + if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) { + u32 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id; + + WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF); + WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, interrupt_id); + WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF); + WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, interrupt_id + 1); + return; + } + + /* VCMD normal interrupt */ + sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; + WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); + WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); + + /* VCMD abnormal interrupt */ + sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; + WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); + WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); } static void gaudi2_init_dec(struct hl_device *hdev) { struct gaudi2_device *gaudi2 = hdev->asic_specific; - u32 dcore_id, dec_id, dec_bit, msix_id; + u32 dcore_id, dec_id, dec_bit; u64 base_addr; if (!hdev->asic_prop.decoder_enabled_mask) @@ -4482,10 +4613,7 @@ static void gaudi2_init_dec(struct hl_device *hdev) dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET; - msix_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + - (dcore_id * NUM_OF_DEC_PER_DCORE + dec_id) * 2; - - gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, msix_id); + gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); } @@ -4498,9 +4626,7 @@ static void gaudi2_init_dec(struct hl_device *hdev) base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET + dec_id * DCORE_VDEC_OFFSET; - msix_id = GAUDI2_IRQ_NUM_SHARED_DEC0_NRM + (dec_id * 2); - - gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, msix_id); + gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); } diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h index 826d24284dc5..e4bc4009f05b 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h @@ -243,11 +243,20 @@ #define GAUDI2_ARC_PCI_MSB_ADDR(addr) (((addr) & GENMASK_ULL(49, 28)) >> 28) +#define GAUDI2_SOB_INCREMENT_BY_ONE (FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1) | \ + FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1)) + enum gaudi2_reserved_sob_id { GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST, GAUDI2_RESERVED_SOB_CS_COMPLETION_LAST = GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST + GAUDI2_MAX_PENDING_CS - 1, GAUDI2_RESERVED_SOB_KDMA_COMPLETION, + GAUDI2_RESERVED_SOB_DEC_NRM_FIRST, + GAUDI2_RESERVED_SOB_DEC_NRM_LAST = + GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + NUMBER_OF_DEC - 1, + GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST, + GAUDI2_RESERVED_SOB_DEC_ABNRM_LAST = + GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + NUMBER_OF_DEC - 1, GAUDI2_RESERVED_SOB_NUMBER }; @@ -256,6 +265,12 @@ enum gaudi2_reserved_mon_id { GAUDI2_RESERVED_MON_CS_COMPLETION_LAST = GAUDI2_RESERVED_MON_CS_COMPLETION_FIRST + GAUDI2_MAX_PENDING_CS - 1, GAUDI2_RESERVED_MON_KDMA_COMPLETION, + GAUDI2_RESERVED_MON_DEC_NRM_FIRST, + GAUDI2_RESERVED_MON_DEC_NRM_LAST = + GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * NUMBER_OF_DEC - 1, + GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST, + GAUDI2_RESERVED_MON_DEC_ABNRM_LAST = + GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * NUMBER_OF_DEC - 1, GAUDI2_RESERVED_MON_NUMBER }; diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h index 3fd5cf4a8645..eed16d642a5a 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h @@ -135,4 +135,7 @@ #define PCIE_DBI_MSIX_ADDRESS_MATCH_LOW_OFF_MSIX_ADDRESS_MATCH_EN_SHIFT 0 #define PCIE_DBI_MSIX_ADDRESS_MATCH_LOW_OFF_MSIX_ADDRESS_MATCH_EN_MASK 0x1 +#define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_SHIFT 15 +#define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK 0x8000 + #endif /* GAUDI2_MASKS_H_ */ diff --git a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h index e5fe9d5e07f5..d0e2c68a639f 100644 --- a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h +++ b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h @@ -415,9 +415,15 @@ #define SFT_DCORE_OFFSET (mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE - mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE) #define SFT_IF_OFFSET (mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE - mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE) +#define BRDG_CTRL_NRM_MSIX_LBW_AWADDR \ + (mmDCORE0_VDEC0_BRDG_CTRL_NRM_MSIX_LBW_AWADDR - mmDCORE0_VDEC0_BRDG_CTRL_BASE) + #define BRDG_CTRL_NRM_MSIX_LBW_WDATA \ (mmDCORE0_VDEC0_BRDG_CTRL_NRM_MSIX_LBW_WDATA - mmDCORE0_VDEC0_BRDG_CTRL_BASE) +#define BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR \ + (mmDCORE0_VDEC0_BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR - mmDCORE0_VDEC0_BRDG_CTRL_BASE) + #define BRDG_CTRL_ABNRM_MSIX_LBW_WDATA \ (mmDCORE0_VDEC0_BRDG_CTRL_ABNRM_MSIX_LBW_WDATA - mmDCORE0_VDEC0_BRDG_CTRL_BASE) |